Re: [PATCH v6 1/5] x86/paravirt: introduce ALT_NOT_XEN
On December 9, 2023 10:21:34 PM PST, Juergen Gross wrote: >Introduce the macro ALT_NOT_XEN as a short form of >ALT_NOT(X86_FEATURE_XENPV). > >Suggested-by: Peter Zijlstra (Intel) >Signed-off-by: Juergen Gross >--- >V3: >- split off from next patch >V5: >- move patch to the start of the series (Boris Petkov) >--- > arch/x86/include/asm/paravirt.h | 42 --- > arch/x86/include/asm/paravirt_types.h | 3 ++ > 2 files changed, 21 insertions(+), 24 deletions(-) > >diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h >index 693c61dbdd9c..aa76ac7c806c 100644 >--- a/arch/x86/include/asm/paravirt.h >+++ b/arch/x86/include/asm/paravirt.h >@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x) > static __always_inline unsigned long read_cr2(void) > { > return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, >- "mov %%cr2, %%rax;", >- ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%cr2, %%rax;", ALT_NOT_XEN); > } > > static __always_inline void write_cr2(unsigned long x) >@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x) > static inline unsigned long __read_cr3(void) > { > return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, >-"mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV)); >+"mov %%cr3, %%rax;", ALT_NOT_XEN); > } > > static inline void write_cr3(unsigned long x) > { >- PVOP_ALT_VCALL1(mmu.write_cr3, x, >- "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV)); >+ PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); > } > > static inline void __write_cr4(unsigned long x) >@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void); > > static __always_inline void wbinvd(void) > { >- PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); >+ PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN); > } > > static inline u64 paravirt_read_msr(unsigned msr) >@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long >pfn) > static inline pte_t __pte(pteval_t val) > { > return (pte_t) { 
PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, >-"mov %%rdi, %%rax", >-ALT_NOT(X86_FEATURE_XENPV)) }; >+"mov %%rdi, %%rax", ALT_NOT_XEN) }; > } > > static inline pteval_t pte_val(pte_t pte) > { > return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, >- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%rdi, %%rax", ALT_NOT_XEN); > } > > static inline pgd_t __pgd(pgdval_t val) > { > return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, >-"mov %%rdi, %%rax", >-ALT_NOT(X86_FEATURE_XENPV)) }; >+"mov %%rdi, %%rax", ALT_NOT_XEN) }; > } > > static inline pgdval_t pgd_val(pgd_t pgd) > { > return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, >- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%rdi, %%rax", ALT_NOT_XEN); > } > > #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION >@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) > static inline pmd_t __pmd(pmdval_t val) > { > return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, >-"mov %%rdi, %%rax", >-ALT_NOT(X86_FEATURE_XENPV)) }; >+"mov %%rdi, %%rax", ALT_NOT_XEN) }; > } > > static inline pmdval_t pmd_val(pmd_t pmd) > { > return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, >- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%rdi, %%rax", ALT_NOT_XEN); > } > > static inline void set_pud(pud_t *pudp, pud_t pud) >@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val) > pudval_t ret; > > ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, >- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%rdi, %%rax", ALT_NOT_XEN); > > return (pud_t) { ret }; > } >@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val) > static inline pudval_t pud_val(pud_t pud) > { > return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, >- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV)); >+ "mov %%rdi, %%rax", ALT_NOT_XEN); > } > > static inline void pud_clear(pud_t *pudp) >@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) > static inline p4d_t 
__p4d(p4dval_t val) > { > p4dval_t ret =
RE: [PATCH 0/3] x86 disk image and modules initramfs generation
Ok... not really sure how this relates to my patch. You mentioned three separate things: modules, headers, and enough of the kernel machinery to build out of tree modules. The latter is normally done with a tree that corresponds to the state after build + "make clean"; which I *believe* is the same as after "make prepare". The former two are "make modules_install" and "make headers_install", respectively. Note you can direct them to directory hierarchies other than the local system ones for archiving. It is simply not possible to provide a *general* solution that fits all distributions × all boot scenarios. On April 20, 2021 1:30:07 AM PDT, David Laight wrote: >From: H. Peter Anvin >> Sent: 20 April 2021 00:03 >> >> When compiling on a different machine than the runtime target, >> including but not limited to simulators, it is rather handy to be >able >> to produce a bootable image. The scripts for that in x86 are >> relatively old, and assume a BIOS system. > >I've given up and copied the kernel tree onto all my test systems. > >I needed something like 'make modules_install' and 'make install' >that would generated a directory tree that could be copied (scp -r) >onto the target system. > >But the script to run 'update-grub' is all intwined in the commands. > >You also don't get a copy of the headers. >Even for the local system (as root) you just get a symlink into >the source tree. >This causes a problem trying to build 'out of tree' modules >after updating the kernel source tree (but not rebulding). > >I can (and do) write 'horrid' makefiles (gmake and nmake) >but this seemed to need more refactoring that I wanted to do. > > David > >- >Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, >MK1 1PT, UK >Registration No: 1397386 (Wales) -- Sent from my Android device with K-9 Mail. Please excuse my brevity.
[PATCH v2 1/3] x86/boot: Modernize genimage script; hdimage support; remove bzlilo
From: "H. Peter Anvin (Intel)" The image generation scripts in arch/x86/boot are pretty out of date, except for the isoimage target. Update and clean up the genimage.sh script, and make it support an arbitrary number of initramfs files in the image. Add a "hdimage" target, which can be booted by either BIOS or EFI (if the kernel is compiled with the EFI stub.) For EFI to be able to pass the command line to the kernel, we need the EFI shell, but the firmware builtin EFI shell, if it even exists, is pretty much always the last resort boot option, so search for OVMF or EDK2 and explicitly include a copy of the EFI shell. To make this all work, use bash features in the script. Furthermore, this version of the script makes use of some mtools features, especially mpartition, that might not exist in very old version of mtools, but given all the other dependencies on this script this doesn't seem such a big deal. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Makefile| 5 +- arch/x86/boot/.gitignore | 1 + arch/x86/boot/Makefile | 44 +++--- arch/x86/boot/genimage.sh| 294 ++- arch/x86/boot/mtools.conf.in | 3 + 5 files changed, 247 insertions(+), 100 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9a85eae37b17..943f26a32834 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -247,7 +247,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/ boot := arch/x86/boot -BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage +BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage PHONY += bzImage $(BOOT_TARGETS) @@ -305,8 +305,9 @@ define archhelp echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' + echo ' hdimage - Create a BIOS/EFI hard disk image (arch/x86/boot/hdimage)' echo ' isoimage- Create a boot CD-ROM image (arch/x86/boot/image.iso)' - echo 
'bzdisk/fdimage*/isoimage also accept:' + echo 'bzdisk/fdimage*/hdimage/isoimage also accept:' echo 'FDARGS="..." arguments for the booted kernel' echo 'FDINITRD=file initrd for the booted kernel' echo '' diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 9cc7f1357b9b..1189be057ebd 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -11,3 +11,4 @@ setup.elf fdimage mtools.conf image.iso +hdimage diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index fe605205b4ce..dfbc26a8e924 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -29,7 +29,7 @@ KCOV_INSTRUMENT := n SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets:= vmlinux.bin setup.bin setup.elf bzImage -targets+= fdimage fdimage144 fdimage288 image.iso mtools.conf +targets+= fdimage fdimage144 fdimage288 image.iso hdimage subdir-:= compressed setup-y+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o @@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ # Set this if you want to pass append arguments to the -# bzdisk/fdimage/isoimage kernel +# bzdisk/fdimage/hdimage/isoimage kernel FDARGS = -# Set this if you want an initrd included with the -# bzdisk/fdimage/isoimage kernel +# Set this if you want one or more initrds included in the image FDINITRD = -image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) +imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ +targets += mtools.conf + +# genimage.sh requires bash, but it also has a bunch of other +# external dependencies. 
quiet_cmd_genimage = GENIMAGE $3 -cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ - $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD) +cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ + $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) -PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install +PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install # This requires write access to /dev/fd0 -bzdisk: $(obj)/bzImage $(obj)/mtools.conf +# All images require syslinux to be installed; hdimage also requires +# EDK2/OVMF if the kernel is compiled with the EFI stub. +bzdisk: $(imgdeps) $(call cmd,genimage,bzdisk,/dev/fd0) -# These require being root or having syslinux 2.02 or higher installed -fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf +fdimage fd
[PATCH v2 3/3] x86/boot: Add option to add modules.img to {fd,hd,iso}image
From: "H. Peter Anvin (Intel)" Make it easy to generate a disk image which includes the all-modules initramfs image. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Makefile | 3 ++- arch/x86/boot/Makefile | 20 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 943f26a32834..74f4e66568d7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -309,7 +309,8 @@ define archhelp echo ' isoimage- Create a boot CD-ROM image (arch/x86/boot/image.iso)' echo 'bzdisk/fdimage*/hdimage/isoimage also accept:' echo 'FDARGS="..." arguments for the booted kernel' - echo 'FDINITRD=file initrd for the booted kernel' + echo 'FDINITRD=file initrd for the booted kernel' + echo 'FDMODS=1 to include all modules as an initrd' echo '' echo ' kvm_guest.config- Enable Kconfig items for running this kernel as a KVM guest' echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest' diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index dfbc26a8e924..a4f8c66a63d0 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -116,11 +116,23 @@ $(obj)/compressed/vmlinux: FORCE # Set this if you want to pass append arguments to the # bzdisk/fdimage/hdimage/isoimage kernel -FDARGS = +FDARGS := # Set this if you want one or more initrds included in the image -FDINITRD = +FDINITRD := +# Set this to 1 if you want usr/modules.img included in the image +FDMODS := -imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh +imgdeps := $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh +fdinitrds := $(FDINITRD) + +ifneq ($(FDMODS),) +imgdeps += $(objtree)/usr/modules.img +fdinitrds += $(objtree)/usr/modules.img + +$(objtree)/usr/modules.img: + $(Q)$(MAKE) -f $(srctree)/Makefile usr/modules.img +KBUILD_MODULES := 1 +endif $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ @@ -131,7 +143,7 @@ targets += mtools.conf # external dependencies. 
quiet_cmd_genimage = GENIMAGE $3 cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ - $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) + $(obj)/mtools.conf '$(FDARGS)' $(fdinitrds) PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install -- 2.30.2
[PATCH v2 2/3] usr, modules: Add build target for creating a modules initramfs image
From: "H. Peter Anvin (Intel)" Some distributions really cannot be booted without modules anymore. To allow an externally built kernel to be run, it is handy to be able to create an initramfs image with all the modules, that can appended to an existing initramfs image (preferrably without any modules, but not necessarily, since the /lib/modules path includes the kernel version.) This image is generated in usr/modules.img. Since this image may end up being rather large, change the default for CONFIG_INITRAMFS_COMPRESSION_*. We already have a special case in the Makefile to not re-compress an existing initramfs image; add the very small default builtin initramfs as another case to suppress the compression. To avoid losing the default compression setting, add a separate variable for the possibly overridden builtin initramfs compression method. Signed-off-by: H. Peter Anvin (Intel) --- Makefile | 17 - usr/.gitignore | 3 +++ usr/Kconfig| 31 +-- usr/Makefile | 39 +++ 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 4730cf156f6b..89e40760384d 100644 --- a/Makefile +++ b/Makefile @@ -622,7 +622,7 @@ KBUILD_MODULES := KBUILD_BUILTIN := 1 # If we have only "make modules", don't compile built-in objects. -ifeq ($(MAKECMDGOALS),modules) +ifeq (modules,$(MAKECMDGOALS)) KBUILD_BUILTIN := endif @@ -630,7 +630,8 @@ endif # in addition to whatever we do anyway. 
# Just "make" or "make all" shall build modules as well -ifneq ($(filter all modules nsdeps %compile_commands.json clang-%,$(MAKECMDGOALS)),) +ifneq ($(filter all modules nsdeps %compile_commands.json clang-% \ + modules.img usr/modules.img,$(MAKECMDGOALS)),) KBUILD_MODULES := 1 endif @@ -1461,15 +1462,17 @@ modules_prepare: prepare PHONY += modules_install modules_install: _modinst_ _modinst_post +INSTALL_MOD_LN ?= ln -s + PHONY += _modinst_ _modinst_: @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel - @ln -s $(abspath $(srctree)) $(MODLIB)/source + @$(INSTALL_MOD_LN) $(abspath $(srctree)) $(MODLIB)/source @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ - rm -f $(MODLIB)/build ; \ - ln -s $(CURDIR) $(MODLIB)/build ; \ + rm -rf $(MODLIB)/build ; \ + $(INSTALL_MOD_LN) $(CURDIR) $(MODLIB)/build ; \ fi @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order @cp -f modules.builtin $(MODLIB)/ @@ -1489,6 +1492,10 @@ modules_sign: $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modsign endif +# initramfs image containing all modules +modules.img usr/modules.img: modules + $(Q)$(MAKE) $(build)=usr usr/modules.img + else # CONFIG_MODULES # Modules not configured diff --git a/usr/.gitignore b/usr/.gitignore index 935442ed1eb2..14e2fae88d8a 100644 --- a/usr/.gitignore +++ b/usr/.gitignore @@ -2,3 +2,6 @@ gen_init_cpio initramfs_data.cpio /initramfs_inc_data +/modules +modules.cpio +modules.img diff --git a/usr/Kconfig b/usr/Kconfig index 8bbcf699fe3b..625372682b5e 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -109,18 +109,23 @@ config RD_ZSTD If unsure, say N. choice - prompt "Built-in initramfs compression mode" - depends on INITRAMFS_SOURCE != "" - help - This option allows you to decide by which algorithm the builtin - initramfs will be compressed. Several compression algorithms are - available, which differ in efficiency, compression and - decompression speed. Compression speed is only relevant - when building a kernel. 
Decompression speed is relevant at - each boot. Also the memory usage during decompression may become - relevant on memory constrained systems. This is usually based on the - dictionary size of the algorithm with algorithms like XZ and LZMA - featuring large dictionary sizes. + prompt "Preferred initramfs compression mode" + default INITRAMFS_COMPRESSION_XZ if RD_XZ + default INITRAMFS_COMPRESSION_GZIP if RD_GZIP + default INITRAMFS_COMPRESSION_LZO if RD_LZO + default INITRAMFS_COMPRESSION_LZ4 if RD_LZ4 + default INITRAMFS_COMPRESSION_NONE + help + This option allows you to decide by which algorithm the + builtin initramfs and usr/modules.img will be compressed. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. Also the + memory usage during decompression may become relevant on + memory constrained systems. This is usually based on the + di
[PATCH 0/3] x86 disk image and modules initramfs generation
From: "H. Peter Anvin" (Intel) When compiling on a different machine than the runtime target, including but not limited to simulators, it is rather handy to be able to produce a bootable image. The scripts for that in x86 are relatively old, and assume a BIOS system. This adds a build target to generate a hdimage which can be booted either from BIOS or EFI, and modernizes the genimage.sh script including adding the ability to add an arbitrary number of initramfs files (limited only by the length of the command line.) Possibly more controversial, at least from a Kbuild design perspective (as usual I'm the guy who wants to do something with Kbuild which it seems it was never really designed to do), is to add the ability to create an initramfs image which includes all the built modules. Some distributions cannot be easily booted without modules in initramfs, and this creates an image which can be added to initramfs to provide the kernel modules, as finalized by "make modules_install". The final patch puts these two together, and allows the modules initramfs to be included in the x86 boot image. --- Fixes in v2: 1/3: correct command line generation for multiple initrds 2/3: no change 3/3: append, don't overwrite FDINITRD when adding modules.img Makefile | 17 ++- arch/x86/Makefile| 8 +- arch/x86/boot/.gitignore | 1 + arch/x86/boot/Makefile | 62 + arch/x86/boot/genimage.sh| 294 +++ arch/x86/boot/mtools.conf.in | 3 + usr/.gitignore | 3 + usr/Kconfig | 31 ++--- usr/Makefile | 39 +- 9 files changed, 331 insertions(+), 127 deletions(-)
[PATCH 2/3] usr, modules: Add build target for creating a modules initramfs image
From: "H. Peter Anvin" (Intel) Some distributions really cannot be booted without modules anymore. To allow an externally built kernel to be run, it is handy to be able to create an initramfs image with all the modules, that can appended to an existing initramfs image (preferrably without any modules, but not necessarily, since the /lib/modules path includes the kernel version.) This image is generated in usr/modules.img. Since this image may end up being rather large, change the default for CONFIG_INITRAMFS_COMPRESSION_*. We already have a special case in the Makefile to not re-compress an existing initramfs image; add the very small default builtin initramfs as another case to suppress the compression. To avoid losing the default compression setting, add a separate variable for the possibly overridden builtin initramfs compression method. Signed-off-by: H. Peter Anvin (Intel) --- Makefile | 17 - usr/.gitignore | 3 +++ usr/Kconfig| 31 +-- usr/Makefile | 39 +++ 4 files changed, 67 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 4730cf156f6b..89e40760384d 100644 --- a/Makefile +++ b/Makefile @@ -622,7 +622,7 @@ KBUILD_MODULES := KBUILD_BUILTIN := 1 # If we have only "make modules", don't compile built-in objects. -ifeq ($(MAKECMDGOALS),modules) +ifeq (modules,$(MAKECMDGOALS)) KBUILD_BUILTIN := endif @@ -630,7 +630,8 @@ endif # in addition to whatever we do anyway. 
# Just "make" or "make all" shall build modules as well -ifneq ($(filter all modules nsdeps %compile_commands.json clang-%,$(MAKECMDGOALS)),) +ifneq ($(filter all modules nsdeps %compile_commands.json clang-% \ + modules.img usr/modules.img,$(MAKECMDGOALS)),) KBUILD_MODULES := 1 endif @@ -1461,15 +1462,17 @@ modules_prepare: prepare PHONY += modules_install modules_install: _modinst_ _modinst_post +INSTALL_MOD_LN ?= ln -s + PHONY += _modinst_ _modinst_: @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel - @ln -s $(abspath $(srctree)) $(MODLIB)/source + @$(INSTALL_MOD_LN) $(abspath $(srctree)) $(MODLIB)/source @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ - rm -f $(MODLIB)/build ; \ - ln -s $(CURDIR) $(MODLIB)/build ; \ + rm -rf $(MODLIB)/build ; \ + $(INSTALL_MOD_LN) $(CURDIR) $(MODLIB)/build ; \ fi @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order @cp -f modules.builtin $(MODLIB)/ @@ -1489,6 +1492,10 @@ modules_sign: $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modsign endif +# initramfs image containing all modules +modules.img usr/modules.img: modules + $(Q)$(MAKE) $(build)=usr usr/modules.img + else # CONFIG_MODULES # Modules not configured diff --git a/usr/.gitignore b/usr/.gitignore index 935442ed1eb2..14e2fae88d8a 100644 --- a/usr/.gitignore +++ b/usr/.gitignore @@ -2,3 +2,6 @@ gen_init_cpio initramfs_data.cpio /initramfs_inc_data +/modules +modules.cpio +modules.img diff --git a/usr/Kconfig b/usr/Kconfig index 8bbcf699fe3b..625372682b5e 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -109,18 +109,23 @@ config RD_ZSTD If unsure, say N. choice - prompt "Built-in initramfs compression mode" - depends on INITRAMFS_SOURCE != "" - help - This option allows you to decide by which algorithm the builtin - initramfs will be compressed. Several compression algorithms are - available, which differ in efficiency, compression and - decompression speed. Compression speed is only relevant - when building a kernel. 
Decompression speed is relevant at - each boot. Also the memory usage during decompression may become - relevant on memory constrained systems. This is usually based on the - dictionary size of the algorithm with algorithms like XZ and LZMA - featuring large dictionary sizes. + prompt "Preferred initramfs compression mode" + default INITRAMFS_COMPRESSION_XZ if RD_XZ + default INITRAMFS_COMPRESSION_GZIP if RD_GZIP + default INITRAMFS_COMPRESSION_LZO if RD_LZO + default INITRAMFS_COMPRESSION_LZ4 if RD_LZ4 + default INITRAMFS_COMPRESSION_NONE + help + This option allows you to decide by which algorithm the + builtin initramfs and usr/modules.img will be compressed. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. Also the + memory usage during decompression may become relevant on + memory constrained systems. This is usually based on the + di
[PATCH 1/3] x86/boot: Modernize genimage script; hdimage support; remove bzlilo
From: "H. Peter Anvin" (Intel) The image generation scripts in arch/x86/boot are pretty out of date, except for the isoimage target. Update and clean up the genimage.sh script, and make it support an arbitrary number of initramfs files in the image. Add a "hdimage" target, which can be booted by either BIOS or EFI (if the kernel is compiled with the EFI stub.) For EFI to be able to pass the command line to the kernel, we need the EFI shell, but the firmware builtin EFI shell, if it even exists, is pretty much always the last resort boot option, so search for OVMF or EDK2 and explicitly include a copy of the EFI shell. To make this all work, use bash features in the script. Furthermore, this version of the script makes use of some mtools features, especially mpartition, that might not exist in very old version of mtools, but given all the other dependencies on this script this doesn't seem such a big deal. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Makefile| 5 +- arch/x86/boot/.gitignore | 1 + arch/x86/boot/Makefile | 44 +++--- arch/x86/boot/genimage.sh| 284 +-- arch/x86/boot/mtools.conf.in | 3 + 5 files changed, 238 insertions(+), 99 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9a85eae37b17..943f26a32834 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -247,7 +247,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/ boot := arch/x86/boot -BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage +BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage PHONY += bzImage $(BOOT_TARGETS) @@ -305,8 +305,9 @@ define archhelp echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' + echo ' hdimage - Create a BIOS/EFI hard disk image (arch/x86/boot/hdimage)' echo ' isoimage- Create a boot CD-ROM image (arch/x86/boot/image.iso)' - echo 
'bzdisk/fdimage*/isoimage also accept:' + echo 'bzdisk/fdimage*/hdimage/isoimage also accept:' echo 'FDARGS="..." arguments for the booted kernel' echo 'FDINITRD=file initrd for the booted kernel' echo '' diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 9cc7f1357b9b..1189be057ebd 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -11,3 +11,4 @@ setup.elf fdimage mtools.conf image.iso +hdimage diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index fe605205b4ce..dfbc26a8e924 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -29,7 +29,7 @@ KCOV_INSTRUMENT := n SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets:= vmlinux.bin setup.bin setup.elf bzImage -targets+= fdimage fdimage144 fdimage288 image.iso mtools.conf +targets+= fdimage fdimage144 fdimage288 image.iso hdimage subdir-:= compressed setup-y+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o @@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ # Set this if you want to pass append arguments to the -# bzdisk/fdimage/isoimage kernel +# bzdisk/fdimage/hdimage/isoimage kernel FDARGS = -# Set this if you want an initrd included with the -# bzdisk/fdimage/isoimage kernel +# Set this if you want one or more initrds included in the image FDINITRD = -image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) +imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ +targets += mtools.conf + +# genimage.sh requires bash, but it also has a bunch of other +# external dependencies. 
quiet_cmd_genimage = GENIMAGE $3 -cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ - $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD) +cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ + $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) -PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install +PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install # This requires write access to /dev/fd0 -bzdisk: $(obj)/bzImage $(obj)/mtools.conf +# All images require syslinux to be installed; hdimage also requires +# EDK2/OVMF if the kernel is compiled with the EFI stub. +bzdisk: $(imgdeps) $(call cmd,genimage,bzdisk,/dev/fd0) -# These require being root or having syslinux 2.02 or higher installed -fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf +fdimage fd
[PATCH 0/3] x86 disk image and modules initramfs generation
From: "H. Peter Anvin" (Intel) When compiling on a different machine than the runtime target, including but not limited to simulators, it is rather handy to be able to produce a bootable image. The scripts for that in x86 are relatively old, and assume a BIOS system. This adds a build target to generate a hdimage which can be booted either from BIOS or EFI, and modernizes the genimage.sh script including adding the ability to add an arbitrary number of initramfs files (limited only by the length of the command line.) Possibly more controversial, at least from a Kbuild design perspective (as usual I'm the guy who wants to do something with Kbuild which it seems it was never really designed to do), is to add the ability to create an initramfs image which includes all the built modules. Some distributions cannot be easily booted without modules in initramfs, and this creates an image which can be added to initramfs to provide the kernel modules, as finalized by "make modules_install". The final patch puts these two together, and allows the modules initramfs to be included in the x86 boot image. Makefile | 17 ++- arch/x86/Makefile| 8 +- arch/x86/boot/.gitignore | 1 + arch/x86/boot/Makefile | 55 + arch/x86/boot/genimage.sh| 284 +++ arch/x86/boot/mtools.conf.in | 3 + usr/.gitignore | 3 + usr/Kconfig | 31 ++--- usr/Makefile | 39 +- 9 files changed, 318 insertions(+), 123 deletions(-)
[PATCH 3/3] x86/boot: Add option to add modules.img to {fd,hd,iso}image
From: "H. Peter Anvin" (Intel) Make it easy to generate a disk image which includes the all-modules initramfs image. Signed-off-by: H. Peter Anvin (Intel) --- arch/x86/Makefile | 3 ++- arch/x86/boot/Makefile | 11 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 943f26a32834..74f4e66568d7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -309,7 +309,8 @@ define archhelp echo ' isoimage- Create a boot CD-ROM image (arch/x86/boot/image.iso)' echo 'bzdisk/fdimage*/hdimage/isoimage also accept:' echo 'FDARGS="..." arguments for the booted kernel' - echo 'FDINITRD=file initrd for the booted kernel' + echo 'FDINITRD=file initrd for the booted kernel' + echo 'FDMODS=1 to include all modules as an initrd' echo '' echo ' kvm_guest.config- Enable Kconfig items for running this kernel as a KVM guest' echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest' diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index dfbc26a8e924..601ade7adc70 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -119,9 +119,20 @@ $(obj)/compressed/vmlinux: FORCE FDARGS = # Set this if you want one or more initrds included in the image FDINITRD = +# Set this to 1 if you want usr/modules.img included in the image +FDMODS = imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh +ifneq ($(FDMODS),) +imgdeps += $(objtree)/usr/modules.img +FDINITRD += $(objtree)/usr/modules.img + +$(objtree)/usr/modules.img: + $(Q)$(MAKE) -f $(srctree)/Makefile usr/modules.img +KBUILD_MODULES := 1 +endif + $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ -- 2.30.2
Re: [RFC PATCH 1/2] Documentation/admin-guide: README & svga: remove use of "rdev"
On 2020-08-31 22:38, Randy Dunlap wrote: > > --- linux-next-20200828.orig/Documentation/admin-guide/svga.rst > +++ linux-next-20200828/Documentation/admin-guide/svga.rst > @@ -12,7 +12,7 @@ Intro > This small document describes the "Video Mode Selection" feature which > allows the use of various special video modes supported by the video BIOS. > Due > to usage of the BIOS, the selection is limited to boot time (before the > -kernel decompression starts) and works only on 80X86 machines. > +kernel decompression starts) and works only on 32-bit 80X86 machines. > Incorrect. What controls if this is available is whether or not the kernel is booted through BIOS firmware (as opposed to UEFI, kexec, etc.) -hpa
Re: [PATCH] Documentation/x86/boot.rst: minor improvement
If you are going to fix the language... On 2020-08-31 22:25, Cao jin wrote: > Sorry, I mis-copied 2 addresses. make sure they are CCed. > > On 9/1/20 11:41 AM, Cao jin wrote: >> Typo fix & file name update >> >> Signed-off-by: Cao jin >> --- >> Documentation/x86/boot.rst | 4 ++-- >> 1 file changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst >> index 7fafc7ac00d7..c04afec90486 100644 >> --- a/Documentation/x86/boot.rst >> +++ b/Documentation/x86/boot.rst >> @@ -1379,7 +1379,7 @@ can be calculated as follows:: >> In addition to read/modify/write the setup header of the struct >> boot_params as that of 16-bit boot protocol, the boot loader should >> also fill the additional fields of the struct boot_params as described >> -in zero-page.txt. >> +in zero-page.rst. >> >> After setting up the struct boot_params, the boot loader can load >> 64-bit kernel in the same way as that of 16-bit boot protocol, but >> @@ -1391,7 +1391,7 @@ In 64-bit boot protocol, the kernel is started by >> jumping to the >> >> At entry, the CPU must be in 64-bit mode with paging enabled. (Paging enabled is redundant here.) >> The range with setup_header.init_size from start address of loaded >> -kernel and zero page and command line buffer get ident mapping; >> +kernel and zero page and command line buffer get identity mapping; The range with setup_header.init_size from start address of the loaded kernel, the zero page, and the command line buffer get identity-mapped, and a GDT must be loaded with the descriptors for selectors __BOOT_CS(0x10) and __BOOT_DS(0x18): both descriptors must be 4G flat segment with __BOOT_CS having execute/read permission and __BOOT_DS... Also, it might be useful to take a look to see if other data structures, like setup_data and the initrd also need to be in the identity map. -hpa
Re: [PATCH] [v2] Documentation: clarify driver licensing rules
On 2020-08-14 07:56, Dave Hansen wrote: > > From: Dave Hansen > > Greg has challenged some recent driver submitters on their license > choices. He was correct to do so, as the choices in these instances > did not always advance the aims of the submitters. > > But, this left submitters (and the folks who help them pick licenses) > a bit confused. They have read things like > Documentation/process/license-rules.rst which says: > > individual source files can have a different license > which is required to be compatible with the GPL-2.0 > > and Documentation/process/submitting-drivers.rst: > > We don't insist on any kind of exclusive GPL licensing, > and if you wish ... you may well wish to release under > multiple licenses. > > As written, these appear a _bit_ more laissez faire than we've been in > practice lately. It sounds like we at least expect submitters to make > a well-reasoned license choice and to explain their rationale. It does > not appear that we blindly accept anything that is simply > GPLv2-compatible. > > Drivers appear to be the most acute source of misunderstanding, so fix > the driver documentation first. Update it to clarify expectations. > Well written! Retroactive Ack from me :) -hpa
Re: [PATCH v2] x86: Use xorl %0,%0 in __get_user_asm
On 2020-08-27 11:09, Uros Bizjak wrote: > xorl %0,%0 is equivalent to xorq %0,%0 as both will zero the > entire register. Use xorl %0,%0 for all operand sizes to avoid > REX prefix byte when legacy registers are used and to avoid size > prefix byte when 16bit registers are used. > > Zeroing the full register is OK in this use case. xorl %0,%0 also > breaks register dependency chains, avoiding potential partial > register stalls with 8 and 16bit operands. > > The patch lowers the size of .fixup section by 20 bytes. > > Changes since v1: > - Rewrite commit message. > > Signed-off-by: Uros Bizjak > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: Borislav Petkov > Cc: "H. Peter Anvin" Reviewed-by: H. Peter Anvin (Intel)
Re: [PATCH] x86: Use XORL r32,r32 in __get_user_asm
On 2020-08-27 09:49, Uros Bizjak wrote: > Use XORL r32,r32 for all operand sizes. x86_64 zero extends 32bit > operations, so for 64bit operands, XORL r32,r32 is functionally > equal to XORL r64,r64, but avoids a REX prefix byte when legacy > registers are used. Please make it clear that this refers specifically to the use case of both registers being the same, for zeroing. This could otherwise be misread. "xorl r64,r64" is nonsensical: you're referring to xorq. The apparent visual mix here between Intel and gas syntax is also confusing. I would explicily say, using gcc inline syntax: xorl %0,%0 is equivalent to xorq %0,%0 as both will zero the entire register. > 32bit operation also avoids 0x66 size prefix for 16bit operands > and REX prefix when %sil, %dil and %bpl 8bit registers are used. > > As a bonus, 32bit XORL breaks register dependency chains, avoiding > potential partial register stalls with 8 and 16bit operands. Please make it clear that zeroing the full register is OK in this use case (which it is.) > The patch lowers the size of .fixup section by 20 bytes. -hpa
Re: [REGRESSION] x86/cpu fsgsbase breaks TLS in 32 bit rr tracees on a 64 bit system
On 2020-08-24 14:10, Andy Lutomirski wrote: > > PTRACE_READ_SEGMENT_DESCRIPTOR to read a segment descriptor. > > PTRACE_SET_FS / PTRACE_SET_GS: Sets FS or GS and updates the base accordingly. > > PTRACE_READ_SEGMENT_BASE: pass in a segment selector, get a base out. > You would use this to populate the base fields. > > or perhaps a ptrace SETREGS variant that tries to preserve the old > base semantics and magically sets the bases to match the selectors if > the selectors are nonzero. > > Do any of these choices sound preferable to any of you? > My suggestion would be to export the GDT and LDT as a (readonly or mostly readonly) regset(s) rather than adding entirely new operations. We could allow the LDT and the per-thread GDT entries to be written, subject to the same limitations as the corresponding system calls. -hpa
Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches
On 2020-08-18 13:58, Nick Desaulniers wrote: > On Tue, Aug 18, 2020 at 1:27 PM Nick Desaulniers > wrote: >> >> On Tue, Aug 18, 2020 at 1:24 PM Arvind Sankar wrote: >>> >>> On Tue, Aug 18, 2020 at 12:13:22PM -0700, Linus Torvalds wrote: >>>> On Tue, Aug 18, 2020 at 12:03 PM H. Peter Anvin wrote: >>>>> >>>>> I'm not saying "change the semantics", nor am I saying that playing >>>>> whack-a-mole *for a limited time* is unreasonable. But I would like to go >>>>> back >>>>> to the compiler authors and get them to implement such a #pragma: "this >>>>> freestanding implementation *does* support *this specific library >>>>> function*, >>>>> and you are free to call it." >>>> >>>> I'd much rather just see the library functions as builtins that always >>>> do the right thing (with the fallback being "just call the standard >>>> function"). >>>> >>>> IOW, there's nothing wrong with -ffreestanding if you then also have >>>> __builtin_memcpy() etc, and they do the sane compiler optimizations >>>> for memcpy(). >>>> >>>> What we want to avoid is the compiler making *assumptions* based on >>>> standard names, because we may implement some of those things >>>> differently. >>>> >>> >>> -ffreestanding as it stands today does have __builtin_memcpy and >>> friends. But you need to then use #define memcpy __builtin_memcpy etc, >>> which is messy and also doesn't fully express what you want. #pragma, or >>> even just allowing -fbuiltin-foo options would be useful. > > I do really like the idea of -fbuiltin-foo. For example, you'd specify: > > -ffreestanding -fbuiltin-bcmp > > as an example. `-ffreestanding` would opt you out of ALL libcall > optimizations, `-fbuiltin-bcmp` would then opt you back in to > transforms that produce bcmp. That way you're informing the compiler > more precisely about the environment you'd be targeting. It feels > symmetric to existing `-fno-` flags (clang makes -f vs -fno- pretty > easy when there is such symmetry). 
And it's already convention that > if you specify multiple conflicting compiler flags, then the latter > one specified "wins." In that sense, turning back on specific > libcalls after disabling the rest looks more ergonomic to me. > > Maybe Eli or David have thoughts on why that may or may not be as > ergonomic or possible to implement as I imagine? > I would prefer this to be a #pragma for a header file, rather than having a very long command line for everything... -hpa
Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches
On 2020-08-18 10:56, Nick Desaulniers wrote: >> >> The problem here is twofold: >> >> 1. The user would be expected to know what kind of the optimizations the >> compiler can do on what function, which is private knowledge to the >> compiler. >> >> 2. The only way to override -fno-builtin is by a header file with macros >> overriding the function names with __builtin, but that doesn't tell the >> compiler proper anything about the execution environment. >> >> So the Right Thing is for the compiler authors to change the way >> -ffreestanding works. > > Sir, this is an Arby's > > There are things all across the compilation landscape that make we > want to pontificate or even throw a tantrum in an Arby's. Would I? > Well, no, I'm just trying to flip burgers or shovel the elephant > sh...or w/e they do at Arby's (I've never actually been; I detest > roast beef). > > Would it be interesting to have a way of opting in, as you describe, > such that your compiler knew exactly what kind of embedded environment > it was targeting? Maybe, but I'd argue that opting out is just the > other side of the same coin. Heads, I win; tails, you lose. That the > opt in or opt out list is shorter for a given project is not > particularly interesting. Should we change the semantics of a fairly > commonly used compiler flag that multiple toolchains are in agreement > of, then fix all of the breakage in all of the code that relied on > those semantics? I'm afraid that ship may have already > sailed...probably 20 or 30 years ago. > >> -ffreestanding means, by definition, that there >> are no library calls (other than libgcc or whatever else is supplied >> with the compiler) that the compiler can call. That is currently an >> all-or-nothing choice, or at least one choice per C standard implemented. > > Yes? > I'm not saying "change the semantics", nor am I saying that playing whack-a-mole *for a limited time* is unreasonable. 
But I would like to go back to the compiler authors and get them to implement such a #pragma: "this freestanding implementation *does* support *this specific library function*, and you are free to call it." The only way we can get what we really need from the compilers is by speaking up and requesting it, and we have done so very successfully recently; further back we tended to get a lot of language-lawyering, but these days both the gcc and the clang teams have been wonderfully responsive. -hpa
Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches
On 2020-08-17 15:02, Nick Desaulniers wrote: > -ffreestanding typically inhibits "libcall optimizations" where calls to > certain library functions can be replaced by the compiler in certain > cases to calls to other library functions that may be more efficient. > This can be problematic for embedded targets that don't provide full > libc implementations. > > -ffreestanding inhibits all such optimizations, which is the safe > choice, but generally we want the optimizations that are performed. The > Linux kernel does implement a fair amount of libc routines. Instead of > -ffreestanding (which makes more sense in smaller images like kexec's > purgatory image), prefer -fno-builtin-* flags to disable the compiler > from emitting calls to functions which may not be defined. > > If you see a linkage failure due to a missing symbol that's typically > defined in a libc, and not explicitly called from the source code, then > the compiler may have done such a transform. You can either implement > such a function (ie. in lib/string.c) or disable the transform outright > via -fno-builtin-* flag (where * is the name of the library routine, ie. > -fno-builtin-bcmp). > This is arguably exactly the wrong way around. The way this *should* be done is by opt-in, not opt-out, which by almost definition ends up being a game of whack-a-mole, like in this case stpcpy(). Furthermore, it is unlikely that people will remember what options to flip when and if stpcpy() happens to be implemented in the kernel. The problem here is twofold: 1. The user would be expected to know what kind of the optimizations the compiler can do on what function, which is private knowledge to the compiler. 2. The only way to override -fno-builtin is by a header file with macros overriding the function names with __builtin, but that doesn't tell the compiler proper anything about the execution environment. So the Right Thing is for the compiler authors to change the way -ffreestanding works. 
-ffreestanding means, by definition, that there are no library calls (other than libgcc or whatever else is supplied with the compiler) that the compiler can call. That is currently an all-or-nothing choice, or at least one choice per C standard implemented. Instead, a compile job with -ffreestanding should be able to provide a list of standard C functions that the compiler may call, and thus the compiler actually can do the right thing depending on which exact functions it would consider calling. This list is probably most easily supplied in the form of a header file with #pragma directives. -hpa
Re: [PATCH 1/4] Makefile: add -fno-builtin-stpcpy
On 2020-08-17 15:02, Nick Desaulniers wrote: > LLVM implemented a recent "libcall optimization" that lowers calls to > `sprintf(dest, "%s", str)` where the return value is used to > `stpcpy(dest, str) - dest`. This generally avoids the machinery involved > in parsing format strings. This optimization was introduced into > clang-12. Because the kernel does not provide an implementation of > stpcpy, we observe linkage failures for almost all targets when building > with ToT clang. > > The interface is unsafe as it does not perform any bounds checking. > Disable this "libcall optimization" via `-fno-builtin-stpcpy`. > > Unlike > commit 5f074f3e192f ("lib/string.c: implement a basic bcmp") > which cited failures with `-fno-builtin-*` flags being retained in LLVM > LTO, that bug seems to have been fixed by > https://reviews.llvm.org/D71193, so the above sha can now be reverted in > favor of `-fno-builtin-bcmp`. > stpcpy() and (to a lesser degree) mempcpy() are fairly useful routines in general. Perhaps we *should* provide them? -hpa
Re: [PATCH] decompress_bunzip2: fix sizeof type in start_bunzip
On 2020-07-12 05:59, t...@redhat.com wrote: > From: Tom Rix > > clang static analysis flags this error > > lib/decompress_bunzip2.c:671:13: warning: Result of 'malloc' is converted > to a pointer of type 'unsigned int', which is incompatible with sizeof > operand type 'int' [unix.MallocSizeof] > bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); >^~~~ > > Reviewing the bunzip_data structure, the element dbuf is type > > /* Intermediate buffer and its size (in bytes) */ > unsigned int *dbuf, dbufSize; > > So change the type in sizeof to 'unsigned int' > You must be kidding. If you want to change it, change it to sizeof(bd->dbuf) instead, but this flag is at least in my opinion a total joke. For sizeof(int) != sizeof(unsigned int) is beyond bizarre, no matter how stupid the platform. -hpa
Re: [PATCH 09/16] initrd: remove the BLKFLSBUF call in handle_initrd
On 2020-06-15 05:53, Christoph Hellwig wrote: > BLKFLSBUF used to be overloaded for the ramdisk driver to free the whole > ramdisk, which was completely different behavior compared to all other > drivers. But this magic overload got removed in commit ff26956875c2 > ("brd: remove support for BLKFLSBUF"), so this call is entirely > pointless now. > > Signed-off-by: Christoph Hellwig Does *anyone* use initrd as opposed to initramfs anymore? It would seem like a good candidate for deprecation/removal. -hpa
Re: [PATCH] initrd: Remove erroneous comment
On 2020-06-22 14:01, Tom Rini wrote: > > I'm picky here because, well, there's a whole lot of moving parts in the > pre-kernel world. In a strict sense, "UEFI" doesn't do anything with > the kernel but based on hpa's comments I assume that at least the > in-kernel UEFI stub does what Documentation/x86/booting.rst suggests to > do and consumes initrd=/file just like "initrd /file" in extlinux.conf, > etc do. And since the EFI stub is cross-platform, it's worth noting > this too. > For what it's worth, normally boot loaders don't strip this from the kernel command line passed to the kernel, although there might be ones which do so. In general this is bad practice; it is better to let the initrd show in /proc/cmdline. -hpa
Re: [PATCH] initrd: Remove erroneous comment
On 2020-06-22 13:40, Tom Rini wrote: > On Mon, Jun 22, 2020 at 01:02:16PM -0700, ron minnich wrote: > >> The other thing you ought to consider fixing: >> initrd is documented as follows: >> >> initrd= [BOOT] Specify the location of the initial ramdisk >> >> for bootloaders only. >> >> UEFI consumes initrd from the command line as well. As ARM servers >> increasingly use UEFI, there may be situations in which the initrd >> option doesn't make its way to the kernel? I don't know, UEFI is such >> a black box to me. But I've seen this "initrd consumption" happen. >> >> Based on docs, and the growing use of bootloaders that are happy to >> consume initrd= and not pass it to the kernel, you might be better off >> trying to move to the new command line option anyway. >> >> IOW, this comment may not be what people want to see, but ... it might >> also be right. Or possibly changed to: >> >> /* >> * The initrd keyword is in use today on ARM, PowerPC, and MIPS. >> * It is also reserved for use by bootloaders such as UEFI and may >> * be consumed by them and not passed on to the kernel. >> * The documentation also shows it as reserved for bootloaders. >> * It is advised to move to the initrdmem= option whereever possible. >> */ > > Fair warning, one of the other hats I wear is the chief custodian of the > U-Boot project. > > Note that on most architectures in modern times the device tree is used > to pass in initrd type information and "initrd=" on the command line is > quite legacy. > > But what do you mean UEFI "consumes" initrd= ? It's quite expected that > when you configure grub/syslinux/systemd-boot/whatever via extlinux.conf > or similar with "initrd /some/file" something reasonable happens to > read that in to memory and pass along the location to Linux (which can > vary from arch to arch, when not using device tree). 
I guess looking at > Documentation/x86/boot.rst is where treating initrd= as a file that > should be handled and ramdisk_image / ramdisk_size set came from. I do > wonder what happens in the case of ARM/ARM64 + UEFI without device tree. > UEFI plus the in-kernel UEFI stub is, in some ways, a "bootloader" in the traditional sense. It is totally fair that we should update the documentation with this as a different case, though, because it is part of the kernel tree and so the kernel now has partial ownership of the namespace. I suggest "STUB" for "in-kernel firmware stub" for this purpose; no need to restrict it to a specific firmware for the purpose of namespace reservation. -hpa
Re: umip: AMD Ryzen 3900X, pagefault after emulate SLDT/SIDT instruction
On 2020-05-19 07:38, Andreas Rammhold wrote: > Hi, > > I've been running into a weird problem with UMIP on a current Ryzen > 3900x with kernel 5.6.11 where a process receives a page fault after the > kernel handled the SLDT (or SIDT) instruction (emulation). > > The program I am running is run through WINE in 32bit mode and tries to > figure out if it is running in a VMWare machine by comparing the results > of SLDT against well known constants (basically as shown in the > [example] linked below). > Extremely weird. What is it expecting to happen -- or rather, what do you *want* it to do? -hpa
Re: [PATCH] x86: Use INVPCID mnemonic in invpcid.h
Reviewed-by: H. Peter Anvin (Intel) On 2020-05-08 02:22, Uros Bizjak wrote: > Current minimum required version of binutils is 2.23, > which supports INVPCID instruction mnemonic. > > Replace the byte-wise specification of INVPCID with > this proper mnemonic. > > Signed-off-by: Uros Bizjak > CC: "H. Peter Anvin" > CC: Ingo Molnar > CC: Thomas Gleixner > --- > arch/x86/include/asm/invpcid.h | 7 ++- > 1 file changed, 2 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h > index 989cfa86de85..23749bbca0ad 100644 > --- a/arch/x86/include/asm/invpcid.h > +++ b/arch/x86/include/asm/invpcid.h > @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned > long addr, >* stale TLB entries and, especially if we're flushing global >* mappings, we don't want the compiler to reorder any subsequent >* memory accesses before the TLB flush. > - * > - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and > - * invpcid (%rcx), %rax in long mode. >*/ > - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" > - : : "m" (desc), "a" (type), "c" () : "memory"); > + asm volatile ("invpcid %1, %0" > + : : "r" (type), "m" (desc) : "memory"); > } > > #define INVPCID_TYPE_INDIV_ADDR 0 >
Re: [PATCH] x86: bitops: fix build regression
On 2020-05-08 10:21, Nick Desaulniers wrote: >> >> One last suggestion. Add the "b" modifier to the mask operand: "orb >> %b1, %0". That forces the compiler to use the 8-bit register name >> instead of trying to deduce the width from the input. > > Ah right: > https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers > > Looks like that works for both compilers. In that case, we can likely > drop the `& 0xff`, too. Let me play with that, then I'll hopefully > send a v3 today. > Good idea. I requested a while ago that they document these modifiers; they chose not to document them all which in some ways is good; it shows what they are willing to commit to indefinitely. -hpa
Re: [tip: x86/urgent] x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h
On 8/24/19 11:19 AM, Pavel Machek wrote: > On Fri 2019-08-23 01:10:49, tip-bot2 for Tom Lendacky wrote: >> The following commit has been merged into the x86/urgent branch of tip: >> >> Commit-ID: c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 >> Gitweb: >> https://git.kernel.org/tip/c49a0a80137c7ca7d6ced4c812c9e07a949f6f24 >> Author:Tom Lendacky >> AuthorDate:Mon, 19 Aug 2019 15:52:35 >> Committer: Borislav Petkov >> CommitterDate: Mon, 19 Aug 2019 19:42:52 +02:00 >> >> x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h >> >> There have been reports of RDRAND issues after resuming from suspend on >> some AMD family 15h and family 16h systems. This issue stems from a BIOS >> not performing the proper steps during resume to ensure RDRAND continues >> to function properly. > > There are quite a few unanswered questions here. > > a) Is there/should there be CVE for this? > > b) Can we perform proper steps in kernel, thus making RDRAND usable > even when BIOS is buggy? > The kernel should at least be able to set its internal "CPUID" bit, visible through /proc/cpuinfo. -hpa
Re: [PATCH 1/1] x86/boot: clear some fields explicitly
On 7/25/19 3:03 PM, Thomas Gleixner wrote: > On Thu, 25 Jul 2019, h...@zytor.com wrote: >> On July 25, 2019 2:48:30 PM PDT, Thomas Gleixner wrote: >>> >>> But seriously I think it's not completely insane what they are doing >>> and the table based approach is definitely more readable and maintainable >>> than the existing stuff. >> >> Doing this table based does seem like a good idea. > > The question is whether we use a 'toclear' table or a 'preserve' table. I'd > argue that the 'preserve' approach is saner. > I agree. >> Sent from my Android device with K-9 Mail. Please excuse my brevity. > > I surely excuse the brevity, but the formatting mess which that brevity app > creates is not excusable. > I'll try to improve it... -hpa
Re: [PATCH 1/1] x86/boot: clear some fields explicitly
On 7/25/19 12:22 AM, Thomas Gleixner wrote: >> >> The problem with this is that it will break silently when changes are >> made to this structure. > > That's not really the worst problem. Changes to that struct which touch any > of the to be cleared ranges will break anyway if not handled correctly in > the sanitizer function. > Not really... that's kind of the point (the cleared ranges are cleared explicitly because the boot loader failed to do so, so zeroing them is what the boot loader should have done.) The most correct way to address this would be to have an explicit list of members to be *preserved* even if the sentinel triggers. The easy way would be to put in a suitable cast to clear the warning -- I would not be surprised if an explicit cast to something like (void *) would quiet the warning, or else (yuck) put in an explicit (well-commented) #pragma to shut it up. -hpa
Re: [PATCH RFC 1/2] x86/boot: Introduce the setup_header2
On 5/24/19 2:55 AM, Daniel Kiper wrote: > Due to limited space left in the setup header it was decided to > introduce the setup_header2. Its role is to communicate Linux kernel > supported features to the boot loader. Starting from now this is the > primary way to communicate things to the boot loader. > > Suggested-by: H. Peter Anvin > Signed-off-by: Daniel Kiper > Reviewed-by: Ross Philipson > Reviewed-by: Eric Snowberg > --- > I know that setup_header2 is not the best name. There were some > alternatives proposed like setup_header_extra, setup_header_addendum, > setup_header_more, ext_setup_header, extended_setup_header, extended_header > and extended_setup. Sadly, I am not happy with any of them. So, > leaving setup_header2 as is but still looking for better name. > Probably shorter == better... I would say kernel_info. The relationships between the headers are analogous to the various data sections: setup_header= .data boot_params/setup_data = .bss What is missing from the above list? That's right: kernel_info = .rodata We have been (ab)using .data for things that could go into .rodata or .bss for a long time, for lack of alternatives and -- especially early on -- inertia. Also, the BIOS stub is responsible for creating boot_params, so it isn't available to a BIOS-based loader (setup_data is, though.) setup_header is permanently limited to 144 bytes due to the reach of the 2-byte jump field, which doubles as a length field for the structure, combined with the size of the "hole" in struct boot_params that a protected-mode loader or the BIOS stub has to copy it into. It is currently 119 bytes long, which leaves us with 25 very precious bytes. This isn't something that can be fixed without revising the boot protocol entirely, breaking backwards compatibility. boot_params proper is limited to 4096 bytes, but can be arbitrarily extended by adding setup_data entries. 
It cannot be used to communicate properties of the kernel image, because it is .bss and has no image-provided content. kernel_info solves this by providing an extensible place for information about the kernel image. It is readonly, because the kernel cannot rely on a bootloader copying its contents anywhere, but that is OK; if it becomes necessary it can still contain data items that an enabled bootloader would be expected to copy into a setup_data chunk. ^ The above or some variant thereof may be a good thing to put both in your patch comments as well as in the boot protocol documentation. While we are making a change that bumps the version number anyway, there is another change I would like to make to the boot protocol which we might as well do at the same time. setup_data is a bit awkward to use for extremely large data objects, both because the setup_data header has to be adjacent to the data object, and because it has a 32-bit length field. However, it is important that intermediate stages of the boot process have a way to identify which chunks of memory are occupied by kernel data. Thus I think we should introduce a uniform way to specify such indirect data. We define a new setup_data type we can maybe call SETUP_INDIRECT; a SETUP_INDIRECT data item would be an array of structures of the form: struct setup_indirect { __u32 type; __u32 reserved; /* Reserved, must be set to zero */ __u64 len; __u64 addr; }; ... where type is itself simply a SETUP_* type -- although we probably don't want to let it be SETUP_INDIRECT itself since making it a tree structure could require a lot of stack space in something that needs to parse it, and stack space can be limited in boot contexts. This would be particularly useful for having SETUP_INITRAMFS, if it becomes desirable to allow the kernel to parse a non-contiguous set of memory regions for the initramfs. 
It might be a good idea to immediately start out struct kernel_info with either a high mark or a bitmask of SETUP_* types that the kernel supports. A bitmask would be more flexible, but would need provisions to be grown in the future. Which leads me to yet another thought. We probably want to make the contents of kernel_info a bit more structured to allow for content that may need to be extended in the future, or is inherently variable length (like strings.) This would lend itself to a structure such as: - Magic number - Length of total structure ... followed by a list of data chunks, each prefixed by a length field. The first data chunk would be the main (root) structure; other data structures are pointed to from the root structure using offsets from the beginning of the structure (the magic number field.) As an implementation detail, strings can of course be "pooled" into a single data chunk as long as they are zero-terminated. I have intentionally avoided specifying a type field for each data chunk; history s
Re: [PATCH v3 2/2] initramfs: introduce do_readxattrs()
On 5/17/19 2:02 PM, Arvind Sankar wrote: > On Fri, May 17, 2019 at 01:18:11PM -0700, h...@zytor.com wrote: >> >> Ok... I just realized this does not work for a modular initramfs, composed >> at load time from multiple files, which is a very real problem. Should be >> easy enough to deal with: instead of one large file, use one companion file >> per source file, perhaps something like filename..xattrs (suggesting double >> dots to make it less likely to conflict with a "real" file.) No leading dot, >> as it makes it more likely that archivers will sort them before the file >> proper. > This version of the patch was changed from the previous one exactly to deal > with this case -- > it allows for the bootloader to load multiple initramfs archives, each > with its own .xattr-list file, and to have that work properly. > Could you elaborate on the issue that you see? > Well, for one thing, how do you define "cpio archive", each with its own .xattr-list file? Second, that would seem to depend on the ordering, no, in which case you depend critically on .xattr-list file following the files, which most archivers won't do. Either way it seems cleaner to have this per file; especially if/as it can be done without actually mucking up the format. I need to run, but I'll post a more detailed explanation of what I did in a little bit. -hpa
Re: [PATCH v3 2/2] initramfs: introduce do_readxattrs()
On 5/17/19 1:18 PM, h...@zytor.com wrote: > > Ok... I just realized this does not work for a modular initramfs, composed at > load time from multiple files, which is a very real problem. Should be easy > enough to deal with: instead of one large file, use one companion file per > source file, perhaps something like filename..xattrs (suggesting double dots > to make it less likely to conflict with a "real" file.) No leading dot, as it > makes it more likely that archivers will sort them before the file proper. > > A side benefit is that the format can be simpler as there is no need to > encode the filename. > > A technically cleaner solution still, but which would need archiver > modifications, would be to encode the xattrs as an optionally nameless file > (just an empty string) with a new file mode value, immediately following the > original file. The advantage there is that the archiver itself could support > xattrs and other extended metadata (which has been requested elsewhere); the > disadvantage obviously is that that it requires new support in the archiver. > However, at least it ought to be simpler since it is still a higher protocol > level than the cpio archive itself. > > There's already one special case in cpio, which is the "!!!TRAILER!!!" > filename; although I don't think it is part of the formal spec, to the extent > there is one, I would expect that in practice it is always encoded with a > mode of 0, which incidentally could be used to unbreak the case where such a > filename actually exists. So one way to support such extended metadata would > be to set mode to 0 and use the filename to encode the type of metadata. I > wonder how existing GNU or BSD cpio (the BSD one is better maintained these > days) would deal with reading such a file; it would at least not be a > regression if it just read it still, possibly with warnings. 
It could also be > possible to use bits 17:16 in the mode, which are traditionally always zero > (mode_t being 16 bits), but I believe are present in most or all of the cpio > formats for historical reasons. It might be accepted better by existing > implementations to use one of these high bits combined with S_IFREG, I don't > know. > Correction: it's just TRAILER!!!. I tested with GNU cpio, BSD cpio, scpio and pax. With a mode of 0: - GNU cpio errors, but extracts all the other files. - BSD cpio extracts them as regular files. - scpio and pax abort. With a mode of 0x18000 (bit 16 + S_IFREG), all of them happily extracted the data as regular files. -hpa
Re: Potentially missing "memory" clobbers in bitops.h for x86
On 3/29/19 2:09 PM, Paul E. McKenney wrote: >> >> Note: the atomic versions of these functions obviously need to have >> "volatile" and the clobber anyway, as they are by definition barriers >> and moving memory operations around them would be a very serious error. > > The atomic functions that return void don't need to order anything except > the input and output arguments. The oddness with clear_bit() is that the > memory changed isn't necessarily the quantity referenced by the argument, > if the number of bits specified is large. > > So (for example) atomic_inc() does not need a "memory" clobber, right? > I don't believe that is true: the code calling it has a reasonable expectation that previous memory operations have finished and later memory operations have not started from the point of view of another processor. You are more of an expert on memory ordering than I am, but I'm 89% sure that there is plenty of code in the kernel which makes that assumption. -hpa
Re: Potentially missing "memory" clobbers in bitops.h for x86
On 3/29/19 8:54 AM, Alexander Potapenko wrote: > >> Of course, this would force the compiler to actually compute the >> offset, which would slow things down. I have no idea whether this >> would be better or worse than just using the "memory" clobber. > Just adding the "memory" clobber to clear_bit() changes sizes of 5 > kernel functions (the three mentioned above, plus hub_activate() and > native_send_call_func_ipi()) by a small margin. > This probably means the performance impact of this clobber is > negligible in this case. I would agree with that. Could you perhaps verify whether or not any of the above functions contains a currently manifest bug? Note: the atomic versions of these functions obviously need to have "volatile" and the clobber anyway, as they are by definition barriers and moving memory operations around them would be a very serious error. -hpa
Re: [PATCH v2] tty/serial: Add a serial port simulator
Dumb question: this is basically a pty on steroids. Wouldn't this be better done by enhancing the pty devices? -hpa
Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel
On 3/6/19 3:37 PM, Daniel Colascione wrote: > > I just don't get the opposition to Joel's work. The rest of the thread > already goes into detail about the problems with pure-filesystem > solutions, and you and others are just totally ignoring those > well-thought-out rationales for the module approach and doing > inflooping on "lol just use a tarball". That's not productive. > You might think they are well thought out, but at least from what I can tell they seem completely spurious. -hpa
Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel
On 3/6/19 3:37 PM, Daniel Colascione wrote: > > I just don't get the opposition to Joel's work. The rest of the thread > already goes into detail about the problems with pure-filesystem > solutions, and you and others are just totally ignoring those > well-thought-out rationales for the module approach and doing > inflooping on "lol just use a tarball". That's not productive. > > Look; here's the bottom line: without this work, doing certain kinds > of system tracing is a nightmare, and with this patch, it Just Works. > You're arguing that various tools should do a better job of keeping > the filesystem in sync with the kernel. Maybe you're right. But we > don't live in a world where they will, because if this coherence were > going to happen, it'd work already. But this work solves the problem: > by necessity, anything that changes a kernel image *must* update > modules coherently, whether the kernel image and module come from the > filesystem, network boot, some kind of SQL database, or carrier > pigeon. There's nothing wrong with work that very cheaply makes the > kernel self-describing (introspection is elegant) and that takes > advantage of *existing* kernel tooling infrastructure to transparently > do a new thing. > > You don't have to use this patch if you don't want to. Please stop > trying to block it. > No, that's not how this works. It is far worse to do something the wrong way than not doing it at all, when it affects the kernel-user space interactions. Experience -- and we have almost 30 years of it -- has shown that hacks of this nature become engrained and all of a sudden is "mandatory". At the *very least* it needs to comply with the zero-one-infinity rule rather than being yet another ad hoc hack. More fundamentally, we already have multiple ways to handle objects that need to go into the filesystem: they can be installed with (or as) modules, they can use the firmware interface, and so on. 
Saying "it can be a module" is worse than a copout: even if dynamically loaded -- and many setups lock out post-boot module loadings for security reasons -- there is nothing to cause it to unload. The bottom line is that in the end there is no difference between making this an archive of some sort and a module, except that to use the module you need privilege that you otherwise may not need. If your argument is that someone may not be providing the whole set of items provided by "make modules_install", what is there to say that they would include your specific module? Here are some better ways of implementation I can see: 1. Include an archive in "make modules_install". Most trivial; no kernel changes at all. 2. Generalize the initramfs code to be able to create a pre-populated tmpfs at any time, that can be populated from an archive provided by the firmware loading mechanism; like all firmware allows it to either be built in or fetched from the filesystem. This allows it to be built in to the kernel image if that becomes necessary; using tmpfs means that it can be pushed out to swap rather than permanently stored in kernel memory, and this filesystem can be unmounted freeing its memory. 3. Use a squashfs image instead of an archive. -hpa
Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel
On 3/6/19 3:09 PM, Pavel Machek wrote: > On Fri 2019-01-18 17:55:43, Joel Fernandes wrote: >> From: "Joel Fernandes (Google)" >> >> Introduce in-kernel headers and other artifacts which are made available >> as an archive through proc (/proc/kheaders.tgz file). This archive makes >> it possible to build kernel modules, run eBPF programs, and other >> tracing programs that need to extend the kernel for tracing purposes >> without any dependency on the file system having headers and build >> artifacts. >> >> On Android and embedded systems, it is common to switch kernels but not >> have kernel headers available on the file system. Raw kernel headers >> also cannot be copied into the filesystem like they can be on other >> distros, due to licensing and other issues. There's no linux-headers > > If your licensing prevents you from having headers on the > filesystem... then I guess you should fix the licensing. > > I agree with Christoph, this looks pretty horrible. > Pavel > The argument that "it can be a module" is basically an admission of failure - if it isn't part of the kernel image itself there is no benefit over where the modules are stored, which will be *somewhere* in the filesystem. What I *do* think makes sense is to create an archive with this information and stuff it in the same place as the modules. It reduces the amount it is possible to muck it up. -hpa
Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch
On 2/19/19 4:48 AM, Will Deacon wrote: > > I think you'll still hate this, but could we not disable preemption during > the uaccess-enabled region, re-enabling it on the fault path after we've > toggled uaccess off and disable it again when we return back to the > uaccess-enabled region? Doesn't help with tracing, but it should at least > handle the common case. > There is a worse problem with this, I still realize: this would mean blocking preemption across what could possibly be a *very* large copy_from_user(), for example. Exceptions *have* to handle this; there is no way around it. Perhaps the scheduler isn't the right place to put these kinds of asserts, either. Now, __fentry__ is kind of a special beast; in some ways it is an "exception implemented as a function call"; on x86 one could even consider using an INT instruction in order to reduce the NOP footprint in the unarmed case. Nor is __fentry__ a C function; it has far more of an exception-like ABI. *Regardless* of what else we do, I believe __fentry__ ought to save/disable/restore AC, just like an exception does. The idea of using s/a gcc plugin/objtool/ for this isn't really a bad idea. Obviously the general problem is undecidable :) but the enforcement of some simple, fairly draconian rules ("as tight as possible, but no tighter") shouldn't be a huge problem. An actual gcc plugin -- which would probably be quite complex -- could make gcc itself aware of user space accesses and be able to rearrange them to minimize STAC/CLAC and avoid kernel-space accesses inside those brackets. Finally, of course, there is the option of simply outlawing this practice as a matter of policy and require that all structures be accessed through a limited set of APIs. As I recall, the number of places where there were performance-critical regions which could not use the normal accessors are fairly small (signal frames being the main one.) 
Doing bulk copy to/from kernel memory and then accessing them from there would have some performance cost, but would eliminate the need for this complexity entirely. -hpa
Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch
On 2/18/19 6:20 PM, Andy Lutomirski wrote: > > >> On Feb 18, 2019, at 4:24 PM, Linus Torvalds >> wrote: >> >>> On Mon, Feb 18, 2019 at 2:31 PM H. Peter Anvin wrote: >>> >>> The question is what "fix it" means. I'm really concerned about AC escapes, >>> and everyone else should be, too. >> >> I do think that it might be the right thing to do to add some kind of >> WARN_ON_ONCE() for AC being set in various can-reschedule situations. >> >> We'd just have to abstract it sanely. I'm sure arm64 has the exact >> same issue with PAN - maybe it saves properly, but the same "we >> wouldn't want to go through the scheduler with PAN clear". >> >> On x86, we might as well check DF at the same time as AC. >> > > hpa is right, though — calling into tracing code with AC set is not really so > good. And calling schedule() (via preempt_enable() or whatever) is also bad > because it runs all the scheduler code with AC on. Admittedly, the scheduler > is not *that* interesting of an attack surface. > Not just that, but the other question is just how much code we are running with AC open. It really should only be done in some very small regions. -hpa
Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch
On 2/16/19 2:30 AM, Peter Zijlstra wrote: > On Fri, Feb 15, 2019 at 08:06:56PM -0800, h...@zytor.com wrote: >> This implies we invoke schedule -- a restricted operation (consider >> may_sleep) during execution of STAC-enabled code, but *not* as an >> exception or interrupt, since those preserve the flags. > > Meet preempt_enable(). I believe this falls under "doctor, it hurts when I do that." And it hurts for very good reasons. See below. >> I have serious concerns about this. This is more or less saying that >> we have left an unlimited gap and have had AC escape. > > Yes; by allowing normal C in between STAC and CLAC this is so. > >> Does *anyone* see a need to allow this? I got a question at LPC from >> someone about this, and what they were trying to do once all the >> layers had been unwound was so far down the wrong track for a root >> problem that actually has a very simple solution. > > Have you read the rest of the thread? > > All it takes for this to explode is a call to a C function that has > tracing on it in between the user_access_begin() and user_access_end() > things. That is a _very_ easy thing to do. > > Heck, GCC is allowed to generate that broken code compiling > lib/strnlen_user.c; it doesn't _have_ to inline do_strnlen_user (esp. > with CONFIG_OPTIMIZE_INLINING), and making that a function call would > get us fentry hooks and ftrace and *BOOM*. > > (Now, of course, its a static function with a single caller, and GCC > isn't _THAT_ stupid, but it could, if it wanted to) > > Since the bar is _that_ low for mistakes, I figure we'd better fix it. > The question is what "fix it" means. I'm really concerned about AC escapes, and everyone else should be, too. For an issue specific to tracing, it would be more appropriate to do the appropriate things in the tracing entry/exit than in schedule. Otherwise, we don't want to silently paper over mistakes which could mean that we run a large portion of the kernel without protection we thought we had. 
In that sense, calling schedule() with AC set is in fact a great place to have a WARN() or even BUG(), because such an event really could imply that the kernel has been security compromised. Does that make more sense? -hpa
Re: [PATCH 17/17] module: Prevent module removal racing with text_poke()
On 1/16/19 11:54 PM, Masami Hiramatsu wrote: > On Wed, 16 Jan 2019 16:32:59 -0800 > Rick Edgecombe wrote: > >> From: Nadav Amit >> >> It seems dangerous to allow code modifications to take place >> concurrently with module unloading. So take the text_mutex while the >> memory of the module is freed. > > At that point, since the module itself is removed from module list, > it seems no actual harm. Or would you have any concern? > The issue isn't the module list, but rather when it is safe to free the contents, so we don't clobber anything. We absolutely need to enforce that we can't text_poke() something that might have already been freed. That being said, we *also* really would prefer to enforce that we can't text_poke() memory that doesn't actually contain code; as far as I can tell we don't currently do that check. This, again, is a good use for a separate mm context. We can enforce that that context will only ever contain valid page mappings for actual code pages. (Note: in my proposed algorithm, with a separate mm, replace INVLPG with switching CR3 if we have to do a rollback or roll forward in the breakpoint handler.) -hpa
Re: [PATCH 17/17] module: Prevent module removal racing with text_poke()
On 1/17/19 10:07 AM, Nadav Amit wrote: >> On Jan 16, 2019, at 11:54 PM, Masami Hiramatsu wrote: >> >> On Wed, 16 Jan 2019 16:32:59 -0800 >> Rick Edgecombe wrote: >> >>> From: Nadav Amit >>> >>> It seems dangerous to allow code modifications to take place >>> concurrently with module unloading. So take the text_mutex while the >>> memory of the module is freed. >> >> At that point, since the module itself is removed from module list, >> it seems no actual harm. Or would you have any concern? > > So it appears that you are right and all the users of text_poke() and > text_poke_bp() do install module notifiers, and remove the module from their > internal data structure when they are done (*). As long as they prevent > text_poke*() to be called concurrently (e.g., using jump_label_lock()), > everything is fine. > > Having said that, the question is whether you “trust” text_poke*() users to > do so. text_poke() description does not day explicitly that you need to > prevent modules from being removed. > > What do you say? > Please make it explicit. -hpa
Re: [PATCH v3 0/6] Static calls
On 1/14/19 9:01 PM, H. Peter Anvin wrote: > > This could be as simple as spinning for a limited time waiting for > states 0 or 3 if we are not the patching CPU. It is also not necessary > to wait for the mask to become zero for the first sync if we find > ourselves suddenly in state 4. > So this would look something like this for the #BP handler; I think this is safe. This uses the TLB miss on the write page intentionally to slow down the loop a bit to reduce the risk of livelock. Note that "bp_write_addr" here refers to the write address for the breakpoint that was taken. state = atomic_read(&bp_poke_state); if (state == 0) return 0; /* No patching in progress */ recheck: clear bit in mask switch (state) { case 1: case 4: if (smp_processor_id() != bp_patching_cpu) { int retries = NNN; while (retries--) { invlpg if (*bp_write_addr != 0xcc) goto recheck; state = atomic_read(&bp_poke_state); if (state != 1 && state != 4) goto recheck; } } state = cmpxchg(&bp_poke_state, 1, 4); if (state != 1 && state != 4) goto recheck; atomic_write(bp_write_addr, bp_old_value); break; case 2: if (smp_processor_id() != bp_patching_cpu) { invlpg state = atomic_read(&bp_poke_state); if (state != 2) goto recheck; } complete patch sequence remove breakpoint break; case 3: case 0: /* * If we are here, the #BP will go away on its * own, or we will re-take it if it was a "real" * breakpoint. */ break; } return 1;
Re: [PATCH v3 0/6] Static calls
On 1/14/19 7:05 PM, Andy Lutomirski wrote: > On Mon, Jan 14, 2019 at 2:55 PM H. Peter Anvin wrote: >> >> I think this sequence ought to work (keep in mind we are already under a >> mutex, so the global data is safe even if we are preempted): > > I'm trying to wrap my head around this. The states are: > > 0: normal operation > 1: writing 0xcc, can be canceled > 2: writing final instruction. The 0xcc was definitely synced to all CPUs. > 3: patch is definitely installed but maybe not sync_cored. > 4: breakpoint has been canceled; need to redo patching. >> >> set up page table entries >> invlpg >> set up bp patching global data >> >> cpu = get_cpu() >> > So we're assuming that the state is > >> bp_old_value = atomic_read(bp_write_addr) >> >> do { > > So we're assuming that the state is 0 here. A WARN_ON_ONCE to check > that would be nice. The state here can be 0 or 4. >> atomic_write(_poke_state, 1) >> >> atomic_write(bp_write_addr, 0xcc) >> >> mask <- online_cpu_mask - self >> send IPIs >> wait for mask = 0 >> >> } while (cmpxchg(_poke_state, 1, 2) != 1); >> >> patch sites, remove breakpoints after patching each one > > Not sure what you mean by patch *sites*. As written, this only > supports one patch site at a time, since there's only one > bp_write_addr, and fixing that may be complicated. Not fixing it > might also be a scalability problem. Fixing it isn't all that complicated; we just need to have a list of patch locations (which we need anyway!) and walk (or search) it instead of checking just one; I omitted that detail for simplicity. >> atomic_write(_poke_state, 3); >> >> mask <- online_cpu_mask - self >> send IPIs >> wait for mask = 0 >> >> atomic_write(_poke_state, 0); >> >> tear down patching global data >> tear down page table entries >> >> >> >> The #BP handler would then look like: >> >> state = cmpxchg(_poke_state, 1, 4); >> switch (state) { >> case 1: >> case 4: > > What is state 4? 
> >> invlpg >> cmpxchg(bp_write_addr, 0xcc, bp_old_value) I'm 85% sure that the cmpxchg here is actually unnecessary, an atomic_write() is sufficient. >> break; >> case 2: >> invlpg >> complete patch sequence >> remove breakpoint >> break; > > ISTM you might as well change state to 3 here, but it's arguably unnecessary. If and only if you have only one patch location you could, but again, unnecessary. >> case 3: >> /* If we are here, the #BP will go away on its own */ >> break; >> case 0: >> /* No patching in progress!!! */ >> return 0; >> } >> >> clear bit in mask >> return 1; >> >> The IPI handler: >> >> clear bit in mask >> sync_core /* Needed if multiple IPI events are chained */ > > I really like that this doesn't require fixups -- text_poke_bp() just > works. But I'm nervous about livelocks or maybe just extreme slowness > under nasty loads. Suppose some perf NMI code does a static call or > uses a static call. Now there's a situation where, under high > frequency perf sampling, the patch process might almost always hit the > breakpoint while in state 1. It'll get reversed and done again, and > we get stuck. It would be neat if we could get the same "no > deadlocks" property while significantly reducing the chance of a > rollback. This could be as simple as spinning for a limited time waiting for states 0 or 3 if we are not the patching CPU. It is also not necessary to wait for the mask to become zero for the first sync if we find ourselves suddenly in state 4. This wouldn't reduce the livelock probability to zero, but it ought to reduce it enough that if we really are under such heavy event load we may end up getting stuck in any number of ways... > This is why I proposed something where we try to guarantee forward > progress by making sure that any NMI code that might spin and wait for > other CPUs is guaranteed to eventually sync_core(), clear its bit, and > possibly finish a patch. But this is a bit gross. 
Yes, this gets really grotty and who knows how many code paths it would touch. -hpa
Re: [PATCH v3 0/6] Static calls
I think this sequence ought to work (keep in mind we are already under a mutex, so the global data is safe even if we are preempted): set up page table entries invlpg set up bp patching global data cpu = get_cpu() bp_old_value = atomic_read(bp_write_addr) do { atomic_write(&bp_poke_state, 1) atomic_write(bp_write_addr, 0xcc) mask <- online_cpu_mask - self send IPIs wait for mask = 0 } while (cmpxchg(&bp_poke_state, 1, 2) != 1); patch sites, remove breakpoints after patching each one atomic_write(&bp_poke_state, 3); mask <- online_cpu_mask - self send IPIs wait for mask = 0 atomic_write(&bp_poke_state, 0); tear down patching global data tear down page table entries The #BP handler would then look like: state = cmpxchg(&bp_poke_state, 1, 4); switch (state) { case 1: case 4: invlpg cmpxchg(bp_write_addr, 0xcc, bp_old_value) break; case 2: invlpg complete patch sequence remove breakpoint break; case 3: /* If we are here, the #BP will go away on its own */ break; case 0: /* No patching in progress!!! */ return 0; } clear bit in mask return 1; The IPI handler: clear bit in mask sync_core /* Needed if multiple IPI events are chained */
Re: [PATCH v3 0/6] Static calls
So I was already in the middle of composing this message when Andy posted: > I don't even think this is sufficient. I think we also need everyone > who clears the bit to check if all bits are clear and, if so, remove > the breakpoint. Otherwise we have a situation where, if you are in > text_poke_bp() and you take an NMI (or interrupt or MCE or whatever) > and that interrupt then hits the breakpoint, then you deadlock because > no one removes the breakpoint. > > If we do this, and if we can guarantee that all CPUs make forward > progress, then maybe the problem is solved. Can we guarantee something > like all NMI handlers that might wait in a spinlock or for any other > reason will periodically check if a sync is needed while they're > spinning? So the really, really nasty case is when an asynchronous event on the *patching* processor gets stuck spinning on a resource which is unavailable due to another processor spinning on the #BP. We can disable interrupts, but we can't stop NMIs from coming in (although we could test in the NMI handler if we are in that condition and return immediately; I'm not sure we want to do that, and we still have to deal with #MC and what not.) The fundamental problem here is that we don't see the #BP on the patching processor, in which case we could simply complete the patching from the #BP handler on that processor. On 1/13/19 6:40 PM, H. Peter Anvin wrote: > On 1/13/19 6:31 PM, H. 
Peter Anvin wrote: >> >> static cpumask_t text_poke_cpumask; >> >> static void text_poke_sync(void) >> { >> smp_wmb(); >> text_poke_cpumask = cpu_online_mask; >> smp_wmb(); /* Should be optional on x86 */ >> cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); >> on_each_cpu_mask(&text_poke_cpumask, text_poke_sync_cpu, NULL, false); >> while (!cpumask_empty(&text_poke_cpumask)) { >> cpu_relax(); >> smp_rmb(); >> } >> } >> >> static void text_poke_sync_cpu(void *dummy) >> { >> (void)dummy; >> >> smp_rmb(); >> cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); >> /* >> * We are guaranteed to return with an IRET, either from the >> * IPI or the #BP handler; this provides serialization. >> */ >> } >> > > The invariants here are: > > 1. The patching routine must set each bit in the cpumask after each event >that requires synchronization is complete. > 2. The bit can be (atomically) cleared on the target CPU only, and only in a >place that guarantees a synchronizing event (e.g. IRET) before it may >reach the poked instruction. > 3. At a minimum the IPI handler and #BP handler need to clear the bit. It >*is* also possible to clear it in other places, e.g. the NMI handler, if >necessary as long as condition 2 is satisfied. > OK, so with interrupts enabled *on the processor doing the patching* we still have a problem if it takes an interrupt which in turn takes a #BP. Disabling interrupts would not help, because an NMI and #MC could still cause problems unless we can guarantee that no path which may be invoked by NMI/#MC can do text_poke, which seems to be a very aggressive assumption. Note: I am assuming preemption is disabled. The easiest/sanest way to deal with this might be to switch the IDT (or provide a hook in the generic exception entry code) on the patching processor, such that if an asynchronous event comes in, we either roll forward or revert. This is doable because the second sync we currently do is not actually necessary per the hardware guys.
If we take that #BP during the breakpoint deployment phase -- that is, before the first sync has completed -- restore the previous value of the breakpoint byte. Upon return text_poke_bp() will then have to loop back to the beginning and do it again. If we take the #BP after that point, we can complete the patch in the normal manner, by writing the rest of the instruction and then removing the #BP. text_poke_bp() will complete the synchronization sequence on return, but if another processor is spinning and sees the breakpoint having been removed, it is good to go. Rignt now we do completely unnecessary setup and teardown of the PDT entries for each phase of the patching. This would have to be removed, so that the asynchronous event handler will always be able to do the roll forward/roll back as required. If this is unpalatable, the solution you touched on is probably also doable, but I need to think *really* carefully about the sequencing constraints, because now you also have to worry about events interrupting a patch in progress but not completed. It would however have the advantage that an arbitrary interrupt on the patching processor is unlikely to cause a rollback, and s
Re: [PATCH v3 0/6] Static calls
On 1/13/19 6:31 PM, H. Peter Anvin wrote: > > static cpumask_t text_poke_cpumask; > > static void text_poke_sync(void) > { > smp_wmb(); > text_poke_cpumask = cpu_online_mask; > smp_wmb(); /* Should be optional on x86 */ > cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); > on_each_cpu_mask(&text_poke_cpumask, text_poke_sync_cpu, NULL, false); > while (!cpumask_empty(&text_poke_cpumask)) { > cpu_relax(); > smp_rmb(); > } > } > > static void text_poke_sync_cpu(void *dummy) > { > (void)dummy; > > smp_rmb(); > cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); > /* >* We are guaranteed to return with an IRET, either from the >* IPI or the #BP handler; this provides serialization. >*/ > } > The invariants here are: 1. The patching routine must set each bit in the cpumask after each event that requires synchronization is complete. 2. The bit can be (atomically) cleared on the target CPU only, and only in a place that guarantees a synchronizing event (e.g. IRET) before it may reach the poked instruction. 3. At a minimum the IPI handler and #BP handler need to clear the bit. It *is* also possible to clear it in other places, e.g. the NMI handler, if necessary as long as condition 2 is satisfied. -hpa
Re: [PATCH v3 0/6] Static calls
On 1/11/19 11:39 AM, Jiri Kosina wrote: > On Fri, 11 Jan 2019, h...@zytor.com wrote: > >> I still don't see why we can't simply spin in the #BP handler until the >> patch is complete. > > I think this brings us to the already discussed possible deadlock, when > one CPU#0 is in the middle of text_poke_bp(), CPU#1 is spinning inside > spin_lock_irq*() and CPU#2 hits the breakpoint while holding that very > 'lock'. > > Then we're stuck forever, because CPU#1 will never handle the pending > sync_core() IPI (it's not NMI). > > Or have I misunderstood what you meant? > OK, I was thinking about this quite a while ago, and even started hacking on it, but apparently I managed to forget some key details. Specifically, you do *not* want to use the acknowledgment of the IPI as the blocking condition, so don't use a waiting IPI. Instead, you want a CPU bitmap (or percpu variable) that the IPI handler clears. When you are spinning in the IPI handler, you *also* need to clear that bit. Doing so is safe even in the case of batched updates, because you are guaranteed to execute an IRET before you get to patched code. So the synchronization part of the patching routine becomes: static cpumask_t text_poke_cpumask; static void text_poke_sync(void) { smp_wmb(); text_poke_cpumask = cpu_online_mask; smp_wmb(); /* Optional on x86 */ cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); on_each_cpu_mask(&text_poke_cpumask, text_poke_sync_cpu, NULL, false); while (!cpumask_empty(&text_poke_cpumask)) { cpu_relax(); smp_rmb(); } } static void text_poke_sync_cpu(void *dummy) { (void)dummy; smp_rmb(); cpumask_clear_cpu(&text_poke_cpumask, smp_processor_id()); /* * We are guaranteed to return with an IRET, either from the * IPI or the #BP handler; this provides serialization. */ } The spin routine then needs to add a call to do something like this. 
By (optionally) not comparing to a specific breakpoint address we allow for batching, but we may end up spinning on a breakpoint that is not actually a patching breakpoint until the patching is done. int poke_int3_handler(struct pt_regs *regs) { /* In the current handler, but shouldn't be needed... */ smp_rmb(); if (likely(!atomic_read(bp_patching_in_progress))) return 0; if (unlikely(!user_mode(regs) && atomic_read(&bp_patching_in_progress))) { text_poke_sync(); regs->ip--; return 1; /* May end up retaking the trap */ } else { return 0; } } Unless I'm totally mistaken, the worst thing that will happen with this code is that it may end up taking a harmless spurious IPI at a later time. -hpa
Re: [PATCH v3 0/6] Static calls
On 1/10/19 9:31 AM, Linus Torvalds wrote: > On Wed, Jan 9, 2019 at 2:59 PM Josh Poimboeuf wrote: >> >> NOTE: At least experimentally, the call destination writes seem to be >> atomic with respect to instruction fetching. On Nehalem I can easily >> trigger crashes when writing a call destination across cachelines while >> reading the instruction on other CPU; but I get no such crashes when >> respecting cacheline boundaries. > > I still doubt ifetch is atomic on a cacheline boundary for the simple > reason that the bus between the IU and the L1 I$ is narrower in older > CPU's. > As far as I understand, on P6+ (and possibly earlier, but I don't know) it is atomic on a 16-byte fetch datum, at least for Intel CPUs. However, single byte accesses are always going to be safe. -hpa
Re: [PATCH] x86/boot: clear rsdp address in boot_params for broken loaders
On 12/3/18 9:32 PM, Juergen Gross wrote: > > I'd like to send a followup patch doing that. And I'd like to not only > test sentinel for being non-zero, but all padding fields as well. This > should be 4.21 material, though. > No, you can't do that. That breaks backwards compatibility. -hpa
Re: [PATCH] x86/boot: clear rsdp address in boot_params for broken loaders
On 12/3/18 9:32 PM, Juergen Gross wrote: > > I'd like to send a followup patch doing that. And I'd like to not only > test sentinel for being non-zero, but all padding fields as well. This > should be 4.21 material, though. > No, you can't do that. That breaks backwards compatibility. -hpa
Re: Sleeping in user_access section
On 11/23/18 3:57 AM, Julien Thierry wrote: > > On x86, the EFLAGS.AC bit is also saved upon exception and I think it is > cleared upon exception entry so there is implicit exit from the > user_access mode when taking exception/interrupt. > No, it is restored, not cleared. In summary: on exceptions, user_access regions are suspended, and on return the user_access status is resumed. However, explicitly calling sleeping functions is not supported. -hpa
Re: Sleeping in user_access section
On 11/23/18 3:57 AM, Julien Thierry wrote: > > On x86, the EFLAGS.AC bit is also saved upon exception and I think it is > cleared upon exception entry so there is implicit exit from the > user_access mode when taking exception/interrupt. > No, it is restored, not cleared. In summary: on exceptions, user_access regions are suspended, and on return the user_access status is resumed. However, explicitly calling sleeping functions is not supported. -hpa
Re: [PATCH] x86/fpu: XRSTOR is expected to raise #GP
On 11/26/18 9:49 AM, Sebastian Andrzej Siewior wrote: > On 2018-11-26 18:27:06 [+0100], Jann Horn wrote: >> commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess >> fixups") incorrectly replaced the fixup entry for XSTATE_OP with a >> user-#PF-only fixup. However, XRSTOR can also raise #GP when the supplied >> address points to userspace memory. Change it back. > > The #GP is raised if the xstate content is invalid. But I guess the > details don't matter. > >> Reported-by: Sebastian Andrzej Siewior >> Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess >> fixups") >> Signed-off-by: Jann Horn > Acked-by: Sebastian Andrzej Siewior > It does matter -- please correct the patch description, or we might have some serious confusion at some arbitrary point in the future with the result that the bug gets re-introduced; it would not be the first time. -hpa
Re: [PATCH] x86/fpu: XRSTOR is expected to raise #GP
On 11/26/18 9:49 AM, Sebastian Andrzej Siewior wrote: > On 2018-11-26 18:27:06 [+0100], Jann Horn wrote: >> commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess >> fixups") incorrectly replaced the fixup entry for XSTATE_OP with a >> user-#PF-only fixup. However, XRSTOR can also raise #GP when the supplied >> address points to userspace memory. Change it back. > > The #GP is raised if the xstate content is invalid. But I guess the > details don't matter. > >> Reported-by: Sebastian Andrzej Siewior >> Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess >> fixups") >> Signed-off-by: Jann Horn > Acked-by: Sebastian Andrzej Siewior > It does matter -- please correct the patch description, or we might have some serious confusion at some arbitrary point in the future with the result that the bug gets re-introduced; it would not be the first time. -hpa
Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init
On 11/20/18 10:18 AM, Peter Zijlstra wrote: >> >> Can't we make this test in text_poke() directly, please? > > He does that in 9/10 iirc. > No, in 9/10 he does that change locally for the jump_label, but there is absolutely no reason not to do that test in text_poke() proper, and simply use text_poke() everywhere in the kernel. -hpa
Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init
On 11/20/18 10:18 AM, Peter Zijlstra wrote: >> >> Can't we make this test in text_poke() directly, please? > > He does that in 9/10 iirc. > No, in 9/10 he does that change locally for the jump_label, but there is absolutely no reason not to do that test in text_poke() proper, and simply use text_poke() everywhere in the kernel. -hpa
Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init
On 11/13/18 5:07 AM, Nadav Amit wrote: > There is no apparent reason not to use text_poke_early() while we are > during early-init and we do not patch code that might be on the stack > (i.e., we'll return to the middle of the patched code). This appears to > be the case of jump-labels, so do so. > > This is required for the next patches that would set a temporary mm for > patching, which is initialized after some static-keys are > enabled/disabled. > > Cc: Andy Lutomirski > Cc: Kees Cook > Cc: Dave Hansen > Cc: Masami Hiramatsu > Co-Developed-by: Peter Zijlstra > Signed-off-by: Nadav Amit > --- > arch/x86/kernel/jump_label.c | 7 ++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c > index aac0c1f7e354..ed5fe274a7d8 100644 > --- a/arch/x86/kernel/jump_label.c > +++ b/arch/x86/kernel/jump_label.c > @@ -52,7 +52,12 @@ static void __ref __jump_label_transform(struct jump_entry > *entry, > jmp.offset = jump_entry_target(entry) - >(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); > > - if (early_boot_irqs_disabled) > + /* > + * As long as we're UP and not yet marked RO, we can use > + * text_poke_early; SYSTEM_BOOTING guarantees both, as we switch to > + * SYSTEM_SCHEDULING before going either. > + */ > + if (system_state == SYSTEM_BOOTING) > poker = text_poke_early; > > if (type == JUMP_LABEL_JMP) { > Can't we make this test in text_poke() directly, please? -hpa
Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init
On 11/13/18 5:07 AM, Nadav Amit wrote: > There is no apparent reason not to use text_poke_early() while we are > during early-init and we do not patch code that might be on the stack > (i.e., we'll return to the middle of the patched code). This appears to > be the case of jump-labels, so do so. > > This is required for the next patches that would set a temporary mm for > patching, which is initialized after some static-keys are > enabled/disabled. > > Cc: Andy Lutomirski > Cc: Kees Cook > Cc: Dave Hansen > Cc: Masami Hiramatsu > Co-Developed-by: Peter Zijlstra > Signed-off-by: Nadav Amit > --- > arch/x86/kernel/jump_label.c | 7 ++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c > index aac0c1f7e354..ed5fe274a7d8 100644 > --- a/arch/x86/kernel/jump_label.c > +++ b/arch/x86/kernel/jump_label.c > @@ -52,7 +52,12 @@ static void __ref __jump_label_transform(struct jump_entry > *entry, > jmp.offset = jump_entry_target(entry) - >(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); > > - if (early_boot_irqs_disabled) > + /* > + * As long as we're UP and not yet marked RO, we can use > + * text_poke_early; SYSTEM_BOOTING guarantees both, as we switch to > + * SYSTEM_SCHEDULING before going either. > + */ > + if (system_state == SYSTEM_BOOTING) > poker = text_poke_early; > > if (type == JUMP_LABEL_JMP) { > Can't we make this test in text_poke() directly, please? -hpa
Re: [PATCH] x86/TSC: Use RDTSCP
On 11/19/18 11:52 AM, Andy Lutomirski wrote: > > I thought I benchmarked this on Intel at some point and found the > LFENCE;RDTSC variant to be slightly faster. But I believe you, so: > > Acked-by: Andy Lutomirski > As long as the difference isn't significant, the simplicity would seem to be a win. -hpa
Re: [PATCH] x86/TSC: Use RDTSCP
On 11/19/18 11:52 AM, Andy Lutomirski wrote: > > I thought I benchmarked this on Intel at some point and found the > LFENCE;RDTSC variant to be slightly faster. But I believe you, so: > > Acked-by: Andy Lutomirski > As long as the difference isn't significant, the simplicity would seem to be a win. -hpa
Re: PLEASE REVERT URGENTLY: Re: [PATCH v5 2/3] x86/boot: add acpi rsdp address to setup_header
On 11/10/18 1:03 AM, Juergen Gross wrote: > > How would that help? The garbage data written could have the correct > terminal sentinel value by chance. > > That's why I re-used an existing field in setup_header (the version) to > let grub tell the kernel which part of setup_header was written by grub. > > That's the only way I could find to let the kernel distinguish between > garbage and actual data. There is plenty of space *before* the setup_header part of struct boot_params too -- look at the various __pad fields, especially (in your case), __pad3[16] and __pad4[116] would suit the bill just fine. >> It would be enormously helpful if you could find out any more details about >> exactly what they are doing to break things. > > That's easy: > > The memory layout is: > > 0x1f1 bytes of data, including the sentinel, the setup_header, and then > more data. > > grub did read the kernel's setup_header in the correct size into its > buffer (which contains random garbage before that), initializes the first > 0x1f1 including the sentinel byte, and then writes back the buffer, but > using a too large length for that. Are you kidding me... it really overwrites it with completely random data, and not simply overspilling contents of the file? In that case it might not be possible (or desirable) to use those N bytes following the setup_header, or we need a bigger sentinel than one byte (probability being what it is, 256^n gets to be a pretty big number for any n, very quickly drowning in the noise compared to other potential sources of boot failures, and most likely less fatal than most.) How big is this garbage dump? I'm going to brave a guess it is 512 bytes. In that case this is hardly a big deal: the E820 map begins at 0x290, and the setup_header maximum goes to 0x280, so it is only 15 bytes lost. If it is worse than that, we would risk losing __pad8[48] and __pad9[276], and especially the latter would be painful. 
In that case perhaps we should use 0x281..0x290 as a 15-byte sentinel; that is going to be virtually foolproof. I'm also thinking that it might be desirable to add a flags field (__pad2 would be ideal) to struct boot_params; it would let us recycle some of the obsolete fields (hd0_info, hd1_info, sys_desc_table, olpc_ofw_header, ...) and perhaps be able to add some more robustness against these sorts of things. This would be the right way to do what your version feedback mechanism would do. The reason why the feedback mechanism is fundamentally broken is that it only gives the boot loader a way to assert that it supports a certain version of the protocol, but it doesn't say *which* bootloader does such an assert -- and therefore it is still wide open to implementation error. We do, in fact, already have a feedback mechanism: the bootloader ID and bootloader version. One way we could deal with this problem is to bump the bootloader version reported by Grub, and add a quirk to the kernel that if the bootloader ID is Grub (7) and the version is less than a certain number, zero those fields. In fact, the more I think about it, this is what we should do. That being said, Grub really needs to be kept honest. They keep making both severe design (like refusing to use the BIOS and UEFI entry points provided by the kernel by default) and implementation errors, apparently without meaningful oversight. I really ask that the people more closely involved with Grub try to keep a closer eye on their code as it applies to Linux. -hpa
Re: PLEASE REVERT URGENTLY: Re: [PATCH v5 2/3] x86/boot: add acpi rsdp address to setup_header
On 11/10/18 1:03 AM, Juergen Gross wrote: > > How would that help? The garbage data written could have the correct > terminal sentinel value by chance. > > That's why I re-used an existing field in setup_header (the version) to > let grub tell the kernel which part of setup_header was written by grub. > > That's the only way I could find to let the kernel distinguish between > garbage and actual data. There is plenty of space *before* the setup_header part of struct boot_params too -- look at the various __pad fields, especially (in your case), __pad3[16] and __pad4[116] would suit the bill just fine. >> It would be enormously helpful if you could find out any more details about >> exactly what they are doing to break things. > > That's easy: > > The memory layout is: > > 0x1f1 bytes of data, including the sentinel, the setup_header, and then > more data. > > grub did read the kernel's setup_header in the correct size into its > buffer (which contains random garbage before that), initializes the first > 0x1f1 including the sentinel byte, and then writes back the buffer, but > using a too large length for that. Are you kidding me... it really overwrites it with completely random data, and not simply overspilling contents of the file? In that case it might not be possible (or desirable) to use those N bytes following the setup_header, or we need a bigger sentinel than one byte (probability being what it is, 256^n gets to be a pretty big number for any n, very quickly drowning in the noise compared to other potential sources of boot failures, and most likely less fatal than most.) How big is this garbage dump? I'm going to brave a guess it is 512 bytes. In that case this is hardly a big deal: the E820 map begins at 0x290, and the setup_header maximum goes to 0x280, so it is only 15 bytes lost. If it is worse than that, we would risk losing __pad8[48] and __pad9[276], and especially the latter would be painful. 
In that case perhaps we should use 0x281..0x290 as a 15-byte sentinel; that is going to be virtually foolproof. I'm also thinking that it might be desirable to add a flags field (__pad2 would be ideal) to struct boot_params; it would let us recycle some of the obsolete fields (hd0_info, hd1_info, sys_desc_table, olpc_ofw_header, ...) and perhaps be able to add some more robustness against these sorts of things. This would be the right way to do what your version feedback mechanism would do. The reason why the feedback mechanism is fundamentally broken is that it only gives the boot loader a way to assert that it supports a certain version of the protocol, but it doesn't say *which* bootloader does such an assert -- and therefore it is still wide open to implementation error. We do, in fact, already have a feedback mechanism: the bootloader ID and bootloader version. One way we could deal with this problem is to bump the bootloader version reported by Grub, and add a quirk to the kernel that if the bootloader ID is Grub (7) and the version is less than a certain number, zero those fields. In fact, the more I think about it, this is what we should do. That being said, Grub really needs to be kept honest. They keep making both severe design (like refusing to use the BIOS and UEFI entry points provided by the kernel by default) and implementation errors, apparently without meaningful oversight. I really ask that the people more closely involved with Grub try to keep a closer eye on their code as it applies to Linux. -hpa
Re: [PATCH 2/2] x86/ldt: Unmap PTEs for the slow before freeing LDT
On 10/23/18 9:31 AM, Kirill A. Shutemov wrote: > > It shouldn't be a particularly hot path anyway. > That's putting it mildly. -hpa
Re: [PATCH 2/2] x86/ldt: Unmap PTEs for the slow before freeing LDT
On 10/23/18 9:31 AM, Kirill A. Shutemov wrote: > > It shouldn't be a particularly hot path anyway. > That's putting it mildly. -hpa
Re: [PATCH stable v2 1/2] termios, tty/tty_baudrate.c: fix buffer overrun
On 10/23/18 09:02, h...@zytor.com wrote: >> >> As I think Al's big termios cleanups are going to be hitting Linus's >> tree soon, do you know how these patches interact with that? >> >> This patch seems like it will not, so I'll be glad to queue that up >> after my first round of patches get merged to Linus later this week, >> but >> the second one worries me. >> >> thanks, >> >> greg k-h > > I have been working with Al; we had approached much the same problems but > from different directions. Mine ended up being a bit more comprehensive as a > result, so I think we're going to end up using my code with Al's reviews. > > So bottom line is that it should be all good. > [Al: Feel free to yell at me if I got that wrong.] -hpa
Re: [PATCH stable v2 1/2] termios, tty/tty_baudrate.c: fix buffer overrun
On 10/23/18 09:02, h...@zytor.com wrote: >> >> As I think Al's big termios cleanups are going to be hitting Linus's >> tree soon, do you know how these patches interact with that? >> >> This patch seems like it will not, so I'll be glad to queue that up >> after my first round of patches get merged to Linus later this week, >> but >> the second one worries me. >> >> thanks, >> >> greg k-h > > I have been working with Al; we had approached much the same problems but > from different directions. Mine ended up being a bit more comprehensive as a > result, so I think we're going to end up using my code with Al's reviews. > > So bottom line is that it should be all good. > [Al: Feel free to yell at me if I got that wrong.] -hpa
Re: [PATCH RFC] x86: Don't include '-Wa,-' when building with Clang
On 10/23/18 11:40, Nick Desaulniers wrote: > On Mon, Oct 22, 2018 at 10:11 PM Nadav Amit wrote: >> >> at 5:37 PM, Nathan Chancellor wrote: >> >> Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in >> inline assembly code to work around asm() related GCC inlining bugs") >> added this flag to KBUILD_CFLAGS, where it works perfectly fine with >> GCC. However, when building with Clang, all of the object files compile >> fine but the build hangs indefinitely at init/main.o, right before the >> linking stage. Don't include this flag when building with Clang. >> >> The kernel builds and boots to a shell in QEMU with both GCC and Clang >> with this patch applied. >> >> Link: >> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FClangBuiltLinux%2Flinux%2Fissues%2F213data=02%7C01%7Cnamit%40vmware.com%7C871daebc2ca44947d28d08d638811fb5%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636758524579997650sdata=shuxW81QRrO3TSqbgf462wgZYdLeAKeQEdGRxmnUX30%3Dreserved=0 >> Signed-off-by: Nathan Chancellor >> --- >> >> The reason this patch is labeled RFC is while I can verify that this >> fixes the issue, I'm not entirely sure why the '-Wa,-' works for GCC >> and not Clang. I looked into what the flag means and I couldn't really >> find anything so I just assume it's taking input from stdin? The issue >> could stem from how GCC forks gas versus how Clang does it. If this >> isn't of concern and the maintainers are happy with this patch as is, >> feel free to take it. >> Perhaps someone could actually, you know, time the build and see how much -pipe actually matters, if at all? -hpa
Re: [PATCH RFC] x86: Don't include '-Wa,-' when building with Clang
On 10/23/18 11:40, Nick Desaulniers wrote: > On Mon, Oct 22, 2018 at 10:11 PM Nadav Amit wrote: >> >> at 5:37 PM, Nathan Chancellor wrote: >> >> Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in >> inline assembly code to work around asm() related GCC inlining bugs") >> added this flag to KBUILD_CFLAGS, where it works perfectly fine with >> GCC. However, when building with Clang, all of the object files compile >> fine but the build hangs indefinitely at init/main.o, right before the >> linking stage. Don't include this flag when building with Clang. >> >> The kernel builds and boots to a shell in QEMU with both GCC and Clang >> with this patch applied. >> >> Link: >> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FClangBuiltLinux%2Flinux%2Fissues%2F213data=02%7C01%7Cnamit%40vmware.com%7C871daebc2ca44947d28d08d638811fb5%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636758524579997650sdata=shuxW81QRrO3TSqbgf462wgZYdLeAKeQEdGRxmnUX30%3Dreserved=0 >> Signed-off-by: Nathan Chancellor >> --- >> >> The reason this patch is labeled RFC is while I can verify that this >> fixes the issue, I'm not entirely sure why the '-Wa,-' works for GCC >> and not Clang. I looked into what the flag means and I couldn't really >> find anything so I just assume it's taking input from stdin? The issue >> could stem from how GCC forks gas versus how Clang does it. If this >> isn't of concern and the maintainers are happy with this patch as is, >> feel free to take it. >> Perhaps someone could actually, you know, time the build and see how much -pipe actually matters, if at all? -hpa
Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits
On 10/15/18 10:23 AM, Paolo Bonzini wrote: > > Even for a value from a 32-bit register? That would be _BIT, which > doesn't exist. > Just use _BITUL(). gcc is smart enough to know that that the resulting value is representable in 32 bits. Or if you really care, submit a patch to create _BITU(), but I don't personally see much of a point. -hpa
Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits
On 10/15/18 10:23 AM, Paolo Bonzini wrote: > > Even for a value from a 32-bit register? That would be _BIT, which > doesn't exist. > Just use _BITUL(). gcc is smart enough to know that that the resulting value is representable in 32 bits. Or if you really care, submit a patch to create _BITU(), but I don't personally see much of a point. -hpa
Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits
On 10/7/18 6:04 PM, peng.h...@zte.com.cn wrote: \> > #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK(0xFF) > -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1 << 31) > +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1UL << 31) >>> It is reasonable to change to unsigned, while not necessary to unsigned long? >>> AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used in function avic_ldr_write. >>> here I think it doesn't matter if you use unsigned or unsigned long. Do you >>> have any suggestions? > >> In current case, AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used to calculate >> the value of new_entry with type of u32. So the definition here is not >> harmful. > >> Also, I did a quick grep and found similar definition (1 << 31) is popular >> in the whole kernel tree. > >> The reason to make this change is not that strong to me. Would you >> minding sharing more reason behind this change? > oh, I'm just thinking logically, not more reason. The right way to do this would be to use the _BITUL() (or _BITULL()) macro. -hpa
Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits
On 10/7/18 6:04 PM, peng.h...@zte.com.cn wrote: \> > #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK(0xFF) > -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1 << 31) > +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1UL << 31) >>> It is reasonable to change to unsigned, while not necessary to unsigned long? >>> AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used in function avic_ldr_write. >>> here I think it doesn't matter if you use unsigned or unsigned long. Do you >>> have any suggestions? > >> In current case, AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used to calculate >> the value of new_entry with type of u32. So the definition here is not >> harmful. > >> Also, I did a quick grep and found similar definition (1 << 31) is popular >> in the whole kernel tree. > >> The reason to make this change is not that strong to me. Would you >> minding sharing more reason behind this change? > oh, I'm just thinking logically, not more reason. The right way to do this would be to use the _BITUL() (or _BITULL()) macro. -hpa
Re: Insanely high baud rates
On 10/11/18 2:40 PM, Theodore Y. Ts'o wrote: > On Thu, Oct 11, 2018 at 07:14:30AM -0700, h...@zytor.com wrote: >>> >>> I mean - what is the baud rate of a pty ? >> >> Whatever the master wants it to be... > > I think Alan's point is that it is highly unlikely you would be able > to push the equivalent of 4 gbps through a PTY layer. The TTY later > was never engineered for those speeds, and the real question is --- > what's the point? That's what Ethernet is for. > I like to consider the far future; I think things like the Y2038 problem is a good example. Or, as I like to put it, "numbers are cheap, running out of numbers is expensive." Sounds like we have enough of a plan that we aren't completely stuck should it ever become an issue. It might never, of course. -hpa
Re: Insanely high baud rates
On 10/11/18 2:40 PM, Theodore Y. Ts'o wrote: > On Thu, Oct 11, 2018 at 07:14:30AM -0700, h...@zytor.com wrote: >>> >>> I mean - what is the baud rate of a pty ? >> >> Whatever the master wants it to be... > > I think Alan's point is that it is highly unlikely you would be able > to push the equivalent of 4 gbps through a PTY layer. The TTY later > was never engineered for those speeds, and the real question is --- > what's the point? That's what Ethernet is for. > I like to consider the far future; I think things like the Y2038 problem is a good example. Or, as I like to put it, "numbers are cheap, running out of numbers is expensive." Sounds like we have enough of a plan that we aren't completely stuck should it ever become an issue. It might never, of course. -hpa
Re: Insanely high baud rates
On 10/11/18 12:36 PM, Craig Milo Rogers wrote: > On 18.10.11, Alan Cox wrote: >> I mean - what is the baud rate of a pty ? > > Solaris made the distinction between B0, which means pty hangup mode, > and any other baud rate: > > https://docs.oracle.com/cd/E88353_01/html/E37851/pty-4d.html > > But... why not implement a pty bandwidth limitation layer? You say, I > need to justify it? It's for, uh... protecting the system from unrestricted > pty usage DOS attacks! Yeah. That's what it's for. B0 is hangup, that's not in question. -hpa
Re: Insanely high baud rates
On 10/11/18 12:36 PM, Craig Milo Rogers wrote: > On 18.10.11, Alan Cox wrote: >> I mean - what is the baud rate of a pty ? > > Solaris made the distinction between B0, which means pty hangup mode, > and any other baud rate: > > https://docs.oracle.com/cd/E88353_01/html/E37851/pty-4d.html > > But... why not implement a pty bandwidth limitation layer? You say, I > need to justify it? It's for, uh... protecting the system from unrestricted > pty usage DOS attacks! Yeah. That's what it's for. B0 is hangup, that's not in question. -hpa
Re: Insanely high baud rates
On 10/09/18 12:51, Willy Tarreau wrote: > On Tue, Oct 09, 2018 at 12:19:04PM -0700, H. Peter Anvin wrote: >> [Resending to a wider audience] >> >> In trying to get the termios2 interface actually implemented in glibc, >> the question came up if we will ever care about baud rates in excess of >> 4 Gbps, even in the relatively remote future. >> >> If this is something we care about *at all*, I would like to suggest >> that rather than defining yet another kernel interface, we steal some >> bits from the MSB of the speed fields, alternatively one of the c_cc >> bytes (all likely architectures seem to have c_cc[18] free) or some field, >> if we can find them, in c_cflags, to indicate an exponent. >> >> With 5 bits from the top of the speed fields, the current values would >> be identical up to 248 Gbps, and values up to ~288 Pbps would be >> encodable ±2 ppb. >> >> In the short term, all we would have to do in the kernel would be >> erroring out on baud rates higher than 0x0fffffff (2^28-1 due to >> implicit one aliasing the first bit of a 5-bit exponent - less than 2^27 >> are functionally denorms.) However, I'd like to put in the glibc >> infrastructure for this now if this is something we may ever be >> interested in. >> >> Thoughts? > > Just my two cents, maybe we can conclude that for now we don't care > thus don't implement anything, but that everything you identified as > a possible place to steal bits should be marked "reserved for future > use, must be sent as zero". This will leave you ample room later to > decide how to proceed (and maybe it will not be the bps that you'll > want to change but the number of lanes, or word size, or bit encoding, > especially at 4 Gbps). > Well, it would be nice to be able to pre-enable it in glibc as much as possible. What I'm thinking of doing is to use a 64-bit "baud_t" type in glibc, and reserve the upper 4 bits of the speed field as must be zero (which is de facto the case anyway.) 
In order to avoid a *huge* user space ABI versioning mess we need to be able to encode the baud rate inside a 32-bit speed_t in glibc, and given that I believe it would be a Very Nice Thing if we could squeeze the information into 32 bits on the kernel side as well. So reserving the upper 4 bits I think is The Right Thing. I think that is actually a null change. I'm not sure if it would be a good idea to make the kernel -EINVAL on currently-unused c_cc bytes or c_*flags; I can see pros and cons (the latter being in no small part that that is not legacy behavior.) -hpa
Re: Insanely high baud rates
On 10/09/18 12:51, Willy Tarreau wrote: > On Tue, Oct 09, 2018 at 12:19:04PM -0700, H. Peter Anvin wrote: >> [Resending to a wider audience] >> >> In trying to get the termios2 interface actually implemented in glibc, >> the question came up if we will ever care about baud rates in excess of >> 4 Gbps, even in the relatively remote future. >> >> If this is something we care about *at all*, I would like to suggest >> that rather than defining yet another kernel interface, we steal some >> bits from the MSB of the speed fields, alternatively one of the c_cc >> bytes (all likely architectures seem to have c_cc[18] free) or some field, >> if we can find them, in c_cflags, to indicate an exponent. >> >> With 5 bits from the top of the speed fields, the current values would >> be identical up to 248 Gbps, and values up to ~288 Pbps would be >> encodable ±2 ppb. >> >> In the short term, all we would have to do in the kernel would be >> erroring out on baud rates higher than 0x0fffffff (2^28-1 due to >> implicit one aliasing the first bit of a 5-bit exponent - less than 2^27 >> are functionally denorms.) However, I'd like to put in the glibc >> infrastructure for this now if this is something we may ever be >> interested in. >> >> Thoughts? > > Just my two cents, maybe we can conclude that for now we don't care > thus don't implement anything, but that everything you identified as > a possible place to steal bits should be marked "reserved for future > use, must be sent as zero". This will leave you ample room later to > decide how to proceed (and maybe it will not be the bps that you'll > want to change but the number of lanes, or word size, or bit encoding, > especially at 4 Gbps). > Well, it would be nice to be able to pre-enable it in glibc as much as possible. What I'm thinking of doing is to use a 64-bit "baud_t" type in glibc, and reserve the upper 4 bits of the speed field as must be zero (which is de facto the case anyway.) 
In order to avoid a *huge* user space ABI versioning mess we need to be able to encode the baud rate inside a 32-bit speed_t in glibc, and given that I believe it would be a Very Nice Thing if we could squeeze the information into 32 bits on the kernel side as well. So reserving the upper 4 bits I think is The Right Thing. I think that is actually a null change. I'm not sure if it would be a good idea to make the kernel -EINVAL on currently-unused c_cc bytes or c_*flags; I can see pros and cons (the latter being in no small part that that is not legacy behavior.) -hpa
Insanely high baud rates
[Resending to a wider audience] In trying to get the termios2 interface actually implemented in glibc, the question came up if we will ever care about baud rates in excess of 4 Gbps, even in the relatively remote future. If this is something we care about *at all*, I would like to suggest that rather than defining yet another kernel interface, we steal some bits from the MSB of the speed fields, alternatively one of the c_cc bytes (all likely architectures seem to have c_cc[18] free) or some field, if we can find them, in c_cflags, to indicate an exponent. With 5 bits from the top of the speed fields, the current values would be identical up to 248 Gbps, and values up to ~288 Pbps would be encodable ±2 ppb. In the short term, all we would have to do in the kernel would be erroring out on baud rates higher than 0x0fffffff (2^28-1 due to implicit one aliasing the first bit of a 5-bit exponent – less than 2^27 are functionally denorms.) However, I'd like to put in the glibc infrastructure for this now if this is something we may ever be interested in. Thoughts? -- Sent from my Android device with K-9 Mail. Please excuse my brevity.
Insanely high baud rates
[Resending to a wider audience] In trying to get the termios2 interface actually implemented in glibc, the question came up if we will ever care about baud rates in excess of 4 Gbps, even in the relatively remote future. If this is something we care about *at all*, I would like to suggest that rather than defining yet another kernel interface, we steal some bits from the MSB of the speed fields, alternatively one of the c_cc bytes (all Linux architectures seem to have c_cc[18] free) or some field, if we can find them, in c_cflags, to indicate an exponent. With 5 bits from the top of the speed fields, the current values would be identical up to 248 Gbps, and values up to ~288 Pbps would be encodable ±2 ppb. In the short term, all we would have to do in the kernel would be erroring out on baud rates higher than 0x0fffffff (2^28-1 due to implicit one aliasing the first bit of a 5-bit exponent – less than 2^27 are functionally denorms.) However, I'd like to put in the glibc infrastructure for this now if this is something we may ever be interested in. Thoughts? -- Sent from my Android device with K-9 Mail. Please excuse my brevity.
Re: [PATCH stable v2 1/2] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2
On 10/8/18 8:38 AM, Johan Hovold wrote: > On Sun, Oct 07, 2018 at 09:06:19PM -0700, H. Peter Anvin wrote: >> From: "H. Peter Anvin (Intel)" >> >> Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags >> using arbitrary flags. Because BOTHER is not defined, the general >> Linux code doesn't allow setting arbitrary baud rates, and because >> CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in >> drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. >> >> Resolve both problems by #defining BOTHER to 037 on Alpha. >> >> However, userspace still needs to know if setting BOTHER is actually >> safe given legacy kernels (does anyone actually care about that on >> Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even >> though they use the same structure. Define struct termios2 just for >> compatibility; it is the exact same structure as struct termios. In a >> future patchset, this will be cleaned up so the uapi headers are >> usable from libc. > > Is this really needed? By defining BOTHER (and IBSHIFT which you forgot > to mention here) you are enabling arbitrary rates also through TCSETS on > alpha, right? > Yes, it's needed, not because the old ioctls won't work on NEW kernels, but because Alpha is so far behind the times, *and* the OLD kernels are severely broken if we pass BOTHER to them, we need a new ioctl number so we can guarantee that we won't do anything that user space doesn't intend; this is actually made far worse because if I read the code correctly, the kernel will still report back BOTHER and the speed field set on a legacy kernel in response to TCGETS, but the values will be completely bogus. This means that glibc will need a workaround for Alpha only, and the new ioctl numbers handles support for it. gcc should be able to fold the code together, since it should be able to detect that multiple branches of execution are otherwise identical. To micro-optimize, we could define TERMIOS_OLD as (CBAUDEX ? 
8 : 0) in a future (non-stable) patch. We don't need to worry about it on PowerPC because PowerPC implemented this so long ago, before the current glibc support threshold. -hpa
Re: [PATCH stable v2 1/2] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2
On 10/8/18 8:38 AM, Johan Hovold wrote: > On Sun, Oct 07, 2018 at 09:06:19PM -0700, H. Peter Anvin wrote: >> From: "H. Peter Anvin (Intel)" >> >> Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags >> using arbitrary flags. Because BOTHER is not defined, the general >> Linux code doesn't allow setting arbitrary baud rates, and because >> CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in >> drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037. >> >> Resolve both problems by #defining BOTHER to 037 on Alpha. >> >> However, userspace still needs to know if setting BOTHER is actually >> safe given legacy kernels (does anyone actually care about that on >> Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even >> though they use the same structure. Define struct termios2 just for >> compatibility; it is the exact same structure as struct termios. In a >> future patchset, this will be cleaned up so the uapi headers are >> usable from libc. > > Is this really needed? By defining BOTHER (and IBSHIFT which you forgot > to mention here) you are enabling arbitrary rates also through TCSETS on > alpha, right? > Yes, it's needed, not because the old ioctls won't work on NEW kernels, but because Alpha is so far behind the times, *and* the OLD kernels are severely broken if we pass BOTHER to them, we need a new ioctl number so we can guarantee that we won't do anything that user space doesn't intend; this is actually made far worse because if I read the code correctly, the kernel will still report back BOTHER and the speed field set on a legacy kernel in response to TCGETS, but the values will be completely bogus. This means that glibc will need a workaround for Alpha only, and the new ioctl numbers handles support for it. gcc should be able to fold the code together, since it should be able to detect that multiple branches of execution are otherwise identical. To micro-optimize, we could define TERMIOS_OLD as (CBAUDEX ? 
8 : 0) in a future (non-stable) patch. We don't need to worry about it on PowerPC because PowerPC implemented this so long ago, before the current glibc support threshold. -hpa
Re: [PATCH v9 04/10] x86: refcount: prevent gcc distortions
On 10/04/18 13:29, Andy Lutomirski wrote: >> >> Wonderful. >> >> Here is the horrible code I mentioned yesterday. This is about >> implementing the immediate-patching framework that Linus and others have >> discussed (it helps both performance and kernel hardening): >> > > I'm wondering if a production version should look more like: > > patch_point: > mov $whatever, [target] > .pushsection ".immediate_patches" > .quad .Lpatch_point > .popsection > > and let objtool parse the resulting assembled output and emit an entry > in some table mapping patch_point to the actual address and size of > the immediate to be patched. > Putting the generation of the ersatz code in objtool is an interesting idea, although I'm wondering if these macros, as awful as they are, wouldn't have generic applicability (what they do is they allow the cold branch of an asm to push temporaries to the stack rather than having to have gcc clobber them.) I'll think about it. -hpa
Re: [PATCH v9 04/10] x86: refcount: prevent gcc distortions
On 10/04/18 13:29, Andy Lutomirski wrote: >> >> Wonderful. >> >> Here is the horrible code I mentioned yesterday. This is about >> implementing the immediate-patching framework that Linus and others have >> discussed (it helps both performance and kernel hardening): >> > > I'm wondering if a production version should look more like: > > patch_point: > mov $whatever, [target] > .pushsection ".immediate_patches" > .quad .Lpatch_point > .popsection > > and let objtool parse the resulting assembled output and emit an entry > in some table mapping patch_point to the actual address and size of > the immediate to be patched. > Putting the generation of the ersatz code in objtool is an interesting idea, although I'm wondering if these macros, as awful as they are, wouldn't have generic applicability (what they do is they allow the cold branch of an asm to push temporaries to the stack rather than having to have gcc clobber them.) I'll think about it. -hpa
Re: [PATCH 5/5] arch/xtensa, termios: use
On 10/04/18 15:35, Max Filippov wrote: > > Looks good. But why not removing the header entirely and adding > generic-y += termbits.h > to arch/xtensa/include/uapi/asm/Kbuild? > Good idea. Should do that for others that also have the same #include. -hpa
Re: [PATCH 5/5] arch/xtensa, termios: use
On 10/04/18 15:35, Max Filippov wrote: > > Looks good. But why not removing the header entirely and adding > generic-y += termbits.h > to arch/xtensa/include/uapi/asm/Kbuild? > Good idea. Should do that for others that also have the same #include. -hpa
[PATCH 3/5] arch/mips, termios: use
The MIPS definition of termbits.h is almost identical to the generic one, so use the generic one and only redefine a handful of constants. Move TIOCSER_TEMT to ioctls.h where it lives for all other architectures. Signed-off-by: H. Peter Anvin (Intel) Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: Kate Stewart Cc: Thomas Gleixner Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby --- arch/mips/include/uapi/asm/ioctls.h | 2 + arch/mips/include/uapi/asm/termbits.h | 213 +++--- 2 files changed, 15 insertions(+), 200 deletions(-) diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h index 890245a9f0c4..a4e11e1438b5 100644 --- a/arch/mips/include/uapi/asm/ioctls.h +++ b/arch/mips/include/uapi/asm/ioctls.h @@ -114,4 +114,6 @@ #define TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */ #define TIOCGICOUNT0x5492 /* read serial port inline interrupt counts */ +#define TIOCSER_TEMT 0x01/* Transmitter physically empty */ + #endif /* __ASM_IOCTLS_H */ diff --git a/arch/mips/include/uapi/asm/termbits.h b/arch/mips/include/uapi/asm/termbits.h index dfeffba729b7..a08fa0a697f5 100644 --- a/arch/mips/include/uapi/asm/termbits.h +++ b/arch/mips/include/uapi/asm/termbits.h @@ -11,218 +11,31 @@ #ifndef _ASM_TERMBITS_H #define _ASM_TERMBITS_H -#include +#define NCCS 23 +#include -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -/* - * The ABI says nothing about NCC but seems to use NCCS as - * replacement for it in struct termio - */ -#define NCCS 23 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* 
control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 /* Interrupt character [ISIG]. */ -#define VQUIT 1 /* Quit character [ISIG]. */ -#define VERASE 2 /* Erase character [ICANON]. */ -#define VKILL 3 /* Kill-line character [ICANON]. */ +/* c_cc characters that differ from the generic ABI */ +#undef VMIN #define VMIN4 /* Minimum number of bytes read at once [!ICANON]. */ -#define VTIME 5 /* Time-out value (tenths of a second) [!ICANON]. */ +#undef VEOL2 #define VEOL2 6 /* Second EOL character [ICANON]. */ -#define VSWTC 7 /* ??? */ -#define VSWTCH VSWTC -#define VSTART 8 /* Start (X-ON) character [IXON, IXOFF]. */ -#define VSTOP 9 /* Stop (X-OFF) character [IXON, IXOFF]. */ -#define VSUSP 10 /* Suspend character [ISIG]. */ -#if 0 -/* - * VDSUSP is not supported - */ -#define VDSUSP 11 /* Delayed suspend character [ISIG]. */ -#endif -#define VREPRINT 12 /* Reprint-line character [ICANON]. */ -#define VDISCARD 13 /* Discard character [IEXTEN]. */ -#define VWERASE14 /* Word-erase character [ICANON]. */ -#define VLNEXT 15 /* Literal-next character [IEXTEN]. */ +#undef VEOF #define VEOF 16 /* End-of-file character [ICANON]. */ +#undef VEOL #define VEOL 17 /* End-of-line character [ICANON]. */ -/* c_iflag bits */ -#define IGNBRK 001 /* Ignore break condition. */ -#define BRKINT 002 /* Signal interrupt on break. */ -#define IGNPAR 004 /* Ignore characters with parity errors. */ -#define
[PATCH 3/5] arch/mips, termios: use
The MIPS definition of termbits.h is almost identical to the generic one, so use the generic one and only redefine a handful of constants. Move TIOCSER_TEMT to ioctls.h where it lives for all other architectures. Signed-off-by: H. Peter Anvin (Intel) Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: Kate Stewart Cc: Thomas Gleixner Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby --- arch/mips/include/uapi/asm/ioctls.h | 2 + arch/mips/include/uapi/asm/termbits.h | 213 +++--- 2 files changed, 15 insertions(+), 200 deletions(-) diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h index 890245a9f0c4..a4e11e1438b5 100644 --- a/arch/mips/include/uapi/asm/ioctls.h +++ b/arch/mips/include/uapi/asm/ioctls.h @@ -114,4 +114,6 @@ #define TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */ #define TIOCGICOUNT0x5492 /* read serial port inline interrupt counts */ +#define TIOCSER_TEMT 0x01/* Transmitter physically empty */ + #endif /* __ASM_IOCTLS_H */ diff --git a/arch/mips/include/uapi/asm/termbits.h b/arch/mips/include/uapi/asm/termbits.h index dfeffba729b7..a08fa0a697f5 100644 --- a/arch/mips/include/uapi/asm/termbits.h +++ b/arch/mips/include/uapi/asm/termbits.h @@ -11,218 +11,31 @@ #ifndef _ASM_TERMBITS_H #define _ASM_TERMBITS_H -#include +#define NCCS 23 +#include -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -/* - * The ABI says nothing about NCC but seems to use NCCS as - * replacement for it in struct termio - */ -#define NCCS 23 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* 
control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 /* Interrupt character [ISIG]. */ -#define VQUIT 1 /* Quit character [ISIG]. */ -#define VERASE 2 /* Erase character [ICANON]. */ -#define VKILL 3 /* Kill-line character [ICANON]. */ +/* c_cc characters that differ from the generic ABI */ +#undef VMIN #define VMIN4 /* Minimum number of bytes read at once [!ICANON]. */ -#define VTIME 5 /* Time-out value (tenths of a second) [!ICANON]. */ +#undef VEOL2 #define VEOL2 6 /* Second EOL character [ICANON]. */ -#define VSWTC 7 /* ??? */ -#define VSWTCH VSWTC -#define VSTART 8 /* Start (X-ON) character [IXON, IXOFF]. */ -#define VSTOP 9 /* Stop (X-OFF) character [IXON, IXOFF]. */ -#define VSUSP 10 /* Suspend character [ISIG]. */ -#if 0 -/* - * VDSUSP is not supported - */ -#define VDSUSP 11 /* Delayed suspend character [ISIG]. */ -#endif -#define VREPRINT 12 /* Reprint-line character [ICANON]. */ -#define VDISCARD 13 /* Discard character [IEXTEN]. */ -#define VWERASE14 /* Word-erase character [ICANON]. */ -#define VLNEXT 15 /* Literal-next character [IEXTEN]. */ +#undef VEOF #define VEOF 16 /* End-of-file character [ICANON]. */ +#undef VEOL #define VEOL 17 /* End-of-line character [ICANON]. */ -/* c_iflag bits */ -#define IGNBRK 001 /* Ignore break condition. */ -#define BRKINT 002 /* Signal interrupt on break. */ -#define IGNPAR 004 /* Ignore characters with parity errors. */ -#define
[PATCH 5/5] arch/xtensa, termios: use
The Xtensa definition of termbits.h is identical to the generic one. Signed-off-by: H. Peter Anvin (Intel) Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Kate Stewart Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby --- arch/xtensa/include/uapi/asm/termbits.h | 222 +--- 1 file changed, 1 insertion(+), 221 deletions(-) diff --git a/arch/xtensa/include/uapi/asm/termbits.h b/arch/xtensa/include/uapi/asm/termbits.h index d4206a7c5138..3935b106de79 100644 --- a/arch/xtensa/include/uapi/asm/termbits.h +++ b/arch/xtensa/include/uapi/asm/termbits.h @@ -1,221 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * include/asm-xtensa/termbits.h - * - * Copied from SH. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_TERMBITS_H -#define _XTENSA_TERMBITS_H - - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t 
c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ - -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ - -#define IGNBRK 001 -#define BRKINT 002 -#define IGNPAR 004 -#define PARMRK 010 -#define INPCK 020 -#define ISTRIP 040 -#define INLCR 100 -#define IGNCR 200 -#define ICRNL 400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 001 -#define IMAXBEL002 -#define IUTF8 004 - -/* c_oflag bits */ - -#define OPOST 001 -#define OLCUC 002 -#define ONLCR 004 -#define OCRNL 010 -#define ONOCR 020 -#define ONLRET 040 -#define OFILL 100 -#define OFDEL 200 -#define NLDLY 400 -#define NL0 000 -#define NL1 400 -#define CRDLY 0003000 -#define CR0 000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 000 -#define TAB1 0004000 -#define TAB2 001 -#define TAB3 0014000 -#define XTABS0014000 -#define BSDLY 002 -#define BS0 000 -#define BS1 002 -#define VTDLY 004 -#define VT0 000 -#define VT1 004 -#define FFDLY 010 -#define FF0 000 -#define FF1 010 - -/* c_cflag bit meaning */ - -#define CBAUD 0010017 -#define B0000 /* hang up */ -#define B50 001 -#define B75 002 -#define B110 003 -#define B134 004 -#define B150 005 -#define B200 006 -#define B300 007 -#define B600 010 -#define B1200 011 -#define B1800 012 -#define B2400 013 -#define B4800 014 -#define B9600 015 -#define B19200016 -#define B38400017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 060 -#define CS5 000 -#define CS6 020 -#define CS7 040 -#define CS8 060 -#define CSTOPB 100 -#define CREAD 200 -#define PARENB 400 -#define PARODD 0001000 
-#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 001 -#define
[PATCH 0/5] termios: remove arch redundancy in
is one of those files which define an ABI. Some were made different due to the desire to be compatible with legacy architectures, others were mostly direct copies of the i386 definitions, which are now in asm-generic. This folds the IA64, MIPS, PA-RISC, and Xtensa implementations into the generic one. IA64 and Xtensa are identical, MIPS and PA-RISC are trivially different and just need a handful of constants redefined. has a few very minor adjustments to allow this. arch/ia64/include/uapi/asm/termbits.h | 210 +- arch/mips/include/uapi/asm/ioctls.h | 2 + arch/mips/include/uapi/asm/termbits.h | 213 ++ arch/parisc/include/uapi/asm/termbits.h | 197 +--- arch/xtensa/include/uapi/asm/termbits.h | 222 +--- include/uapi/asm-generic/termbits.h | 7 +- 6 files changed, 27 insertions(+), 824 deletions(-) Signed-off-by: H. Peter Anvin (Intel) Cc: "James E.J. Bottomley" Cc: Arnd Bergmann Cc: Chris Zankel Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Helge Deller Cc: James Hogan Cc: Jiri Slaby Cc: Kate Stewart Cc: Max Filippov Cc: Paul Burton Cc: Philippe Ombredanne Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Tony Luck Cc: Cc: Cc: Cc:
[PATCH 1/5] asm-generic, termios: add alias constants from MIPS
Some architectures, in this case MIPS, need a couple of legacy alias constants for bits. There really is no reason why we can't define them generically for all architectures. Signed-off-by: H. Peter Anvin (Intel) Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jiri Slaby linux-kernel@vger.kernel.org (open list) --- include/uapi/asm-generic/termbits.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/termbits.h b/include/uapi/asm-generic/termbits.h index 2fbaf9ae89dd..96ae175eec5b 100644 --- a/include/uapi/asm-generic/termbits.h +++ b/include/uapi/asm-generic/termbits.h @@ -8,7 +8,10 @@ typedef unsigned char cc_t; typedef unsigned int speed_t; typedef unsigned int tcflag_t; -#define NCCS 19 +#ifndef NCCS +# define NCCS 19 +#endif + struct termios { tcflag_t c_iflag; /* input mode flags */ tcflag_t c_oflag; /* output mode flags */ @@ -49,6 +52,7 @@ struct ktermios { #define VTIME 5 #define VMIN 6 #define VSWTC 7 +#define VSWTCH VSWTC #define VSTART 8 #define VSTOP 9 #define VSUSP 10 @@ -173,6 +177,7 @@ struct ktermios { #define ECHONL 100 #define NOFLSH 200 #define TOSTOP 400 +#define ITOSTOP TOSTOP #define ECHOCTL0001000 #define ECHOPRT0002000 #define ECHOKE 0004000 -- 2.14.4
[PATCH 1/5] asm-generic, termios: add alias constants from MIPS
Some architectures, in this case MIPS, need a couple of legacy alias constants for bits. There really is no reason why we can't define them generically for all architectures. Signed-off-by: H. Peter Anvin (Intel) Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jiri Slaby linux-kernel@vger.kernel.org (open list) --- include/uapi/asm-generic/termbits.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/termbits.h b/include/uapi/asm-generic/termbits.h index 2fbaf9ae89dd..96ae175eec5b 100644 --- a/include/uapi/asm-generic/termbits.h +++ b/include/uapi/asm-generic/termbits.h @@ -8,7 +8,10 @@ typedef unsigned char cc_t; typedef unsigned int speed_t; typedef unsigned int tcflag_t; -#define NCCS 19 +#ifndef NCCS +# define NCCS 19 +#endif + struct termios { tcflag_t c_iflag; /* input mode flags */ tcflag_t c_oflag; /* output mode flags */ @@ -49,6 +52,7 @@ struct ktermios { #define VTIME 5 #define VMIN 6 #define VSWTC 7 +#define VSWTCH VSWTC #define VSTART 8 #define VSTOP 9 #define VSUSP 10 @@ -173,6 +177,7 @@ struct ktermios { #define ECHONL 100 #define NOFLSH 200 #define TOSTOP 400 +#define ITOSTOP TOSTOP #define ECHOCTL0001000 #define ECHOPRT0002000 #define ECHOKE 0004000 -- 2.14.4
[PATCH 5/5] arch/xtensa, termios: use
The Xtensa definition of termbits.h is identical to the generic one. Signed-off-by: H. Peter Anvin (Intel) Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Kate Stewart Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby --- arch/xtensa/include/uapi/asm/termbits.h | 222 +--- 1 file changed, 1 insertion(+), 221 deletions(-) diff --git a/arch/xtensa/include/uapi/asm/termbits.h b/arch/xtensa/include/uapi/asm/termbits.h index d4206a7c5138..3935b106de79 100644 --- a/arch/xtensa/include/uapi/asm/termbits.h +++ b/arch/xtensa/include/uapi/asm/termbits.h @@ -1,221 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * include/asm-xtensa/termbits.h - * - * Copied from SH. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ - -#ifndef _XTENSA_TERMBITS_H -#define _XTENSA_TERMBITS_H - - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t 
c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ - -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ - -#define IGNBRK 001 -#define BRKINT 002 -#define IGNPAR 004 -#define PARMRK 010 -#define INPCK 020 -#define ISTRIP 040 -#define INLCR 100 -#define IGNCR 200 -#define ICRNL 400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 001 -#define IMAXBEL002 -#define IUTF8 004 - -/* c_oflag bits */ - -#define OPOST 001 -#define OLCUC 002 -#define ONLCR 004 -#define OCRNL 010 -#define ONOCR 020 -#define ONLRET 040 -#define OFILL 100 -#define OFDEL 200 -#define NLDLY 400 -#define NL0 000 -#define NL1 400 -#define CRDLY 0003000 -#define CR0 000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 000 -#define TAB1 0004000 -#define TAB2 001 -#define TAB3 0014000 -#define XTABS0014000 -#define BSDLY 002 -#define BS0 000 -#define BS1 002 -#define VTDLY 004 -#define VT0 000 -#define VT1 004 -#define FFDLY 010 -#define FF0 000 -#define FF1 010 - -/* c_cflag bit meaning */ - -#define CBAUD 0010017 -#define B0000 /* hang up */ -#define B50 001 -#define B75 002 -#define B110 003 -#define B134 004 -#define B150 005 -#define B200 006 -#define B300 007 -#define B600 010 -#define B1200 011 -#define B1800 012 -#define B2400 013 -#define B4800 014 -#define B9600 015 -#define B19200016 -#define B38400017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 060 -#define CS5 000 -#define CS6 020 -#define CS7 040 -#define CS8 060 -#define CSTOPB 100 -#define CREAD 200 -#define PARENB 400 -#define PARODD 0001000 
-#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 001 -#define
[PATCH 0/5] termios: remove arch redundancy in
is one of those files which define an ABI. Some were made different due to the desire to be compatible with legacy architectures, others were mostly direct copies of the i386 definitions, which are now in asm-generic. This folds the IA64, MIPS, PA-RISC, and Xtensa implementations into the generic one. IA64 and Xtensa are identical, MIPS and PA-RISC are trivially different and just need a handful of constants redefined. has a few very minor adjustments to allow this. arch/ia64/include/uapi/asm/termbits.h | 210 +- arch/mips/include/uapi/asm/ioctls.h | 2 + arch/mips/include/uapi/asm/termbits.h | 213 ++ arch/parisc/include/uapi/asm/termbits.h | 197 +--- arch/xtensa/include/uapi/asm/termbits.h | 222 +--- include/uapi/asm-generic/termbits.h | 7 +- 6 files changed, 27 insertions(+), 824 deletions(-) Signed-off-by: H. Peter Anvin (Intel) Cc: "James E.J. Bottomley" Cc: Arnd Bergmann Cc: Chris Zankel Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Helge Deller Cc: James Hogan Cc: Jiri Slaby Cc: Kate Stewart Cc: Max Filippov Cc: Paul Burton Cc: Philippe Ombredanne Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Tony Luck Cc: Cc: Cc: Cc:
[PATCH 2/5] arch/ia64, termios: use
The IA64 definition of termbits.h is identical to the generic. Signed-off-by: H. Peter Anvin (Intel) Cc: Tony Luck Cc: Fenghua Yu Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Cc: Greg Kroah-Hartman CC: Jiri Slaby --- arch/ia64/include/uapi/asm/termbits.h | 210 +- 1 file changed, 1 insertion(+), 209 deletions(-) diff --git a/arch/ia64/include/uapi/asm/termbits.h b/arch/ia64/include/uapi/asm/termbits.h index 000a1a297c75..3935b106de79 100644 --- a/arch/ia64/include/uapi/asm/termbits.h +++ b/arch/ia64/include/uapi/asm/termbits.h @@ -1,209 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_TERMBITS_H -#define _ASM_IA64_TERMBITS_H - -/* - * Based on . - * - * Modified 1999 - * David Mosberger-Tang , Hewlett-Packard Co - * - * 99/01/28Added new baudrates - */ - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 
-#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ -#define IGNBRK 001 -#define BRKINT 002 -#define IGNPAR 004 -#define PARMRK 010 -#define INPCK 020 -#define ISTRIP 040 -#define INLCR 100 -#define IGNCR 200 -#define ICRNL 400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 001 -#define IMAXBEL002 -#define IUTF8 004 - -/* c_oflag bits */ -#define OPOST 001 -#define OLCUC 002 -#define ONLCR 004 -#define OCRNL 010 -#define ONOCR 020 -#define ONLRET 040 -#define OFILL 100 -#define OFDEL 200 -#define NLDLY 400 -#define NL0 000 -#define NL1 400 -#define CRDLY 0003000 -#define CR0 000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 000 -#define TAB1 0004000 -#define TAB2 001 -#define TAB3 0014000 -#define XTABS0014000 -#define BSDLY 002 -#define BS0 000 -#define BS1 002 -#define VTDLY 004 -#define VT0 000 -#define VT1 004 -#define FFDLY 010 -#define FF0 000 -#define FF1 010 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0000 /* hang up */ -#define B50 001 -#define B75 002 -#define B110 003 -#define B134 004 -#define B150 005 -#define B200 006 -#define B300 007 -#define B600 010 -#define B1200 011 -#define B1800 012 -#define B2400 013 -#define B4800 014 -#define B9600 015 -#define B19200016 -#define B38400017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 060 -#define CS5 000 -#define CS6 020 -#define CS7 040 -#define CS8 060 -#define CSTOPB 100 -#define CREAD 200 -#define PARENB 400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 001 -#defineBOTHER 001 -#defineB57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B50 
0010005 -#define B576000 0010006 -#define B921600 0010007
[PATCH 2/5] arch/ia64, termios: use <asm-generic/termbits.h>
The IA64 definition of termbits.h is identical to the generic. Signed-off-by: H. Peter Anvin (Intel) Cc: Tony Luck Cc: Fenghua Yu Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: Thomas Gleixner Cc: Cc: Greg Kroah-Hartman CC: Jiri Slaby --- arch/ia64/include/uapi/asm/termbits.h | 210 +- 1 file changed, 1 insertion(+), 209 deletions(-) diff --git a/arch/ia64/include/uapi/asm/termbits.h b/arch/ia64/include/uapi/asm/termbits.h index 000a1a297c75..3935b106de79 100644 --- a/arch/ia64/include/uapi/asm/termbits.h +++ b/arch/ia64/include/uapi/asm/termbits.h @@ -1,209 +1 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_TERMBITS_H -#define _ASM_IA64_TERMBITS_H - -/* - * Based on . - * - * Modified 1999 - * David Mosberger-Tang , Hewlett-Packard Co - * - * 99/01/28Added new baudrates - */ - -#include - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 
-#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ -#define IGNBRK 001 -#define BRKINT 002 -#define IGNPAR 004 -#define PARMRK 010 -#define INPCK 020 -#define ISTRIP 040 -#define INLCR 100 -#define IGNCR 200 -#define ICRNL 400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 001 -#define IMAXBEL002 -#define IUTF8 004 - -/* c_oflag bits */ -#define OPOST 001 -#define OLCUC 002 -#define ONLCR 004 -#define OCRNL 010 -#define ONOCR 020 -#define ONLRET 040 -#define OFILL 100 -#define OFDEL 200 -#define NLDLY 400 -#define NL0 000 -#define NL1 400 -#define CRDLY 0003000 -#define CR0 000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 000 -#define TAB1 0004000 -#define TAB2 001 -#define TAB3 0014000 -#define XTABS0014000 -#define BSDLY 002 -#define BS0 000 -#define BS1 002 -#define VTDLY 004 -#define VT0 000 -#define VT1 004 -#define FFDLY 010 -#define FF0 000 -#define FF1 010 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0000 /* hang up */ -#define B50 001 -#define B75 002 -#define B110 003 -#define B134 004 -#define B150 005 -#define B200 006 -#define B300 007 -#define B600 010 -#define B1200 011 -#define B1800 012 -#define B2400 013 -#define B4800 014 -#define B9600 015 -#define B19200016 -#define B38400017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 060 -#define CS5 000 -#define CS6 020 -#define CS7 040 -#define CS8 060 -#define CSTOPB 100 -#define CREAD 200 -#define PARENB 400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 001 -#defineBOTHER 001 -#defineB57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B50 
0010005 -#define B576000 0010006 -#define B921600 0010007
[PATCH 4/5] arch/parisc, termios: use <asm-generic/termbits.h>
The PARISC definition of termbits.h is almost identical to the generic one, so use the generic one and only redefine a handful of constants. Signed-off-by: H. Peter Anvin (Intel) Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Kate Stewart Cc: Thomas Gleixner Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby --- arch/parisc/include/uapi/asm/termbits.h | 197 +--- 1 file changed, 4 insertions(+), 193 deletions(-) diff --git a/arch/parisc/include/uapi/asm/termbits.h b/arch/parisc/include/uapi/asm/termbits.h index 40e920f8d683..28ce9067f2e0 100644 --- a/arch/parisc/include/uapi/asm/termbits.h +++ b/arch/parisc/include/uapi/asm/termbits.h @@ -2,201 +2,12 @@ #ifndef __ARCH_PARISC_TERMBITS_H__ #define __ARCH_PARISC_TERMBITS_H__ -#include +#include -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line;/* line discipline */ - cc_t c_cc[NCCS];/* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 
-#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - - -/* c_iflag bits */ -#define IGNBRK 001 -#define BRKINT 002 -#define IGNPAR 004 -#define PARMRK 010 -#define INPCK 020 -#define ISTRIP 040 -#define INLCR 100 -#define IGNCR 200 -#define ICRNL 400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 001 +/* c_iflag bits that differ from the generic ABI */ +#undef IMAXBEL #define IMAXBEL004 +#undef IUTF8 #define IUTF8 010 -/* c_oflag bits */ -#define OPOST 001 -#define OLCUC 002 -#define ONLCR 004 -#define OCRNL 010 -#define ONOCR 020 -#define ONLRET 040 -#define OFILL 100 -#define OFDEL 200 -#define NLDLY 400 -#define NL0 000 -#define NL1 400 -#define CRDLY 0003000 -#define CR0 000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 000 -#define TAB1 0004000 -#define TAB2 001 -#define TAB3 0014000 -#define XTABS0014000 -#define BSDLY 002 -#define BS0 000 -#define BS1 002 -#define VTDLY 004 -#define VT0 000 -#define VT1 004 -#define FFDLY 010 -#define FF0 000 -#define FF1 010 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0 000 /* hang up */ -#define B50001 -#define B75002 -#define B110 003 -#define B134 004 -#define B150 005 -#define B200 006 -#define B300 007 -#define B600 010 -#define B1200 011 -#define B1800 012 -#define B2400 013 -#define B4800 014 -#define B9600 015 -#define B19200 016 -#define B38400 017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 060 -#define CS5 000 -#define CS6 020 -#define CS7 040 -#define CS8 060 -#define CSTOPB 100 -#define CREAD 200 -#define PARENB 400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 001 -#defineBOTHER 001 -#defineB57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 
-#define B50 0010005 -#define B5760