Re: gzip: fix pledge violation
On Fri, 8 Jul 2022 16:04:47 + Guilherme Janczak wrote: > gzip violates wpath if you tell it to extract stdin and restore the > original filename. More than a year ago, Guilherme Janczak reported that OpenBSD's "gzip -dN https://marc.info/?l=openbsd-tech&m=165729624913806&w=2 I made a new diff for -c overrides -N, is this better? --gkoehler Index: main.c === RCS file: /cvs/src/usr.bin/compress/main.c,v retrieving revision 1.105 diff -u -p -r1.105 main.c --- main.c 11 Aug 2023 04:45:05 - 1.105 +++ main.c 8 Oct 2023 04:00:57 - @@ -699,7 +699,8 @@ dodecompress(const char *in, char *out, close (ifd); return (FAILURE); } - if (storename && oldname[0] != '\0') { + /* Do -N only if not -c. */ + if (storename && !cat && oldname[0] != '\0') { const char *oldbase = basename(oldname); char *cp = strrchr(out, '/'); if (cp != NULL) { @@ -707,7 +708,6 @@ dodecompress(const char *in, char *out, strlcat(out, oldbase, PATH_MAX); } else strlcpy(out, oldbase, PATH_MAX); - cat = 0;/* XXX should -c override? */ } if (testmode) {
Re: build softraid(4) in powerpc64 RAMDISK
On Tue, 19 Sep 2023 21:48:57 + Klemens Nanni wrote: > distrib/powerpc64/ramdisk builds and fits; I did not have a free disk > (partition) to run-test, but completing the config should be fine. I moved my /usr/xobj filesystem to a softraid CONCAT. The old bsd.rd failed to mount it, the new bsd.rd succeeded. ok gkoehler@ > Index: sys/arch/powerpc64/conf/RAMDISK > === > RCS file: /cvs/src/sys/arch/powerpc64/conf/RAMDISK,v > retrieving revision 1.12 > diff -u -p -r1.12 RAMDISK > --- sys/arch/powerpc64/conf/RAMDISK 26 Jun 2022 20:05:06 - 1.12 > +++ sys/arch/powerpc64/conf/RAMDISK 16 Sep 2023 16:32:42 - > @@ -25,6 +25,7 @@ option CRYPTO > config bsd root on rd0a swap on rd0b > > mainbus0 at root > +softraid0at root > cpu0 at mainbus? > opal0at fdt? > opalcons*at fdt? >
Re: dt(4), hardclock(9): move interval, profile providers to dedicated callback
On Fri, 25 Aug 2023 21:00:34 -0500 Scott Cheloha wrote: > @@ -148,4 +185,18 @@ dt_prov_interval_enter(struct dt_provide > } > smr_read_leave(); > return 0; > +} > + > +void > +dt_prov_profile_intr(struct clockintr *cl, void *cf) > +{ > + uint64_t count, i; > + struct cpu_info *ci = curcpu(); > + > + count = clockintr_advance(cl, hardclock_period); > + for (i = 0; i < count; i++) { > + DT_ENTER(profile, NULL); > + if (CPU_IS_PRIMARY(ci)) > + DT_ENTER(interval, NULL); > + } > } In build of GENERIC bsd.sp, /usr/src/sys/dev/dt/dt_prov_profile.c:194:19: error: unused variable 'ci' [-Werror,-Wunused-variable] struct cpu_info *ci = curcpu(); ^ 1 error generated. To unbreak it, I edited the function to be, void dt_prov_profile_intr(struct clockintr *cl, void *cf) { uint64_t count, i; int primary = CPU_IS_PRIMARY(curcpu()); count = clockintr_advance(cl, hardclock_period); for (i = 0; i < count; i++) { DT_ENTER(profile, NULL); if (primary) DT_ENTER(interval, NULL); } }
Re: Stop using direct syscall(2) from perl(1)
On Thu, 3 Aug 2023 18:43:21 -0700 Andrew Hewus Fresh wrote: > Here's a new version, I think I addressed all of the feedback I got, > although may have missed something. ok gkoehler@ You sent this new version 2 weeks ago, but I didn't build it until yesterday. syscall_emulator.t passes on macppc and powerpc64. > Index: gnu/usr.bin/perl/MANIFEST > === > RCS file: /home/afresh1/OpenBSD-perl/OP/cvs/src/gnu/usr.bin/perl/MANIFEST,v > retrieving revision 1.67 > diff -u -p -a -u -p -r1.67 MANIFEST > --- gnu/usr.bin/perl/MANIFEST 8 Jul 2023 14:18:35 - 1.67 > +++ gnu/usr.bin/perl/MANIFEST 3 Aug 2023 04:34:38 - > @@ -6605,6 +6605,7 @@ t/op/svleak.pl Test file for svleak.t > t/op/svleak.tSee if stuff leaks SVs > t/op/switch.tSee if switches (given/when) work > t/op/symbolcache.t See if undef/delete works on stashes with > functions > +t/op/syscall_emulator.t Tests that syscall works via the > emulator > t/op/sysio.t See if sysread and syswrite work > t/op/taint.t See if tainting works > t/op/threads.t Misc. tests for perl features with > threads > Index: gnu/usr.bin/perl/Makefile.SH > === > RCS file: /home/afresh1/OpenBSD-perl/OP/cvs/src/gnu/usr.bin/perl/Makefile.SH,v > retrieving revision 1.60 > diff -u -p -a -u -p -r1.60 Makefile.SH > --- gnu/usr.bin/perl/Makefile.SH 8 Jul 2023 14:18:35 - 1.60 > +++ gnu/usr.bin/perl/Makefile.SH 3 Aug 2023 04:34:38 - > @@ -541,7 +541,7 @@ c1 = av.c scope.c op.c doop.c doio.c dum > c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c sv.c > c3 = taint.c toke.c util.c deb.c run.c builtin.c universal.c pad.c globals.c > keywords.c > c4 = perlio.c numeric.c mathoms.c locale.c pp_pack.c pp_sort.c caretx.c > dquote.c time64.c > -c5 = $(mallocsrc) > +c5 = $(mallocsrc) syscall_emulator.c > > !NO!SUBS! > > @@ -557,7 +557,7 @@ c = $(c1) $(c2) $(c3) $(c4) $(c5) minipe > > obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) > pad$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT) > reentr$(OBJ_EXT) mro_core$(OBJ_EXT) keywords$(OBJ_EXT) builtin$(OBJ_EXT) > obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) > pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT) > -obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) > taint$(OBJ_EXT) deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) > numeric$(OBJ_EXT) mathoms$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) > pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT) time64$(OBJ_EXT) > +obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) > taint$(OBJ_EXT) deb$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) > numeric$(OBJ_EXT) mathoms$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) > pp_sort$(OBJ_EXT) caretx$(OBJ_EXT) dquote$(OBJ_EXT) time64$(OBJ_EXT) > syscall_emulator$(OBJ_EXT) > > # split the objects into 3 exclusive sets: those used by both miniperl and > # perl, and those used by just one or the other. Doesn't include the > Index: gnu/usr.bin/perl/Makefile.bsd-wrapper > === > RCS file: > /home/afresh1/OpenBSD-perl/OP/cvs/src/gnu/usr.bin/perl/Makefile.bsd-wrapper,v > retrieving revision 1.113 > diff -u -p -a -u -p -r1.113 Makefile.bsd-wrapper > --- gnu/usr.bin/perl/Makefile.bsd-wrapper 15 Feb 2023 01:38:20 - > 1.113 > +++ gnu/usr.bin/perl/Makefile.bsd-wrapper 3 Aug 2023 04:34:38 - > @@ -39,11 +39,18 @@ cleandir: > fi > cd ${.CURDIR} && ${MAKE} -f Makefile.bsd-wrapper1 cleandir > > -all: config.sh > +all: syscall_emulator.c config.sh > cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 perl.build > cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 mansrc.build > > install: > cd ${.CURDIR} && exec ${MAKE} -f Makefile.bsd-wrapper1 install > + > + > +syscall_emulator.c: gen_syscall_emulator.pl syscall_emulator.h > /usr/include/sys/syscall.h /usr/include/sys/syscallargs.h > + /usr/bin/perl $(.CURDIR)/gen_syscall_emulator.pl > $@ > + > +syscall_emulator.h: > + ln -sf $(.CURDIR)/$@ $@ > > .include > Index: gnu/usr.bin/perl/config.over > === > RCS file: /home/afresh1/OpenBSD-perl/OP/cvs/src/gnu/usr.bin/perl/config.over,v > retrieving revision 1.22 > diff -u -p -a -u -p -r1.22 config.over > --- gnu/usr.bin/perl/config.over 5 Feb 2017 00:33:38 - 1.22 > +++ gnu/usr.bin/perl/config.over 3 Aug 2023 04:34:38 - > @@ -64,3 +64,9 @@ myuname='openbsd' > > # force to use ranlib > ranlib='ranlib' > + > +# Enable the syscall emulator, > +# enabling syscall even if we don't have it > +d_
Re: [Diff] Keyboard backlight support for late powerbooks, plus keybindings
On Wed, 19 Jul 2023 02:03:26 +0200 Tobias Heider wrote: > > ok anyone? > > No one interested in working keyboard backlight shortcuts? > Don't get scared by the powerbook part, a lot of this is reusable for other > laptop models. The arch/macppc/* part is ok gkoehler@, except for 2 minor style issues at the top of adb.c. I have an issue with the wscons part. See my comments below. On Fri, 14 Jul 2023 17:53:41 + (UTC) jon@elytron.openbsd.amsterdam wrote: > Index: arch/macppc/dev/adb.c > === > RCS file: /cvs/src/sys/arch/macppc/dev/adb.c,v > retrieving revision 1.50 > diff -u -p -r1.50 adb.c > --- arch/macppc/dev/adb.c 11 Apr 2023 00:45:07 - 1.50 > +++ arch/macppc/dev/adb.c 13 Jul 2023 21:17:17 - > @@ -102,6 +102,8 @@ > #include > #include > > +#include > + > #include "apm.h" > > #define printf_intr printf 1st minor style issue: Please move this next to one of the other #include lines. > @@ -242,6 +244,12 @@ void setsoftadb(void); > int adb_intr(void *arg); > void adb_cuda_autopoll(void); > void adb_cuda_fileserver_mode(void); > +uint8_t pmu_backlight; /* keyboard backlight value */ > +int pmu_get_backlight(struct wskbd_backlight *); > +int pmu_set_backlight(struct wskbd_backlight *); > +extern int (*wskbd_get_backlight)(struct wskbd_backlight *); > +extern int (*wskbd_set_backlight)(struct wskbd_backlight *); > + > > #ifndef SMALL_KERNEL > void adb_shutdown(void *); 2nd minor style issue: Please delete the extra space before "pmu_backlight". > @@ -1730,8 +1738,11 @@ adbattach(struct device *parent, struct > > if (adbHardware == ADB_HW_CUDA) > adb_cuda_fileserver_mode(); > - if (adbHardware == ADB_HW_PMU) > + if (adbHardware == ADB_HW_PMU) { > + wskbd_get_backlight = pmu_get_backlight; > + wskbd_set_backlight = pmu_set_backlight; > pmu_fileserver_mode(1); > + } > > /* >* XXX If the machine doesn't have an ADB bus (PowerBook5,6+) This code enables pmu_{get,set}_backlight for almost all macppc models, including those without a keyboard backlight. I am ok with this, because I don't know how to detect exactly which models do or don't have a backlight. I booted this diff on an iMac G4 Flat Panel (PowerMac6,1), which has no internal keyboard (so no backlight). It boots with "wsconsctl keyboard.backlight" at 0%. Commands like "wsconsctl keyboard.backlight=100" change the number but have no other effect. I also booted it on an older PowerBook G4 (PowerBook5,4). This has a different keyboard backlight; this diff doesn't support it, so "wsconsctl keyboard.backlight=100" does nothing. > @@ -1757,4 +1768,20 @@ adbattach(struct device *parent, struct > if (adbHardware == ADB_HW_CUDA) > adb_cuda_autopoll(); > adb_polling = 0; > +} > + > +int > +pmu_get_backlight(struct wskbd_backlight *kbl) > +{ > + kbl->min = 0; > + kbl->max = 0xff; > + kbl->curval = pmu_backlight; > + return 0; > +} > + > +int > +pmu_set_backlight(struct wskbd_backlight *kbl) > +{ > + pmu_backlight = kbl->curval; > + return pmu_set_kbl(pmu_backlight); > } > Index: arch/macppc/dev/pm_direct.c > === > RCS file: /cvs/src/sys/arch/macppc/dev/pm_direct.c,v > retrieving revision 1.34 > diff -u -p -r1.34 pm_direct.c > --- arch/macppc/dev/pm_direct.c 28 Dec 2022 07:40:23 - 1.34 > +++ arch/macppc/dev/pm_direct.c 13 Jul 2023 21:17:24 - > @@ -853,3 +853,22 @@ pmu_fileserver_mode(int on) > } > pmgrop(&p); > } > + > +int > +pmu_set_kbl(unsigned int level) > +{ > + if (level > 0xff) > + return (EINVAL); > + > + PMData p; > + > + p.command = 0x4F; > + p.num_data = 3; > + p.s_buf = p.r_buf = p.data; > + p.data[0] = 0; > + p.data[1] = 0; > + p.data[2] = level; > + pmgrop(&p); > + return (0); > +} > + I would leave this command number 0x4F as is. It would have a PMU_* name in pm_direct.h, but I don't know what name to use. My notes use 0x4f = PMU_SET_CONTROL, but that might not be a good name. > Index: arch/macppc/dev/pm_direct.h > === > RCS file: /cvs/src/sys/arch/macppc/dev/pm_direct.h,v > retrieving revision 1.15 > diff -u -p -r1.15 pm_direct.h > --- arch/macppc/dev/pm_direct.h 21 Oct 2022 22:42:36 - 1.15 > +++ arch/macppc/dev/pm_direct.h 13 Jul 2023 21:17:24 - > @@ -67,6 +67,7 @@ struct pmu_battery_info > }; > > int pm_battery_info(int, struct pmu_battery_info *); > +int pmu_set_kbl(unsigned int); > > void pm_eject_pcmcia(int); > void pmu_fileserver_mode(int); > Index: dev/hid/hidkbd.c > === > RCS file: /cvs/src/sys/dev/hid/hidkbd.c,v > retrieving revision 1.9 > diff -u
Re: Stop using direct syscall(2) from perl(1)
On Thu, 20 Jul 2023 19:32:33 -0700 Andrew Hewus Fresh wrote: > On Mon, Jul 17, 2023 at 11:54:18PM -0400, George Koehler wrote: > > How to pread($fd, $buf, 4, 16)? > > > > 1. syscall(169, $fd, $buf, 4, 0, 16) > > 2. syscall(169, $fd, $buf, 4, 0, 0, 16) > > 3. syscall(169, $fd, $buf, 4, 16, 0) > > 4. syscall(169, $fd, $buf, 4, 0, 16, 0) > > 5. syscall(169, $fd, $buf, 4, 16) > > Which is the way it works without the this middleman? This is on macppc. Without your diff is syscall(2). With your diff is va_args(args, off_t). pread(2) without your diff needs line 2, $ perl 3 -E 'syscall(169, 3, $_ = "c" x 7, 7, 0, 0, 11); say $_' Charlie pread(2) with your diff needs line 1, $ perl 3 -E 'syscall(169, 3, $_ = "c" x 7, 7, 0, 11); say $_' Charlie I don't know how it worked before we removed __syscall(2). > > $ cd /usr/src/gnu/usr.bin/perl/obj > > $ MALLOC_OPTIONS=S perl t/op/syscall_emulator.t > > /usr/include/sys/syscall.h -> /usr/include/sys/syscall.ph > > 1..13 > > ok 1 - Opened test.txt for write/create > > ok 2 - Wrote out to test.txt > > ok 3 - closed test.txt > > perl(15511) in malloc(): write after free 0xc36d1efcddc0 > > Abort trap (core dumped) > > Hmm, I guess I need to get a powerpc64 machine. You didn't need a powerpc64. The crash was random; if you ran the test multiple times, then it would crash. Your updated diff from Thu 20 Jul doesn't crash.
Re: nv(4) acceleration disabled by default + enabling EXA
On Thu, 15 Jun 2023 20:26:43 +0200 Henryk Paluch wrote: > To enable EXA acceleration (the only that is supported in current > X-Window distribution) we can for example create file > /etc/X11/xorg.conf.d/15-nv-exa.conf with contents: > > > Section "Device" > Identifier "nv" > Driver "nv" > Option "AccelMethod" "EXA" > EndSection I tried your config on an iMac G4 (Flat Panel) with "GeForce MX with AGP8X (Mac)", but nv doesn't support EXA on this chipset, | [27.025] (WW) Warning, couldn't open module xaa | [27.025] (EE) NV: Failed to load module "xaa" (module does not exist, 0) | ... | [27.096] (WW) NV(0): Option "AccelMethod" is not used I suspect that few people use nv(4), because it only works with old nvidia cards. We might not find another nv(4) user. iMac's dmesg; Xorg.0.log (with your config); pcidumps -xxv are below. --George [ using 1332684 bytes of bsd ELF symbol table ] console out [NVDA,Display-A] console in [keyboard], using USB using parent NVDA,Parent:: memaddr 9800, size 800 : consaddr 98004000 : ioaddr 9100, size 100: width 1440 linebytes 1536 height 900 depth 8 Copyright (c) 1982, 1986, 1989, 1991, 1993 The Regents of the University of California. All rights reserved. Copyright (c) 1995-2023 OpenBSD. All rights reserved. https://www.OpenBSD.org OpenBSD 7.3-current (GENERIC) #140: Mon Jul 17 07:05:52 MDT 2023 dera...@macppc.openbsd.org:/usr/src/sys/arch/macppc/compile/GENERIC real mem = 2147483648 (2048MB) avail mem = 2070360064 (1974MB) random: good seed from bootblocks mpath0 at root scsibus0 at mpath0: 256 targets mainbus0 at root: model PowerMac6,1 cpu0 at mainbus0: 7455 (Revision 0x303): 999 MHz: 256KB L2 cache mem0 at mainbus0 spdmem0 at mem0: 1GB DDR SDRAM non-parity PC2700CL2.5 spdmem1 at mem0: 1GB DDR SDRAM non-parity PC3200CL3.0 memc0 at mainbus0: uni-n rev 0xd2 "hw-clock" at memc0 not configured kiic0 at memc0 offset 0xf8001000 iic0 at kiic0 mpcpcibr0 at mainbus0 pci: uni-north pci0 at mpcpcibr0 bus 0 pchb0 at pci0 dev 11 function 0 "Apple UniNorth AGP" rev 0x00 agp at pchb0 not configured vgafb0 at pci0 dev 16 function 0 vendor "NVIDIA", unknown product 0x0189 rev 0xa2 wsdisplay0 at vgafb0 mux 1: console (std, vt100 emulation) wsdisplay0: screen 1-5 added (std, vt100 emulation) mpcpcibr1 at mainbus0 pci: uni-north pci1 at mpcpcibr1 bus 0 macobio0 at pci1 dev 23 function 0 "Apple Intrepid" rev 0x00 openpic0 at macobio0 offset 0x4: version 0x4614 feature 3f0302 LE macgpio0 at macobio0 offset 0x50 macgpio1 at macgpio0 offset 0x9: irq 47 "programmer-switch" at macgpio0 offset 0x11 not configured "gpio5" at macgpio0 offset 0x6f not configured "gpio6" at macgpio0 offset 0x70 not configured "extint-gpio4" at macgpio0 offset 0x5c not configured "gpio11" at macgpio0 offset 0x75 not configured "extint-gpio15" at macgpio0 offset 0x67 not configured "extint-gpio16" at macgpio0 offset 0x68 not configured "escc-legacy" at macobio0 offset 0x12000 not configured zs0 at macobio0 offset 0x13000: irq 22,23 zstty0 at zs0 channel 0 zstty1 at zs0 channel 1 snapper0 at macobio0 offset 0x1: irq 30,1,2 "timer" at macobio0 offset 0x15000 not configured adb0 at macobio0 offset 0x16000 apm0 at adb0: battery flags 0x9, 0% charged piic0 at adb0 iic1 at piic0 "backlight" at macobio0 offset 0xf300 not configured kiic1 at macobio0 offset 0x18000 iic2 at kiic1 wdc0 at macobio0 offset 0x2 irq 24: DMA atapiscsi0 at wdc0 channel 0 drive 0 scsibus1 at atapiscsi0: 2 targets cd0 at scsibus1 targ 0 lun 0: removable cd0(wdc0:0:0): using BIOS timings, DMA mode 2 audio0 at snapper0 bwi0 at pci1 dev 18 function 0 "Broadcom BCM4306" rev 0x03: irq 52, address 00:11:24:28:c4:ef ohci0 at pci1 dev 24 function 0 "Apple Intrepid USB" rev 0x00: irq 27, version 1.0, legacy support ohci1 at pci1 dev 25 function 0 "Apple Intrepid USB" rev 0x00: irq 28, version 1.0, legacy support ohci2 at pci1 dev 26 function 0 "Apple Intrepid USB" rev 0x00: irq 29, version 1.0, legacy support usb0 at ohci0: USB revision 1.0 uhub0 at usb0 configuration 1 interface 0 "Apple OHCI root hub" rev 1.00/1.00 addr 1 usb1 at ohci1: USB revision 1.0 uhub1 at usb1 configuration 1 interface 0 "Apple OHCI root hub" rev 1.00/1.00 addr 1 usb2 at ohci2: USB revision 1.0 uhub2 at usb2 configuration 1 interface 0 "Apple OHCI root hub" rev 1.00/1.00 addr 1 mpcpcibr2 at mainbus0 pci: uni-north pci2 at mpcpcibr2 bus 0 kauaiata0 at pci2 dev 13 function 0 "Apple Intrepid ATA" rev 0x00 wdc1 at kauaiata0 irq 39: DMA wd0 at wdc1 channel 0 drive 0: wd0: 16-sector PIO, LBA48, 78533MB, 160836480 sectors wd0(wdc1:0:0): using PIO mode 4, DMA mode 2, Ultra-DMA mode 5 "Apple UniNorth Firewire" rev 0x81 at pci2 dev 14 function 0 not configured gem0 at pci2 dev 15 function 0 "Apple Uni-N2 GMAC" rev 0x80: irq 41, address 00:0a:95:89:5b:f2 bmtphy0 at gem0 phy 0: BCM5221 100baseTX PHY, rev. 4 uhub3 at uhub1 port 1 configuration 1 interface 0 "Mitsumi Electric Hub in Apple Extended USB
Re: Stop using direct syscall(2) from perl(1)
On Sun, 9 Jul 2023 13:29:58 -0700 Andrew Hewus Fresh wrote: > +syscall_emulator.h Emulator to dispatch syscalls to libc This got installed as /usr/libdata/perl5/powerpc64-openbsd/CORE/syscall_emulator.h ExtUtils::MakeMaker is using cc -DHAS_SYSCALL_EMULATOR to build my XS module; the define causes to #include . Can we hide this feature from XS? I wish that only pp_sys.c and syscall_emulator.c would #include , that syscall_emulator.h would not be installed, and that -DHAS_SYSCALL_EMULATOR would not appear in non-core modules. Also, the new test has a heap overflow, because $sb is too small for a struct stat, $ cd /usr/src/gnu/usr.bin/perl/obj $ MALLOC_OPTIONS=S perl t/op/syscall_emulator.t /usr/include/sys/syscall.h -> /usr/include/sys/syscall.ph 1..13 ok 1 - Opened test.txt for write/create ok 2 - Wrote out to test.txt ok 3 - closed test.txt perl(15511) in malloc(): write after free 0xc36d1efcddc0 Abort trap (core dumped) I prevented the overflow by making $sb too big, --- t/op/syscall_emulator.t.before Thu Jul 13 00:39:10 2023 +++ t/op/syscall_emulator.t Wed Jul 19 15:32:40 2023 @@ -47,7 +47,7 @@ my $out = "Hello World\n"; my $in = "\0" x 32; my ($in_p, $in_v); -my $sb = "\0\0\0\0"; +my $sb = "\0" x 4096; my $st_mode; my $perms = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH;
Re: Stop using direct syscall(2) from perl(1)
On Sun, 9 Jul 2023 13:29:58 -0700 Andrew Hewus Fresh wrote: > + case SYS_truncate: > + { > + const char * path = va_arg(args, const char *); > + off_t length = va_arg(args, off_t); > + ret = truncate(path, length); > + } > + break; I prefer braces like this, case SYS_truncate: { ... break; } In my opinion, all va_arg()s should look like off_t length = (off_t)va_arg(args, long); because perl passes every argument as long (from pp_syscall in pp_sys.c). I worry that va_arg(args, off_t) would act strangely on platforms with 32-bit long and 64-bit off_t. Only a few syscalls have off_t arguments, and these few calls are almost useless in Perl, so this might not affect real programs. How to pread($fd, $buf, 4, 16)? 1. syscall(169, $fd, $buf, 4, 0, 16) 2. syscall(169, $fd, $buf, 4, 0, 0, 16) 3. syscall(169, $fd, $buf, 4, 16, 0) 4. syscall(169, $fd, $buf, 4, 0, 16, 0) 5. syscall(169, $fd, $buf, 4, 16) If va_args(args, off_t) takes 2 longs, then pick line 1 if big-endian, or line 3 if little-endian. If it skips a 3rd long for alignment, then pick line 2 or 4. If it takes 1 long, then pick line 5. If we (off_t)va_args(args, long), then it always takes 1 long, so every platform picks line 5, but the offset must fit in a long. The syscalls with off_t are void *mmap(void *, size_t, int, int, int, off_t); void *mquery(void *, size_t, int, int, int, off_t); off_t lseek(int, off_t, int); int truncate(const char *, off_t); int ftruncate(int, off_t); ssize_t pread(int, void *, size_t, off_t); ssize_t pwrite(int, const void *, size_t, off_t); ssize_t preadv(int, const struct iovec *, int, off_t); ssize_t pwritev(int, const struct iovec *, int, off_t); syscall(SYS_lseek, @args) would truncate its return from off_t to long, but that is fine, because everyone should use Perl's sysseek. POSIX::2008 in CPAN looks like a better way to call pread.
Re: Stop using direct syscall(2) from perl(1)
On Sun, 9 Jul 2023 13:29:58 -0700 Andrew Hewus Fresh wrote: > Here is a patch to replace perl(1)'s use of syscall(2) with a dispatcher > that will call the libc function instead. patch(1) didn't "chmod +x gen_syscall_emulator.pl", but I needed to do so to get around this this error, $ make -f Makefile.bsd-wrapper /usr/src/gnu/usr.bin/perl/gen_syscall_emulator.pl > syscall_emulator.c /bin/sh: /usr/src/gnu/usr.bin/perl/gen_syscall_emulator.pl: cannot execute - Permission denied *** Error 126 in /usr/src/gnu/usr.bin/perl (Makefile.bsd-wrapper:51 'syscall_emulator.c')
Re: converting perl stuff to v5.36
On Sun, 7 May 2023 19:21:11 -0700 Philip Guenther wrote: > On Sun, May 7, 2023 at 6:13 AM Marc Espie > wrote: > > > I'm actually wondering whether keeping the prototype is worth it. > ... > For plain subs, I would only keep them if they _really_ help the calls look > for more perl-ish, by whatever scale you currently measure that. Maybe a > (&@) prototype so you can do "mysub { s/implicit/sub here/ } qw(args here)" > ala map and grep, but...eh. I wrote some (&@) prototypes before v5.36, | use v5.28; | use warnings; | use experimental 'signatures'; | | sub bsearch :prototype(&@) ($block, @list) { ... } | sub bsearch_range :prototype(&@) ($block, $low, $high) { ... } The signature checks that bsearch_range has exactly 3 arguments. I sometimes call subs with the wrong number of arguments. My other frequent mistakes in Perl are syntax errors, strict(3p) errors, and warnings(3p) of uninitialized values.
Re: OpenBSD perl 5.36.0 - Call for Testing
On Wed, 25 Jan 2023 18:45:32 -0800 Andrew Hewus Fresh wrote: > Good news! I got all our patches updated for perl 5.36.0. To skip the > summary of new features, jump down to the bottom for details on how you > can help test it. A few months ago, I put perl 5.36.0 in a macppc's $HOME, but it wasn't the system perl. Yesterday, I began to use 5.36.0 with your patches as a powerpc64's system perl. A few weeks ago I built 5.36.0 (with no patches) for macppc both with and without -Duse64bitint, thinking of whether to add -Duse64bitint to our system perl. Now, perl has 32-bit integers on 32-bit platforms (like macppc) and 64-bit integers on 64-bit platforms. If we add -Duse64bitint, perl would have 64-bit integers everywhere. I found that perl with -Duse64bitint runs slightly slower and uses more memory, so I'm not wanting to add -Duse64bitint now. $ perl -E 'say ~0' 4294967295# without 18446744073709551615 # with -Duse64bitint $ perl -E 'say 1 << 40' 0 # without 1099511627776 # with -Duse64bitint $ perl -E 'say unpack("Q", "abcdefgh")' Invalid type 'Q' in unpack at -e line 1. # without 7017280452245743464 # with -Duse64bitint > The feature that I'm most excited for, and probably what will make me > `use v5.36` in my scripts is multi-value iteration. Examples from the > docs: > > for my ($key, $value) (%hash) { ... } > for my ($left, $right, $gripping) (@moties) { ... } The for_list is still experimental, so if I decide to use it, I will turn off the warning, use v5.36; use experimental 'for_list'; for my ($k, $v) (%ENV) { say "$k=$v"; } This ($k, $v) iterates 2 values at a time. The for_list can't iterate $n values at a time if $n is variable. List::MoreUtils::natatime is awkward; I might copy the n-ary example from "perldoc -f splice". --George
libcrypto for powerpc g5 xonly
OpenBSD/macppc can enforce xonly on the PowerPC G5. libcrypto linked with cc -Wl,--execute-only will SIGSEGV as the PowerPC asm of sha256 tries to read a table from text. The fix is to move the table to rodata. To find the table, I would do bcl 20, 31, 1f 1: mflr%r7 addis %r7, %r7, .Ltable-1b@ha addi%r7, %r7, .Ltable-1b@l This diff does so in perlasm syntax. The literal "@ha" and "@l" in this diff are for an ELF platform (like OpenBSD) and might break the build for AIX or Mac OS, but I suspect that nobody builds this asm for those platforms. (PowerPC Mac OS is long obsolete, ended at Mac OS X 10.5.8.) If someone wants to try the PowerPC asm on a not-ELF platform, please tell me. aes-ppc.pl would have the same problem, but we don't use aes-ppc.pl, so I provide no fix. ports/security/openssl/{1.0.2,1.1,3.0} has copies of aes-ppc.pl and sha512-ppc.pl with the same problem, but doesn't enable them on OpenBSD, so I don't plan to edit them. sha512-ppc.pl can emit code for sha256 or sha512, but we only use it for sha256. The code uses simple ops (add, subtract, bit logic, bit rotation), nothing more fancy. I don't know why it runs faster than the (not asm) sha256 in ports/security/openssl. ok for this diff in src/lib/libcrypto? --George Index: sha/asm/sha512-ppc.pl === RCS file: /cvs/src/lib/libcrypto/sha/asm/sha512-ppc.pl,v retrieving revision 1.3 diff -u -p -r1.3 sha512-ppc.pl --- sha/asm/sha512-ppc.pl 14 Nov 2015 14:53:13 - 1.3 +++ sha/asm/sha512-ppc.pl 31 Jan 2023 22:03:47 - @@ -220,8 +220,11 @@ $func: $LD $G,`6*$SZ`($ctx) $LD $H,`7*$SZ`($ctx) - bl LPICmeup -LPICedup: + bcl 20,31,Lpc +Lpc: + mflr$Tbl + addis $Tbl,$Tbl,Ltable-Lpc\@ha + addi$Tbl,$Tbl,Ltable-Lpc\@l andi. r0,$inp,3 bne Lunaligned Laligned: @@ -377,22 +380,8 @@ $code.=<<___; blr .long 0 .byte 0,12,0x14,0,0,0,0,0 -___ - -# Ugly hack here, because PPC assembler syntax seem to vary too -# much from platforms to platform... -$code.=<<___; -.align 6 -LPICmeup: - mflrr0 - bcl 20,31,\$+4 - mflr$Tbl; vv "distance" between . and 1st data entry - addi$Tbl,$Tbl,`64-8` - mtlrr0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - .space `64-9*4` + .rodata +Ltable: ___ $code.=<<___ if ($SZ==8); .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
Re: powerpc, macppc: switch to clockintr(9)
On Sun, 20 Nov 2022 06:55:23 -0600 Scott Cheloha wrote: > Build completed, upgrade from resulting bsd.rd completed. I think > this is ready to commit. I'm happy to hear that your dual G4 macppc is running and your diff works. Your clock diff doesn't conflict with my incomplete suspend diff, because we are editing different files. (My PowerBook G4 can suspend but not resume, so my diff isn't ready to commit.) I want to mention one little detail, > @@ -54,7 +58,14 @@ u_int tb_get_timecount(struct timecounte > */ > u_int32_t ticks_per_sec = 3125000; > u_int32_t ns_per_tick = 320; > -static int32_t ticks_per_intr; > +uint64_t dec_nsec_cycle_ratio; > +uint64_t dec_nsec_max; > +static int initialized; > + > +const struct intrclock dec_intrclock = { > + .ic_rearm = dec_rearm, > + .ic_trigger = dec_trigger > +}; > > static struct timecounter tb_timecounter = { > .tc_get_timecount = tb_get_timecount, The old "static int32_t ticks_per_intr;" was visible in ddb, ddb> x/ld ticks_per_intr,1 ticks_per_intr: 184320 I don't like "static int initialized;" because its name is too generic, so "x/ld initialized,1" might examine the wrong file's "initialized". I prefer a less generic name, perhaps "int clock_initialized;" If it isn't "static", the linker might check that it doesn't have the same name as another variable. I don't need you to try another "make build" if you rename this variable. --George
Re: powerpc64: switch to clockintr(9)
(&clock_count, "clock", NULL); > - evcount_attach(&stat_count, "stat", NULL); > > cpu_startclock(); > } > @@ -77,14 +110,8 @@ cpu_initclocks(void) > void > cpu_startclock(void) > { > - struct cpu_info *ci = curcpu(); > - uint64_t nextevent; > - > - ci->ci_lasttb = mftb(); > - ci->ci_nexttimerevent = ci->ci_lasttb + tick_increment; > - nextevent = ci->ci_nextstatevent = ci->ci_nexttimerevent; > - > - mtdec(nextevent - ci->ci_lasttb); > + clockintr_cpu_init(&dec_intrclock); > + clockintr_trigger(); > intr_enable(); > } > > @@ -92,71 +119,25 @@ void > decr_intr(struct trapframe *frame) > { > struct cpu_info *ci = curcpu(); > - uint64_t tb, prevtb; > - uint64_t nextevent; > - uint32_t r; > - int nstats; > int s; > > + clock_count.ec_count++; > + > + mtdec(UINT32_MAX >> 1); /* clear DEC exception */ > + > /* >* If the clock interrupt is masked, postpone all work until >* it is unmasked in splx(9). >*/ > if (ci->ci_cpl >= IPL_CLOCK) { > ci->ci_dec_deferred = 1; > - mtdec(UINT32_MAX >> 1); /* clear DEC exception */ > return; > } > ci->ci_dec_deferred = 0; > > - /* > - * Based on the actual time delay since the last decrementer reload, > - * we arrange for earlier interrupt next time. > - */ > - > - tb = mftb(); > - > - while (ci->ci_nexttimerevent <= tb) > - ci->ci_nexttimerevent += tick_increment; > - > - prevtb = ci->ci_nexttimerevent - tick_increment; > - > - for (nstats = 0; ci->ci_nextstatevent <= tb; nstats++) { > - do { > - r = random() & (statvar - 1); > - } while (r == 0); /* random == 0 not allowed */ > - ci->ci_nextstatevent += statmin + r; > - } > - stat_count.ec_count += nstats; > - > - if (ci->ci_nexttimerevent < ci->ci_nextstatevent) > - nextevent = ci->ci_nexttimerevent; > - else > - nextevent = ci->ci_nextstatevent; > - > - /* > - * Transition of the MSB will trigger a decrementer interrupt. > - * So the next sequence is guaranteed to do the job without a > - * systematic skew. > - */ > - mtdec(nextevent - tb); > - mtdec(nextevent - mftb()); > - > s = splclock(); > intr_enable(); > - > - /* > - * Do standard timer interrupt stuff. > - */ > - while (ci->ci_lasttb < prevtb) { > - ci->ci_lasttb += tick_increment; > - clock_count.ec_count++; > - hardclock((struct clockframe *)frame); > - } > - > - while (nstats-- > 0) > - statclock((struct clockframe *)frame); > - > + clockintr_dispatch(frame); > intr_disable(); > splx(s); > } > @@ -164,25 +145,7 @@ decr_intr(struct trapframe *frame) > void > setstatclockrate(int newhz) > { > - uint64_t stat_increment; > - uint64_t min_increment; > - uint32_t var; > - u_long msr; > - > - msr = intr_disable(); > - > - stat_increment = tb_freq / newhz; > - var = 0x4000; /* really big power of two */ > - /* Find largest 2^n which is nearly smaller than statint/2. */ > - min_increment = stat_increment / 2 + 100; > - while (var > min_increment) > - var >>= 1; > - > - /* Not atomic, but we can probably live with that. */ > - statmin = stat_increment - (var >> 1); > - statvar = var; > - > - intr_restore(msr); > + clockintr_setstatclockrate(newhz); > } > > void -- George Koehler
Re: macppc hw.power, machdep.lidaction, machdep.pwraction
On Sun, 16 Oct 2022 18:05:28 +0200 Mark Kettenis wrote: > The consensus is that calling prsignal() from interrupt context isn't > safe. See dev/fdt/dapmic.c for a way to avoid that. I put prsignal() in interrupt context after "grep -R 'prsignal.*USR2'" found other files that might do the same, arch/arm64/dev/aplsmc.c aplmbox_intr -> sc_rx_callback = rtkit_rx_callback -> rtkit_poll -> callback = aplsmc_callback -> aplsmc_handle_notification -> prsignal arch/sparc64/dev/power.cpower_intr -> prsignal arch/sparc64/dev/rtc.c rtc_intr -> prsignal arch/sparc64/dev/sbus.c sbus_overtemp -> prsignal dev/pv/pvbus.c virtio_pci_config_intr -> sc_config_change = vmmci_config_change -> pvbus_shutdown -> prsignal I have edited adb.c in this diff to act like dapmic.c and use task_add(9) to call prsignal() from process context. (This diff still has the #if 0 in pm_env_intr; I will delete it.) Index: arch/macppc/dev/adb.c === RCS file: /cvs/src/sys/arch/macppc/dev/adb.c,v retrieving revision 1.46 diff -u -p -r1.46 adb.c --- arch/macppc/dev/adb.c 2 Jul 2022 08:50:41 - 1.46 +++ arch/macppc/dev/adb.c 20 Oct 2022 18:29:42 - @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -242,6 +243,11 @@ intadb_intr(void *arg); void adb_cuda_autopoll(void); void adb_cuda_fileserver_mode(void); +#ifndef SMALL_KERNEL +void adb_shutdown(void *); +struct task adb_shutdown_task = TASK_INITIALIZER(adb_shutdown, NULL); +#endif + #ifdef ADB_DEBUG /* * print_single @@ -831,6 +837,49 @@ adb_soft_intr(void) } } +#ifndef SMALL_KERNEL +void +adb_shutdown(void *arg) +{ + extern int allowpowerdown; + + if (allowpowerdown == 1) { + allowpowerdown = 0; + prsignal(initprocess, SIGUSR2); + } +} +#endif /* !SMALL_KERNEL */ + +void +adb_lid_closed_intr(void) +{ +#ifndef SMALL_KERNEL + switch (lid_action) { + case 1: + /* Suspend. */ + break; + case 2: + /* Hibernate. */ + break; + } +#endif +} + +void +adb_power_button_intr(void) +{ +#ifndef SMALL_KERNEL + switch (pwr_action) { + case 1: + task_add(systq, &adb_shutdown_task); + break; + case 2: + /* Suspend. */ + break; + } +#endif +} + /* * This is my version of the ADBOp routine. It mainly just calls the @@ -1597,6 +1646,7 @@ adbattach(struct device *parent, struct adbHardware = ADB_HW_CUDA; else if (strcmp(ca->ca_name, "via-pmu") == 0) { adbHardware = ADB_HW_PMU; + pm_in_adbattach(sc->sc_dev.dv_xname); /* * Bus reset can take a long time if no adb devices are Index: arch/macppc/dev/pm_direct.c === RCS file: /cvs/src/sys/arch/macppc/dev/pm_direct.c,v retrieving revision 1.31 diff -u -p -r1.31 pm_direct.c --- arch/macppc/dev/pm_direct.c 18 Sep 2022 21:36:41 - 1.31 +++ arch/macppc/dev/pm_direct.c 20 Oct 2022 18:29:42 - @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -142,6 +143,9 @@ signed char pm_receive_cmd_type[] = { -1, -1, -1, -1, -1, -1, -1, -1, }; +int pm_old_env; +struct ksensor pm_lid_sens; +struct ksensordev pm_sensdev; /* * Define the private functions @@ -161,6 +165,10 @@ intpm_send(u_char); void pm_adb_get_TALK_result(PMData *); void pm_adb_get_ADB_data(PMData *); +void pm_env_intr(PMData *); + + +extern int hw_power; /* * These variables are in adb_direct.c. @@ -413,6 +421,21 @@ pmgrop(PMData *pmdata) return rval; } +void +pm_in_adbattach(const char *devname) +{ + /* A PowerBook (including iBook) has a lid. */ + if (strncmp(hw_prod, "PowerBook", 9) == 0) { + strlcpy(pm_sensdev.xname, devname, + sizeof(pm_sensdev.xname)); + strlcpy(pm_lid_sens.desc, "lid open", + sizeof(pm_lid_sens.desc)); + pm_lid_sens.type = SENSOR_INDICATOR; + sensor_attach(&pm_sensdev, &pm_lid_sens); + sensordev_install(&pm_sensdev); + pm_lid_sens.value = 1; /* This is a guess. */ + } +} /* * My PM interrupt routine for the PB Duo series and the PB 5XX series @@ -456,9 +479,11 @@ pm_intr(void) case 0x16: /* ADB device event */ case 0x18: case 0x1e: - case PMU_INT_WAKEUP: pm_adb_get_ADB_data(&pmdata); break; + case PMU_INT_ENVIRONMENT: + pm_env_intr(&pmdata); + break; default: #ifdef ADB_DEBUG if (adb_debug) @@ -664,6 +689,41 @@ pm_adb_get_ADB_data(PMData *pmdata) } void +pm_env_intr(PMData *pmd
macppc hw.power, machdep.lidaction, machdep.pwraction
This diff will add a few features to the macppc kernel: - sysctl hw.power will be set to 0 or 1 when I unplug or plug in my PowerBook's AC adapter. - sysctl machdep.lidaction and machdep.pwraction will exist. - The default machdep.pwraction=1 will power off my macppc when I push the power button. - The default machdep.lidaction=1 should suspend, but will do nothing, because suspend is not yet implemented. - PowerBooks and iBooks will have a lid sensor, which looks like $ sysctl hw.sensors hw.sensors.adb0.indicator0=On (lid open) machdep.{lid,pwr}action are documented in acpi(4), but macppcs don't have acpi. This diff doesn't change any manuals. To access the new sysctls, I needed to build a kernel, install the kernel's includes, and build sysctl(8). machdep.pwraction=0 will disable the power button, which I might do on my Cube G4, where I sometimes touch the power button by accident. Small kernels (bsd.rd) will ignore the power button. Some other models of macppcs might have power buttons or lids that don't work with this diff. Most of these features use adb(4)'s environment interrupt, which this diff renames from PMU_INT_WAKEUP to PMU_INT_ENVIRONMENT to match Linux, FreeBSD, NetBSD. I will commit this diff in a few days, unless it breaks my build. A 2nd commit will connect the power button and lid interrupts to the incomplete suspend code. --George Index: arch/macppc/dev/adb.c === RCS file: /cvs/src/sys/arch/macppc/dev/adb.c,v retrieving revision 1.46 diff -u -p -r1.46 adb.c --- arch/macppc/dev/adb.c 2 Jul 2022 08:50:41 - 1.46 +++ arch/macppc/dev/adb.c 8 Oct 2022 03:52:17 - @@ -831,6 +831,41 @@ adb_soft_intr(void) } } +void +adb_lid_closed_intr(void) +{ +#ifndef SMALL_KERNEL + switch (lid_action) { + case 1: + /* Suspend. */ + break; + case 2: + /* Hibernate. */ + break; + } +#endif +} + +void +adb_power_button_intr(void) +{ +#ifndef SMALL_KERNEL + extern int allowpowerdown; + + switch (pwr_action) { + case 1: + if (allowpowerdown == 1) { + allowpowerdown = 0; + prsignal(initprocess, SIGUSR2); + } + break; + case 2: + /* Suspend. */ + break; + } +#endif +} + /* * This is my version of the ADBOp routine. It mainly just calls the @@ -1597,6 +1632,7 @@ adbattach(struct device *parent, struct adbHardware = ADB_HW_CUDA; else if (strcmp(ca->ca_name, "via-pmu") == 0) { adbHardware = ADB_HW_PMU; + pm_in_adbattach(sc->sc_dev.dv_xname); /* * Bus reset can take a long time if no adb devices are Index: arch/macppc/dev/pm_direct.c === RCS file: /cvs/src/sys/arch/macppc/dev/pm_direct.c,v retrieving revision 1.31 diff -u -p -r1.31 pm_direct.c --- arch/macppc/dev/pm_direct.c 18 Sep 2022 21:36:41 - 1.31 +++ arch/macppc/dev/pm_direct.c 8 Oct 2022 03:52:17 - @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -142,6 +143,9 @@ signed char pm_receive_cmd_type[] = { -1, -1, -1, -1, -1, -1, -1, -1, }; +int pm_old_env; +struct ksensor pm_lid_sens; +struct ksensordev pm_sensdev; /* * Define the private functions @@ -161,6 +165,10 @@ intpm_send(u_char); void pm_adb_get_TALK_result(PMData *); void pm_adb_get_ADB_data(PMData *); +void pm_env_intr(PMData *); + + +extern int hw_power; /* * These variables are in adb_direct.c. @@ -413,6 +421,21 @@ pmgrop(PMData *pmdata) return rval; } +void +pm_in_adbattach(const char *devname) +{ + /* A PowerBook (including iBook) has a lid. */ + if (strncmp(hw_prod, "PowerBook", 9) == 0) { + strlcpy(pm_sensdev.xname, devname, + sizeof(pm_sensdev.xname)); + strlcpy(pm_lid_sens.desc, "lid open", + sizeof(pm_lid_sens.desc)); + pm_lid_sens.type = SENSOR_INDICATOR; + sensor_attach(&pm_sensdev, &pm_lid_sens); + sensordev_install(&pm_sensdev); + pm_lid_sens.value = 1; /* This is a guess. */ + } +} /* * My PM interrupt routine for the PB Duo series and the PB 5XX series @@ -456,9 +479,11 @@ pm_intr(void) case 0x16: /* ADB device event */ case 0x18: case 0x1e: - case PMU_INT_WAKEUP: pm_adb_get_ADB_data(&pmdata); break; + case PMU_INT_ENVIRONMENT: + pm_env_intr(&pmdata); + break; default: #ifdef ADB_DEBUG if (adb_debug) @@ -664,6 +689,41 @@ pm_adb_get_ADB_data(PMData *pmdata) } void +pm_env_intr(PMData *pmdata) +{ + in
Re: powerpc64: retrigger deferred DEC interrupts from splx(9)
On Tue, 2 Aug 2022 19:55:02 +0200 (CEST) Mark Kettenis wrote: > > Date: Tue, 2 Aug 2022 11:56:59 -0500 > > From: Scott Cheloha > > > > At what point do we consider the patch safe? Have you seen any hangs? > > > > Wanna run with it another week? > > Sorry. I'm not set up to run my powerpc64 machine for extended > periods of time. But I'm fine with this going in if George can > confirm that this diff builds. Yes, it builds. ok gkoehler@ I have been running this version of the diff for a few hours, after switching from the previous version (which had modified trap.c). > > Index: include/cpu.h > > === > > RCS file: /cvs/src/sys/arch/powerpc64/include/cpu.h,v > > retrieving revision 1.31 > > diff -u -p -r1.31 cpu.h > > --- include/cpu.h 6 Jul 2021 09:34:07 - 1.31 > > +++ include/cpu.h 25 Jul 2022 23:43:47 - > > @@ -74,9 +74,9 @@ struct cpu_info { > > uint64_tci_lasttb; > > uint64_tci_nexttimerevent; > > uint64_tci_nextstatevent; > > - int ci_statspending; > > > > volatile intci_cpl; > > + volatile intci_dec_deferred; > > uint32_tci_ipending; > > uint32_tci_idepth; > > #ifdef DIAGNOSTIC > > Index: powerpc64/clock.c > > === > > RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/clock.c,v > > retrieving revision 1.3 > > diff -u -p -r1.3 clock.c > > --- powerpc64/clock.c 23 Feb 2021 04:44:31 - 1.3 > > +++ powerpc64/clock.c 25 Jul 2022 23:43:47 - > > @@ -98,6 +98,17 @@ decr_intr(struct trapframe *frame) > > int s; > > > > /* > > +* If the clock interrupt is masked, postpone all work until > > +* it is unmasked in splx(9). > > +*/ > > + if (ci->ci_cpl >= IPL_CLOCK) { > > + ci->ci_dec_deferred = 1; > > + mtdec(UINT32_MAX >> 1); /* clear DEC exception */ > > + return; > > + } > > + ci->ci_dec_deferred = 0; > > + > > + /* > > * Based on the actual time delay since the last decrementer reload, > > * we arrange for earlier interrupt next time. > > */ > > @@ -130,30 +141,23 @@ decr_intr(struct trapframe *frame) > > mtdec(nextevent - tb); > > mtdec(nextevent - mftb()); > > > > - if (ci->ci_cpl >= IPL_CLOCK) { > > - ci->ci_statspending += nstats; > > - } else { > > - nstats += ci->ci_statspending; > > - ci->ci_statspending = 0; > > - > > - s = splclock(); > > - intr_enable(); > > - > > - /* > > -* Do standard timer interrupt stuff. > > -*/ > > - while (ci->ci_lasttb < prevtb) { > > - ci->ci_lasttb += tick_increment; > > - clock_count.ec_count++; > > - hardclock((struct clockframe *)frame); > > - } > > + s = splclock(); > > + intr_enable(); > > > > - while (nstats-- > 0) > > - statclock((struct clockframe *)frame); > > - > > - intr_disable(); > > - splx(s); > > + /* > > +* Do standard timer interrupt stuff. > > +*/ > > + while (ci->ci_lasttb < prevtb) { > > + ci->ci_lasttb += tick_increment; > > + clock_count.ec_count++; > > + hardclock((struct clockframe *)frame); > > } > > + > > + while (nstats-- > 0) > > + statclock((struct clockframe *)frame); > > + > > + intr_disable(); > > + splx(s); > > } > > > > void > > Index: powerpc64/intr.c > > === > > RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/intr.c,v > > retrieving revision 1.9 > > diff -u -p -r1.9 intr.c > > --- powerpc64/intr.c26 Sep 2020 17:56:54 - 1.9 > > +++ powerpc64/intr.c25 Jul 2022 23:43:47 - > > @@ -139,6 +139,11 @@ splx(int new) > > { > > struct cpu_info *ci = curcpu(); > > > > + if (ci->ci_dec_deferred && new < IPL_CLOCK) { > > + mtdec(0); > > + mtdec(UINT32_MAX); /* raise DEC exception */ > > + } > > + > > if (ci->ci_ipending & intr_smask[new]) > > intr_do_pending(new); > > > >
Re: powerpc, macppc: retrigger deferred DEC interrupts from splx(9)
On Wed, 29 Jun 2022 22:47:19 -0500 Scott Cheloha wrote: > To be perfectly clear, you are concerned about this scenario: > > > > + if (ci->ci_dec_deferred && newcpl < IPL_CLOCK) { > > > + ppc_mtdec(0); > > /* DEC interrupt fires *here*. */ > /* We jump to decrint() and then call decr_intr(). */ > > > > + ppc_mtdec(UINT32_MAX); /* raise DEC exception */ > > > + } > > I think it's possible for the DEC exception to occur in that spot. > However, external/DEC *interrupts* are explicitly disabled, so I don't > think that we will jump to decrint() until the next time we do > > ppc_intr_enable(1); I missed the ppc_intr_disable(), which disables PSL_EE, in macintr_splx and openpic_splx. You are correct, it can't call decr_intr until ppc_intr_enable(1). ppc_dflt_splx also looks good, because we don't enable PSL_EE until we switch to macintr_splx or openpic_splx. > > Would this be better? > > > > ppc_mtdec(1 >> UINT32_MAX); > > ppc_mtdec(UINT32_MAX); > > I assume you meant to type > > ppc_mtdec(UINT32_MAX >> 1); Yes, I meant UINT32_MAX >> 1, but you have persuaded me that the existing ppc_mtdec(0) is correct, and no change is necessary. I will continue running your diff with ppc_mtdec(0).
Re: powerpc, macppc: retrigger deferred DEC interrupts from splx(9)
Hi. I have a question about splx, below. On Thu, 23 Jun 2022 21:58:48 -0500 Scott Cheloha wrote: > My machine uses openpic(4). I would appreciate tests on a > non-openpic(4) Mac, though all tests are appreciated. We only run on New World Macs, and the only ones without openpic(4) might be the oldest models of iMac G3 from 1998; these would attach macintr0 and not openpic0 in dmesg. I don't know anyone who might have such an iMac. The iMac model PowerMac2,1 from 1999 (with the (slot-loading cd drive) does have openpic(4). > Index: macppc/dev/openpic.c > === > RCS file: /cvs/src/sys/arch/macppc/dev/openpic.c,v > retrieving revision 1.89 > diff -u -p -r1.89 openpic.c > --- macppc/dev/openpic.c 21 Feb 2022 10:38:50 - 1.89 > +++ macppc/dev/openpic.c 24 Jun 2022 02:49:59 - > @@ -382,6 +382,10 @@ openpic_splx(int newcpl) > > intr = ppc_intr_disable(); > openpic_setipl(newcpl); > + if (ci->ci_dec_deferred && newcpl < IPL_CLOCK) { > + ppc_mtdec(0); > + ppc_mtdec(UINT32_MAX); /* raise DEC exception */ > + } > if (newcpl < IPL_SOFTTTY && (ci->ci_ipending & ppc_smask[newcpl])) { > s = splsofttty(); > dosoftint(newcpl); The 2nd mtdec tries to raise dec_intr by changing bit 1 << 31 of the decrementer register from 0 to 1. I suspect if the decrementer can also decrement itself from 0 to UINT32_MAX, and raise dec_intr early, before we reach the 2nd mtdec. This would be bad, because this ppc_mtdec(UINT32_MAX) would override the ppc_mtdec(nextevent - tb) in dec_intr and lose the next scheduled clock interrupt. Testing might miss this problem. For example, a randomly reordered kernel might place the 2 mtdec instructions in different pages, which has a small chance of a page fault on a Mac G5. Would this be better? ppc_mtdec(1 >> UINT32_MAX); ppc_mtdec(UINT32_MAX);
Re: patch: nm(1): add support for symbols created with -ffunction-sections
On Sat, 6 Nov 2021 15:20:03 +0100 Sebastien Marie wrote: > When an object is compiled using -ffunction-sections... > The following diff makes nm(1) to properly mark the function 'T', by > recognize ".text.*" sections: ok gkoehler@ nm(1) has more problems. If one compiles with -fdata-sections, then the mark on read-only symbols (like "const int var;") changes from 'R' to 'D', because nm knows ".rodata" but not ".rodata.*". If an object has >= 65280 sections, then nm fails. Example is from clang 11.1.0 on my macppc, $ perl -E 'say "void f$_(void) {}" for 1..3'>exam.c $ time cc -O2 -ffunction-sections -c exam.c 7m40.20s real 7m33.96s user 0m03.29s system $ nm exam.o nm: exam.o: no section header table This is because nm(1) can't read a section count >= 65280. https://stackoverflow.com/a/30428833/3614563 quotes the ELF spec, | If the number of sections is greater than or equal to SHN_LORESERVE | (0xff00), e_shnum has the value SHN_UNDEF (0) and the actual number | of section header table entries is contained in the sh_size field of | the section header at index 0 ... I found a large count in real C++ code, when I built llvm-project from git on amd64. Then nm(1) failed on some *.o file (and ar(1) made a wrongly indexed *.a, so I used llvm-ar to unbreak my build). I tried to fix nm(1), but then I got '?' and not 'T' on my function symbols, because I didn't have your diff. My sections were named ".text.*" and not ".text". I switched to llvm-nm or another tool. I didn't know that libtool(1) runs nm. We might have a problem if something will use libtool with -ffunction-sections or -fdata-sections and have >= 65280 sections. I don't know whether nm should check section names like ".text*", or it should ignore section names and check bits like SHF_EXECINSTR. Because nm now checks section names, your diff improves nm. --George > diff cecccd4b3c548875286ca2b010c95cbce6c0e359 /home/semarie/repos/openbsd/src > blob - 5aeef7a01a7cbff029299cfc5562cfcec085347f > file + usr.bin/nm/elf.c > --- usr.bin/nm/elf.c > +++ usr.bin/nm/elf.c > @@ -274,6 +274,8 @@ elf_shn2type(Elf_Ehdr *eh, u_int shn, const char *sn) > return (-1); > else if (!strcmp(sn, ELF_TEXT)) > return (N_TEXT); > + else if (!strncmp(sn, ".text.", 6)) > + return (N_TEXT); > else if (!strcmp(sn, ELF_RODATA)) > return (N_SIZE); > else if (!strcmp(sn, ELF_OPENBSDRANDOMDATA)) > @@ -355,6 +357,7 @@ elf2nlist(Elf_Sym *sym, Elf_Ehdr *eh, Elf_Shdr *shdr, > } else if (sn != NULL && *sn != 0 && > strcmp(sn, ELF_INIT) && > strcmp(sn, ELF_TEXT) && > + strncmp(sn, ".text.", 6) && > strcmp(sn, ELF_FINI)) /* XXX GNU compat */ > np->nl.n_other = '?'; > break; > > The change on elf_shn2type() isn't strictly necessary for my use-case, > but it (should) makes .text.* support better (recognize N_TEXT for > STT_NOTYPE, STT_OBJECT, STT_TLS). > > > After, nm(1) properly recognize the symbol: > > $ /usr/obj/usr.bin/nm/nm test.o > d .L.str > W __llvm_retpoline_r11 > W __retguard_759 > U printf > F test.c > T test_fn > > and it makes libtool(1) happy (LT/Archive.pm: get_symbollist > function), and it makes librsvg build happy (which is playing with > symbols at build time), and it should makes aja@ happy too. > > Comments or OK ? > -- > Sebastien Marie > -- George Koehler
Re: macppc: ignore 0 BARs in vgafb
On Thu, 21 Oct 2021 18:58:53 -0500 Ryan Farley wrote: > Hello, > > Based on the history it seems that the drm code is now used for AMD/ATI > graphics hardware, leaving those with lowly old nvidia cards (which > can't be replaced in the case of my powerbook) to rely on xf86-video-nv > for a proper full-color X experience; the problem is that the existing > logic here will select the wrong mmio region (reported as starting at > 0x) and prevent mmap()-ing the correct one from userland as a > result. This solves that problem by uncommenting the check for 0. Hello Ryan, Please mail to me the output of these commands, # dmesg # pcidump -v on the computer that needs your diff. I want to compare it with my G5 tower PowerMac7,3 (with nvidia vgafb). --George > I"m not personally aware of any hardware that would be negatively > affected by this; perhaps some newer chipsets would have more comlicated > memory layouts but they'll have the benefit of the AGP aperture in > correspondingly 'newer' hardware, so I'm curious if this would be > acceptable: > > diff --git sys/arch/macppc/pci/vgafb.c sys/arch/macppc/pci/vgafb.c > index e3752045b95..41bbdcd8ab2 100644 > --- sys/arch/macppc/pci/vgafb.c > +++ sys/arch/macppc/pci/vgafb.c > @@ -517,8 +517,7 @@ vgafb_mapregs(struct vgafb_softc *sc, struct > pci_attach_args *pa) > &ba, &bs, NULL); > if (rv != 0) > continue; > - > - if (bs == 0 /* || ba == 0 */) { > + if (bs == 0 || ba == 0 ) { > /* ignore this entry */ > } else if (hasmem == 0) { > /* > -- George Koehler
missing logbl(3) where 64-bit long double
Hello tech list, OpenBSD's libm is missing logbl(3) function on archs where long double is double. The missing logbl breaks my builds of "NEW: devel/dtools" (ports list) on macppc and powerpc64. I guess that other software avoids logbl and prefers ilogbl(3) or frexp(3). This diff adds logbl and fixes dtools, ok? --George Index: src/s_logb.c === RCS file: /cvs/src/lib/libm/src/s_logb.c,v retrieving revision 1.4 diff -u -p -r1.4 s_logb.c --- src/s_logb.c12 Sep 2016 19:47:02 - 1.4 +++ src/s_logb.c14 Oct 2021 21:49:22 - @@ -33,3 +33,4 @@ logb(double x) return (double) (ix-1023); } DEF_STD(logb); +LDBL_MAYBE_UNUSED_CLONE(logb);
Re: macppc bsd.mp pmap's hash lock
On Wed, 19 May 2021 00:27:51 -0400 George Koehler wrote: > Here's my new diff. It is a copy of what I sent to ppc@ a moment ago. > I would ask, "Is it ok to commit?", but while writing this mail, I > found a 2nd potential problem with memory barriers, so I will need to > edit this diff yet again. I edited the diff to add a 2nd membar_exit() and a comment. I want to commit this version, ok? Index: arch/powerpc/include/mplock.h === RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v retrieving revision 1.4 diff -u -p -r1.4 mplock.h --- arch/powerpc/include/mplock.h 15 Apr 2020 08:09:00 - 1.4 +++ arch/powerpc/include/mplock.h 20 May 2021 19:47:52 - @@ -30,13 +30,14 @@ #define __USE_MI_MPLOCK /* + * __ppc_lock exists because pte_spill_r() can't use __mp_lock. * Really simple spinlock implementation with recursive capabilities. * Correctness is paramount, no fancyness allowed. */ struct __ppc_lock { - volatile struct cpu_info *mpl_cpu; - volatile long mpl_count; + struct cpu_info *volatile mpl_cpu; + longmpl_count; }; #ifndef _LOCORE @@ -44,10 +45,6 @@ struct __ppc_lock { void __ppc_lock_init(struct __ppc_lock *); void __ppc_lock(struct __ppc_lock *); void __ppc_unlock(struct __ppc_lock *); -int __ppc_release_all(struct __ppc_lock *); -int __ppc_release_all_but_one(struct __ppc_lock *); -void __ppc_acquire_count(struct __ppc_lock *, int); -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *); #endif Index: arch/powerpc/powerpc/lock_machdep.c === RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v retrieving revision 1.9 diff -u -p -r1.9 lock_machdep.c --- arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 - 1.9 +++ arch/powerpc/powerpc/lock_machdep.c 20 May 2021 19:47:52 - @@ -1,6 +1,7 @@ /* $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $*/ /* + * Copyright (c) 2021 George Koehler * Copyright (c) 2007 Artur Grabowski * * Permission to use, copy, modify, and distribute this software for any @@ -22,10 +23,21 @@ #include #include -#include #include +/* + * If __ppc_lock() crosses a page boundary in the kernel text, then it + * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r() + * would recursively call __ppc_lock(). The lock must be in a valid + * state when the page fault happens. We acquire or release the lock + * with a 32-bit atomic write to mpl_owner, so the lock is always in a + * valid state, before or after the write. + * + * Acquired the lock: mpl->mpl_cpu == curcpu() + * Released the lock: mpl->mpl_cpu == NULL + */ + void __ppc_lock_init(struct __ppc_lock *lock) { @@ -46,12 +58,12 @@ static __inline void __ppc_lock_spin(struct __ppc_lock *mpl) { #ifndef MP_LOCKDEBUG - while (mpl->mpl_count != 0) + while (mpl->mpl_cpu != NULL) CPU_BUSY_CYCLE(); #else int nticks = __mp_lock_spinout; - while (mpl->mpl_count != 0 && --nticks > 0) + while (mpl->mpl_cpu != NULL && --nticks > 0) CPU_BUSY_CYCLE(); if (nticks == 0) { @@ -65,32 +77,33 @@ void __ppc_lock(struct __ppc_lock *mpl) { /* -* Please notice that mpl_count gets incremented twice for the -* first lock. This is on purpose. The way we release the lock -* in mp_unlock is to decrement the mpl_count and then check if -* the lock should be released. Since mpl_count is what we're -* spinning on, decrementing it in mpl_unlock to 0 means that -* we can't clear mpl_cpu, because we're no longer holding the -* lock. In theory mpl_cpu doesn't need to be cleared, but it's -* safer to clear it and besides, setting mpl_count to 2 on the -* first lock makes most of this code much simpler. +* Please notice that mpl_count stays at 0 for the first lock. +* A page fault might recursively call __ppc_lock() after we +* set mpl_cpu, but before we can increase mpl_count. +* +* After we acquire the lock, we need a "bc; isync" memory +* barrier, but we might not reach the barrier before the next +* page fault. Then the fault's recursive __ppc_lock() must +* have a barrier. membar_enter() is just "isync" and must +* come after a conditional branch for holding the lock. */ while (1) { - int s; + struct cpu_info *owner = mpl->mpl_cpu; + struct cpu_info *ci = curcpu(); - s = ppc_intr_disable(); - if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) { + if (owner == NULL)
Re: macppc bsd.mp pmap's hash lock
On Thu, 13 May 2021 02:20:45 -0400 George Koehler wrote: > My last diff (11 May 2021) still has a potential problem with memory > barriers. I will mail a new diff if I think of a fix. Here's my new diff. It is a copy of what I sent to ppc@ a moment ago. I would ask, "Is it ok to commit?", but while writing this mail, I found a 2nd potential problem with memory barriers, so I will need to edit this diff yet again. I add a 2nd membar_enter() to __ppc_lock() to avoid my 1st "potential problem". I also simplify the code by changing __ppc_lock_spin to check the owning cpu, not the count. A moment ago, I sent a copy of this diff to ppc@, in reply to another report of bsd.mp hanging at "using parent ..." on a G5. My own hang happened when the function __ppc_lock(), in a randomly reordered bsd.mp, crossed a page boundary and caused a page fault with a recursive call to __ppc_lock(). The goal of my diff is to prevent such hangs, by allowing such recursive calls to work. I acquire the lock with an atomic 32-bit write, so the lock is always in a valid state before or after the write. The old code did spin until mpl_count == 0. In my earlier diffs, I acquired the lock by setting both the owning cpu and a nonzero count in one write, so I needed to pack both owner and count in a 32-bit integer. This diff spins until mpl_cpu == NULL. I acquire the lock by setting only mpl_cpu and leaving mpl_count at 0. So mpl_cpu is the spinlock, and mpl_count is a variable protected by the lock. I no longer need to pack both fields in a 32-bit integer, so I get rid of my BOLT* macros for packing and unpacking the fields. I need 2 memory barriers: 1. after acquiring the lock, before accessing the locked data. 2. after accessing the locked data, before releasing the lock. I added the 2nd membar_enter(), because I feared that a cpu would acquire the lock, but get a page fault (and recursive call) before it would reach the memory barrier. I didn't think of the opposite case, where a cpu would do membar_exit(), but gets a page fault before it would release the lock. This is the 2nd potential problem that I didn't fix. I didn't observe an actual problem after running this diff for more than 24 hours. --George Index: sys/arch/powerpc/include/mplock.h === RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v retrieving revision 1.4 diff -u -p -r1.4 mplock.h --- sys/arch/powerpc/include/mplock.h 15 Apr 2020 08:09:00 - 1.4 +++ sys/arch/powerpc/include/mplock.h 16 May 2021 00:51:41 - @@ -30,13 +30,14 @@ #define __USE_MI_MPLOCK /* + * __ppc_lock exists because pte_spill_r() can't use __mp_lock. * Really simple spinlock implementation with recursive capabilities. * Correctness is paramount, no fancyness allowed. */ struct __ppc_lock { - volatile struct cpu_info *mpl_cpu; - volatile long mpl_count; + struct cpu_info *volatile mpl_cpu; + longmpl_count; }; #ifndef _LOCORE @@ -44,10 +45,6 @@ struct __ppc_lock { void __ppc_lock_init(struct __ppc_lock *); void __ppc_lock(struct __ppc_lock *); void __ppc_unlock(struct __ppc_lock *); -int __ppc_release_all(struct __ppc_lock *); -int __ppc_release_all_but_one(struct __ppc_lock *); -void __ppc_acquire_count(struct __ppc_lock *, int); -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *); #endif Index: sys/arch/powerpc/powerpc/lock_machdep.c === RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v retrieving revision 1.9 diff -u -p -r1.9 lock_machdep.c --- sys/arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 - 1.9 +++ sys/arch/powerpc/powerpc/lock_machdep.c 16 May 2021 00:51:41 - @@ -1,6 +1,7 @@ /* $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $*/ /* + * Copyright (c) 2021 George Koehler * Copyright (c) 2007 Artur Grabowski * * Permission to use, copy, modify, and distribute this software for any @@ -22,10 +23,21 @@ #include #include -#include #include +/* + * If __ppc_lock() crosses a page boundary in the kernel text, then it + * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r() + * would recursively call __ppc_lock(). The lock must be in a valid + * state when the page fault happens. We acquire or release the lock + * with a 32-bit atomic write to mpl_owner, so the lock is always in a + * valid state, before or after the write. + * + * Acquired the lock: mpl->mpl_cpu == curcpu() + * Released the lock: mpl->mpl_cpu == NULL + */ + void __ppc_lock_init(struct __ppc_lock *lock) { @@ -46,12 +58,12 @@ static __inline void __ppc_lock_spin(struct __ppc_lock *mpl) { #ifndef MP_LOCKDEBUG - while (mpl->mpl_count != 0) + while (mpl->mpl_cpu != NULL)
Re: macppc bsd.mp pmap's hash lock
My last diff (11 May 2021) still has a potential problem with memory barriers. I will mail a new diff if I think of a fix. On Tue, 11 May 2021 11:08:55 -0500 Dale Rahn wrote: > This structure really should be cache-line aligned, which should prevent it > from spilling across a page boundary. There is both a struct __ppc_lock and a function __ppc_lock(); my G5 got stuck at boot when the function crossed a page boundary. It might help to align parts of the function, but if I would do so, I might need to write the entire function in asm and count instructions. The potential problem in my last diff is here inside __ppc_lock(): 15eb8c: 7c c0 19 2d stwcx. r6,0,r3 15eb90: 40 c2 ff f0 bne+15eb80 <__ppc_lock+0x40> 15eb94: 7c 08 38 40 cmplw r8,r7 15eb98: 41 82 00 28 beq-15ebc0 <__ppc_lock+0x80> ... 15ebc0: 4c 00 01 2c isync After "stwcx." grabs the lock, we need a "bc; isync" memory barrier, which is the "bne+" and "isync" in the above disassembly. A page fault might act like an "isync", so if we can do the "bne+" before the page fault, we might be fine. The problem is that, if the "stwcx." and "bne+" are in different pages (1 in 1024 chance?), then I would get a page fault before the "bne+", so my code would skip the necessary memory barrier. I guess that I can fix it by adding another membar_enter() somewhere. > The powerpc pmap was originally designed to have 8 'way' locks so that > only a single way would get locked, thus (as long as one doesn't have way > more than 8 cores) any core should be able to choose some way to replace > and get the work done. This also would have allowed limited recursion. > However at some point those were taken out. Note that before locking one of > the ways of the hashtable, it would hold the lock on the pmap that it was > inserting the mapping from (which could be the kernel). Not certain if the > kernel pmap lock is recursive or not. That's a good history lesson for me; I'm too new. The pmap lock is "struct mutex pm_mtx" (powerpc/include/pmap.h), so it isn't recursive. The code in pmap.c pte_spill_r() doesn't hold the pmap lock if the page is in the direct map. The kernel text, including the function __ppc_lock(), is in the direct map. > The point was to allow multiple cores to access different regions of the > hashtable at the same time (minimal contention). However it would also > allow a core to reenter the lock_try on a different way where it should be > able to succeed (as long as the recursion doesn't go 8 deep?) If I understand the code, the max recursion is 2 levels. The kernel can grab the hash lock, get a page fault, then the page fault handler can do 2nd grab. The handler is in real mode (no address translation) and would never cause another page fault and a 3rd grab. If we would have the 8 locks, then it might deadlock if 8 cpus each hold a lock and try to grab a 2nd lock (but a macppc has at most 4 cpus: PPC_MAXPROCS in powerpc/include/cpu.h). --George > On Tue, May 11, 2021 at 12:42 AM George Koehler wrote: > > > I made a mistake in my last diff by deleting the memory barriers. > > Here is a diff that keeps membar_enter() and membar_exit(). > > > > With my last diff, my G5 froze after 15 hours of uptime, and again > > after 10 hours of uptime. I'm not sure whether the missing memory > > barriers caused the freeze, but I will be running this diff and hoping > > for fewer freezes. > > > > On Sat, 8 May 2021 18:59:35 +0200 (CEST) > > Mark Kettenis wrote: > > > > > Good find! On powerpc64 I avoid the issue because I guarantee that > > > the kernel mappings are never evicted from the hash. But doing so on > > > powerpc would require more serious development. I'm not sure we > > > really need a ticket lock for this, but since you already did the > > > work, let's stick with it for now. > > > > __ppc_lock (before and after this diff) doesn't give tickets like > > __mp_lock does, but __ppc_lock and __mp_lock are both recursive locks. > > I don't know an easy way to avoid the recursion, if some of the kernel > > mappings might not be in the hash. My __ppc_lock diff should be easy > > if I don't make mistakes like wrong memory barriers. > > > > --George > > > > Index: arch/powerpc/include/mplock.h > > === > > RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v > > retrieving revision 1.4 > > diff -u -p -r1.4 mplock.h > > --- arch/powerpc/include/mplock.h 15 Apr 2020 08:
Re: macppc bsd.mp pmap's hash lock
I made a mistake in my last diff by deleting the memory barriers. Here is a diff that keeps membar_enter() and membar_exit(). With my last diff, my G5 froze after 15 hours of uptime, and again after 10 hours of uptime. I'm not sure whether the missing memory barriers caused the freeze, but I will be running this diff and hoping for fewer freezes. On Sat, 8 May 2021 18:59:35 +0200 (CEST) Mark Kettenis wrote: > Good find! On powerpc64 I avoid the issue because I guarantee that > the kernel mappings are never evicted from the hash. But doing so on > powerpc would require more serious development. I'm not sure we > really need a ticket lock for this, but since you already did the > work, let's stick with it for now. __ppc_lock (before and after this diff) doesn't give tickets like __mp_lock does, but __ppc_lock and __mp_lock are both recursive locks. I don't know an easy way to avoid the recursion, if some of the kernel mappings might not be in the hash. My __ppc_lock diff should be easy if I don't make mistakes like wrong memory barriers. --George Index: arch/powerpc/include/mplock.h === RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v retrieving revision 1.4 diff -u -p -r1.4 mplock.h --- arch/powerpc/include/mplock.h 15 Apr 2020 08:09:00 - 1.4 +++ arch/powerpc/include/mplock.h 10 May 2021 23:33:00 - @@ -30,13 +30,13 @@ #define __USE_MI_MPLOCK /* + * __ppc_lock exists because pte_spill_r() can't use __mp_lock. * Really simple spinlock implementation with recursive capabilities. * Correctness is paramount, no fancyness allowed. */ struct __ppc_lock { - volatile struct cpu_info *mpl_cpu; - volatile long mpl_count; + volatile unsigned int mpl_bolt; }; #ifndef _LOCORE @@ -44,10 +44,6 @@ struct __ppc_lock { void __ppc_lock_init(struct __ppc_lock *); void __ppc_lock(struct __ppc_lock *); void __ppc_unlock(struct __ppc_lock *); -int __ppc_release_all(struct __ppc_lock *); -int __ppc_release_all_but_one(struct __ppc_lock *); -void __ppc_acquire_count(struct __ppc_lock *, int); -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *); #endif Index: arch/powerpc/powerpc/lock_machdep.c === RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v retrieving revision 1.9 diff -u -p -r1.9 lock_machdep.c --- arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 - 1.9 +++ arch/powerpc/powerpc/lock_machdep.c 10 May 2021 23:33:00 - @@ -1,6 +1,7 @@ /* $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $*/ /* + * Copyright (c) 2021 George Koehler * Copyright (c) 2007 Artur Grabowski * * Permission to use, copy, modify, and distribute this software for any @@ -22,15 +23,29 @@ #include #include -#include #include +/* + * If __ppc_lock() crosses a page boundary in the kernel text, then it + * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r() + * would recursively call __ppc_lock(). The lock must be in a valid + * state when the page fault happens. + * + * This lock has 2 fields, "cpuid" and "count", packed in a 32-bit + * "bolt", so we can use 32-bit atomic ops to ensure that our lock is + * always in a valid state. + */ +#define BOLT(cpuid, count) ((cpuid) << 24 | (count) & 0x00ff) +#define BOLT_CPUID(bolt) ((bolt) >> 24) +#define BOLT_COUNT(bolt) ((bolt) & 0x00ff) +#define BOLT_LOCKED(bolt) (BOLT_COUNT(bolt) != 0) +#define BOLT_UNLOCKED(bolt)(BOLT_COUNT(bolt) == 0) + void __ppc_lock_init(struct __ppc_lock *lock) { - lock->mpl_cpu = NULL; - lock->mpl_count = 0; + lock->mpl_bolt = 0; } #if defined(MP_LOCKDEBUG) @@ -46,12 +61,12 @@ static __inline void __ppc_lock_spin(struct __ppc_lock *mpl) { #ifndef MP_LOCKDEBUG - while (mpl->mpl_count != 0) + while (BOLT_LOCKED(mpl->mpl_bolt)) CPU_BUSY_CYCLE(); #else int nticks = __mp_lock_spinout; - while (mpl->mpl_count != 0 && --nticks > 0) + while (BOLT_LOCKED(mpl->mpl_bolt) && --nticks > 0) CPU_BUSY_CYCLE(); if (nticks == 0) { @@ -64,33 +79,23 @@ __ppc_lock_spin(struct __ppc_lock *mpl) void __ppc_lock(struct __ppc_lock *mpl) { - /* -* Please notice that mpl_count gets incremented twice for the -* first lock. This is on purpose. The way we release the lock -* in mp_unlock is to decrement the mpl_count and then check if -* the lock should be released. Since mpl_count is what we're -* spinning on, decrementing it in mpl_unlock to 0 means that -* we can't clear mpl_cpu, because we're no longer holding the -* lock. In theory mpl_cpu d
Re: macppc: add ld.script for kernel, ofwboot
On Fri, 7 May 2021 10:31:55 +0200 (CEST) Mark Kettenis wrote: > Makes sense to me. It seems ldd always seems to require a little bit > more coercion to produce non-standard binaries. We use linker scripts > for the various EFI bootloaders as well. > > ok kettenis@ My diff had an extra "pwd" in arch/macppc/stand/ofwboot/Makefile; I deleted the "pwd" before committing it. > > -${PROG}: ${OBJS} ${LIBSA} ${LIBZ} > > - ${LD} -nopie -znorelro -N -X -Ttext ${RELOC} -e ${ENTRY} -o ${PROG} \ > > +${PROG}: ${OBJS} ${LIBSA} ${LIBZ} ld.script > > + pwd > > + ${LD} -nopie -znorelro -N -X -T ${.CURDIR}/ld.script -o ${PROG} \ > > ${OBJS} ${LIBS} >From my experiments with lld 10, I believe that macppc is almost ready to switch from ld.bfd to ld.lld. I know of 2 other problems: 1. ports/lang/gcc/8 needs USE_LLD = No, because lld 10 can't link C++ code from gcc. (I have not yet checked lld 11.) lld had no problem with Fortran ports built by gcc. 2. All instances of -Wl,-relax or -Wl,--relax in src or ports must be deleted, because it is an unknown option to lld, but lld can link large binaries without the option. --George
macppc: add ld.script for kernel, ofwboot
Hello tech list, I want macppc to switch from ld.bfd to ld.lld, but there is a problem when lld links ofwboot or the kernel. I propose to fix it by adding ld.script for both. These scripts also work with ld.bfd, so I want to commit my diff at the end of this mail, ok? lld sets the symbol "etext" to a nonsense value like 0x1034. In ofwboot, wrong "etext" causes freeze, failure to boot kernel. (Wrong "etext" in kernel doesn't cause an obvious problem.) Other lld arches use an ld.script to set a correct "etext" in kernel. I copied the ld.script from powerpc64's kernel and made these changes for macppc's kernel: - change "_start" to "start" to match macppc/locore0.S - remove PT_DYNAMIC and sections (don't exist in macppc kernel) - put .text at 0x00100114 to match Makefile - remove symbols like "_erodata" (not used by macppc kernel) - add ".rodata.*" and such, so sections and segments look correct | --- arch/powerpc64/conf/ld.script Sat Jul 18 09:16:32 2020 | +++ arch/macppc/conf/ld.scriptSat Apr 24 11:52:34 2021 | @@ -16,18 +16,17 @@ | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | */ | | -ENTRY(_start) | +ENTRY(start) | | PHDRS | { | text PT_LOAD; | - dynamic PT_DYNAMIC; | openbsd_randomize PT_OPENBSD_RANDOMIZE; | } | | SECTIONS | { | - . = 0x0010; | + . = 0x00100114; | .text : | { | *(.text) | @@ -35,49 +34,35 @@ | PROVIDE (etext = .); | PROVIDE (_etext = .); | | - . = ALIGN(4096); | - .rela.dyn : { *(.rela.dyn) } | - | - .dynamic : | + .rodata : | { | - *(.dynamic) | - } :dynamic :text | + *(.rodata .rodata.*) | + } :text | | - .rodata : | + .data.rel.ro : | { | - *(.rodata) | *(.data.rel.ro) | } :text | | .openbsd.randomdata : | { | - *(.openbsd.randomdata) | + *(.openbsd.randomdata .openbsd.randomdata.*) | } :openbsd_randomize :text | - PROVIDE (_erodata = .); | | - . = ALIGN(4096); | .data : | { | *(.data) | } :text | | - . = ALIGN(4096); | - .got : { *(.got) } | - .toc : { *(.toc) } | + .sbss : | + { | + *(.sbss) | + } | | - PROVIDE (__bss_start = .); | .bss : | { | *(.bss) | } | PROVIDE (end = .); | PROVIDE (_end = .); | - | - /DISCARD/ : | - { | - *(.dynsym) | - *(.dynstr) | - *(.gnu.hash) | - *(.hash) | - } | } Then I made these changes for ofwboot: - use "_start" and 0x0002 to match Makefile - remove randomdata (doesn't exist in ofwboot) | --- arch/macppc/conf/ld.scriptSat Apr 24 11:52:34 2021 | +++ arch/macppc/stand/ofwboot/ld.script Sat Apr 24 11:52:34 2021 | @@ -16,17 +16,17 @@ | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | */ | | -ENTRY(start) | +ENTRY(_start) | | PHDRS | { | text PT_LOAD; | - openbsd_randomize PT_OPENBSD_RANDOMIZE; | } | | SECTIONS | { | - . = 0x00100114; | + /* Must match RELOC in Makefile */ | + . = 0x0002; | .text : | { | *(.text) | @@ -43,11 +43,6 @@ | { | *(.data.rel.ro) | } :text | - | - .openbsd.randomdata : | - { | - *(.openbsd.randomdata .openbsd.randomdata.*) | - } :openbsd_randomize :text | | .data : | { Index: arch/macppc/conf/Makefile.macppc === RCS file: /cvs/src/sys/arch/macppc/conf/Makefile.macppc,v retrieving revision 1.101 diff -u -p -r1.101 Makefile.macppc --- arch/macppc/conf/Makefile.macppc28 Jan 2021 17:39:03 - 1.101 +++ arch/macppc/conf/Makefile.macppc6 May 2021 20:01:08 - @@ -51,7 +51,7 @@ DEBUG?= -g COPTIMIZE?=-O2 CFLAGS=${DEBUG} ${CWARNFLAGS} ${CMACHFLAGS} ${COPTIMIZE} ${COPTS} ${PIPE} AFLAGS=-D_LOCORE ${CMACHFLAGS} -LINKFLAGS= -N -Ttext 100114 -e start --warn-common -nopie +LINKFLAGS= -N -T ld.script --warn-common -nopie .if ${MACHINE} == "powerpc64" CFLAGS+= -m32 Index: arch/macppc/conf/ld.script === RCS file: /cvs/src/sys/arch/macppc/conf/ld.script,v retrieving revision 1.1 diff -u -p -r1.1 ld.script --- arch/macppc/conf/ld.script 13 Jun 2017 01:42:52 - 1.1 +++ arch/macppc/conf/ld.script 6 May 2021 20:01:08 - @@ -0,0 +1,68 @@ +/* $OpenBSD: ld.script,v 1.4 2020/07/18 13:16:32 kettenis Exp $*/ + +/* + * Copyright (c) 2013 Mark Kettenis + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS
macppc bsd.mp pmap's hash lock
Hello tech list, If you have a macppc with more than one cpu, I would like you to try this diff in the GENERIC.MP kernel. I am running it on a dual G5 (without radeondrm and not running X). I don't know whether I want to commit this diff. In late April, my G5's kernel froze very early during boot, while trying to map the framebuffer. The problem went away after reordering and relinking the kernel. I kept a copy of the bad kernel. I found the problem on Tuesday: __ppc_lock() crossed a page boundary. $ nm -n /bsd.crash | grep __ppc_lock $ objdump -dlr --start-ad=0x27bf8c /bsd.crash|less The disassembly had 0x27fbf8c <= __ppc_lock < 0x27c058, so it crossed pages at 0x27c000; page size = 0x1000 = 4096. On a G5, the kernel lazily faults in its own pages. The page fault at 0x27c000 inside __ppc_lock caused a recursive call to __ppc_lock. I believe that the fault happened here in __ppc_lock: s = ppc_intr_disable(); if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) { membar_enter(); mpl->mpl_cpu = curcpu(); } if (mpl->mpl_cpu == curcpu()) { //--> // page fault! recursive call to __ppc_lock mpl->mpl_count++; ppc_intr_enable(s); This is bad, because the lock is not in a valid state when the page fault happens. The code tries ppc_intr_disable to protect the lock, but this doesn't disable page faults. The lock is a recursive spinlock that protects the pmap's hash table. My bad kernel tried to grab the lock to insert the 1st page of the framebuffer into the hash, and then tried to recursively grab the lock to insert page 0x27c000 into the hash. I debugged the problem by copying the bad kernel and overwriting some asm to insert some extra printf()s. The problem went away when my asm caused an earlier access to page 0x27c000. When I reorder the kernel, __ppc_lock gets a different address, and probably doesn't cross pages with a not-valid lock; but I can force a page fault this way: __asm volatile("b 1f; . = . + 4096; 1:"); If I insert this asm at my above "//-->" and compile GENERIC.MP, then it reproduces the freezing problem on my G5. The problem doesn't happen on a G3 or G4, where the kernel uses block address translation (!ppc_nobat); I copied my bad kernel to a G4 and it didn't freeze there. The problem doesn't happen in bsd.sp, where the lock doesn't exist. Our powerpc64 kernel doesn't fault in its own pages this way. I have observed the problem only with macppc on G5. In this diff, I try to fix the problem by shrinking the lock to 32 bits, and using 32-bit atomic ops to keep the lock in a valid state. This __ppc_lock is no longer the __mp_lock, but is only the pmap's hash lock, so I also delete some unused functions. --George Index: arch/powerpc/include/mplock.h === RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v retrieving revision 1.4 diff -u -p -r1.4 mplock.h --- arch/powerpc/include/mplock.h 15 Apr 2020 08:09:00 - 1.4 +++ arch/powerpc/include/mplock.h 6 May 2021 20:01:08 - @@ -30,13 +30,13 @@ #define __USE_MI_MPLOCK /* + * __ppc_lock exists because pte_spill_r() can't use __mp_lock. * Really simple spinlock implementation with recursive capabilities. * Correctness is paramount, no fancyness allowed. */ struct __ppc_lock { - volatile struct cpu_info *mpl_cpu; - volatile long mpl_count; + volatile unsigned int mpl_bolt; }; #ifndef _LOCORE @@ -44,10 +44,6 @@ struct __ppc_lock { void __ppc_lock_init(struct __ppc_lock *); void __ppc_lock(struct __ppc_lock *); void __ppc_unlock(struct __ppc_lock *); -int __ppc_release_all(struct __ppc_lock *); -int __ppc_release_all_but_one(struct __ppc_lock *); -void __ppc_acquire_count(struct __ppc_lock *, int); -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *); #endif Index: arch/powerpc/powerpc/lock_machdep.c === RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v retrieving revision 1.9 diff -u -p -r1.9 lock_machdep.c --- arch/powerpc/powerpc/lock_machdep.c 15 Apr 2020 08:09:00 - 1.9 +++ arch/powerpc/powerpc/lock_machdep.c 6 May 2021 20:01:08 - @@ -1,6 +1,7 @@ /* $OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $*/ /* + * Copyright (c) 2021 George Koehler * Copyright (c) 2007 Artur Grabowski * * Permission to use, copy, modify, and distribute this software for any @@ -22,15 +23,29 @@ #include #include -#include #include +/* + * If __ppc_lock() crosses a page boundary in the kernel text, then it + * may cause a page fault (on G5 with ppc_nobat), and pte_s
Re: Unlock top part of uvm_fault()
On Thu, 22 Apr 2021 15:38:53 +0200 Martin Pieuchot wrote: > Diff below remove the KERNEL_LOCK()/UNLOCK() dance from uvm_fault() for > both amd64 and sparc64. That means the kernel lock will only be taken > for lower faults and some amap/anon code will now run without it. I made a similar diff, below, for macppc and powerpc64. In my small trials, the build times were about the same on my 4-core powerpc64 and slightly faster on my 2-core macppc. My macppc kernel had a problem: it froze at boot, but the problem went away after I reordered the kernel. In my guess, this unlocking diff isn't the cause of the problem, because the freeze was too early (before copyright); the other cpu wouldn't have entered the kernel, so the kernel lock wouldn't prevent the freeze. My powerpc64 trial was "make -j4" in src/gnu/usr.bin/clang on 4-core POWER9 in Talos II. Before diff: 117m45.96s real 361m58.87s user68m44.66s system After diff: 115m24.53s real 362m02.65s user58m40.99s system I see that system time went down by 10 minutes, but real time is about the same (down by 2 minutes). My macppc trial was "dpb devel/cmake" on 2-cpu Power Mac G5. Before diff:03:44:50 After diff: 03:33:44 Real time went down by 11 minutes. Maybe the unlocking diff works better on a 2-core machine, or maybe the unlocking diff does nothing and I saved 11 minutes by random luck. --George Index: arch/powerpc/powerpc/trap.c === RCS file: /cvs/src/sys/arch/powerpc/powerpc/trap.c,v retrieving revision 1.119 diff -u -p -r1.119 trap.c --- arch/powerpc/powerpc/trap.c 11 Mar 2021 11:16:59 - 1.119 +++ arch/powerpc/powerpc/trap.c 28 Apr 2021 02:49:44 - @@ -282,9 +282,7 @@ trap(struct trapframe *frame) else ftype = PROT_READ; - KERNEL_LOCK(); error = uvm_fault(map, trunc_page(va), 0, ftype); - KERNEL_UNLOCK(); if (error == 0) return; @@ -318,10 +316,8 @@ trap(struct trapframe *frame) } else vftype = ftype = PROT_READ; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->dar), 0, ftype); - KERNEL_UNLOCK(); if (error == 0) { uvm_grow(p, frame->dar); @@ -343,10 +339,8 @@ trap(struct trapframe *frame) ftype = PROT_READ | PROT_EXEC; - KERNEL_LOCK(); error = uvm_fault(&p->p_vmspace->vm_map, trunc_page(frame->srr0), 0, ftype); - KERNEL_UNLOCK(); if (error == 0) { uvm_grow(p, frame->srr0); Index: arch/powerpc64/powerpc64/trap.c === RCS file: /cvs/src/sys/arch/powerpc64/powerpc64/trap.c,v retrieving revision 1.49 diff -u -p -r1.49 trap.c --- arch/powerpc64/powerpc64/trap.c 9 Jan 2021 13:14:02 - 1.49 +++ arch/powerpc64/powerpc64/trap.c 24 Apr 2021 03:21:02 - @@ -126,9 +126,7 @@ trap(struct trapframe *frame) access_type = PROT_READ | PROT_WRITE; else access_type = PROT_READ; - KERNEL_LOCK(); error = uvm_fault(map, trunc_page(va), 0, access_type); - KERNEL_UNLOCK(); if (error == 0) return; @@ -237,9 +235,7 @@ trap(struct trapframe *frame) access_type = PROT_READ | PROT_WRITE; else access_type = PROT_READ; - KERNEL_LOCK(); error = uvm_fault(map, trunc_page(va), 0, access_type); - KERNEL_UNLOCK(); if (error == 0) uvm_grow(p, va); @@ -284,9 +280,7 @@ trap(struct trapframe *frame) map = &p->p_vmspace->vm_map; va = frame->srr0; access_type = PROT_READ | PROT_EXEC; - KERNEL_LOCK(); error = uvm_fault(map, trunc_page(va), 0, access_type); - KERNEL_UNLOCK(); if (error == 0) uvm_grow(p, va);
Re: mg(1): dired-jump
On Thu, 18 Mar 2021 20:22:44 + (UTC) Mark Lumsden wrote: > This diff was first seen on the tech list just under a year ago. It is > from Philip K. and was tested by George Koehler. I have modified it > slightly. From Philips original post: You sent, or I received, your diff with wrong whitespace; I needed $ sed 's/^ / /' your.eml | patch -p0 One question below. > this patch implements "dired-jump" for mg. For those not familiar with > what dired-jump does in GNU Emacs, here's it's docstring: > > Jump to Dired buffer corresponding to current buffer. > If in a file, Dired the current directory and move to files line. > > > I find this useful. ok? > > Index: def.h > === > RCS file: /cvs/src/usr.bin/mg/def.h,v > retrieving revision 1.168 > diff -u -p -r1.168 def.h > --- def.h 1 Mar 2021 10:51:14 - 1.168 > +++ def.h 18 Mar 2021 20:15:34 - > @@ -363,6 +363,7 @@ intask_makedir(void); > > /* dired.c */ > struct buffer *dired_(char *); > +int dired_jump(int, int); > int do_dired(char *); > > /* file.c X */ > Index: dired.c > === > RCS file: /cvs/src/usr.bin/mg/dired.c,v > retrieving revision 1.98 > diff -u -p -r1.98 dired.c > --- dired.c 5 Mar 2021 16:16:53 - 1.98 > +++ dired.c 18 Mar 2021 20:15:34 - > @@ -53,6 +53,7 @@ static int d_refreshbuffer(int, int); > static int d_filevisitalt(int, int); > static int d_gotofile(int, int); > static void reaper(int); > +static intgotofile(char*); > static struct buffer*refreshbuffer(struct buffer *); > static int createlist(struct buffer *); > static void redelete(struct buffer *); > @@ -1091,13 +1092,38 @@ createlist(struct buffer *bp) > } > > int > +dired_jump(int f, int n) > +{ > + char dname[NFILEN], *fname; > + struct buffer *bp; > + int ret; > + > + if (getbufcwd(dname, sizeof(dname)) != TRUE) > + return (FALSE); > + > + fname = curbp->b_fname; > + > + if ((bp = dired_(dname)) == NULL) > + return (FALSE); > + curbp = bp; > + > + ret = showbuffer(bp, curwp, WFFULL | WFMODE); > + if (ret != TRUE) > + return ret; > + > + fname = adjustname(fname, TRUE); > + if (strlen(fname)) > + gotofile(fname); Should this be "if (fname != NULL)"? Some other callers of adjustname() check for a NULL pointer. strlen(NULL) would crash by SIGSEGV; strlen("") would return 0, because "" is a non-NULL pointer to an ASCII NUL '\0'. The old diff, that I had been running, called basename() here. You changed it to call adjustname(). That's better. I didn't like basename() because it didn't use the xbasename() wrapper. --George > + > + return (TRUE); > +} > + > +int > d_gotofile(int f, int n) > { > - struct line *lp, *nlp; > size_t lenfpath; > - char fpath[NFILEN], fname[NFILEN]; > - char*p, *fpth, *fnp = NULL; > - int tmp; > + char fpath[NFILEN]; > + char*fpth, *fnp = NULL; > > if (getbufcwd(fpath, sizeof(fpath)) != TRUE) > fpath[0] = '\0'; > @@ -1114,8 +1140,18 @@ d_gotofile(int f, int n) > ewprintf("No file to find");/* Current directory given so > */ > return (TRUE); /* return at present location. > */ > } > + return gotofile(fpth); > +} > + > +int > +gotofile(char *fpth) > +{ > + struct line *lp, *nlp; > + char fname[NFILEN]; > + char*p; > + int tmp; > + > (void)xbasename(fname, fpth, NFILEN); > - curbp = curwp->w_bufp; > tmp = 0; > for (lp = bfirstlp(curbp); lp != curbp->b_headp; lp = nlp) { > tmp++; > Index: funmap.c > === > RCS file: /cvs/src/usr.bin/mg/funmap.c,v > retrieving revision 1.59 > diff -u -p -r1.59 funmap.c > --- funmap.c 11 Jul 2019 18:20:18 - 1.59 > +++ funmap.c 18 Mar 2021 20:15:34 - > @@ -98,6 +98,7 @@ static struct funmap functnames[] = { > {desckey, "describe-key-briefly", 1}, > {diffbuffer, "diff-buffer-with-file", 0}, > {digit_argument, "digit-argument", 1}, > + {dired_jump, "dir
Re: OpenBSD perl 5.32.1 - Call for Testing
On Fri, 26 Feb 2021 11:31:07 -0800 Andrew Hewus Fresh wrote: > On Sat, Feb 20, 2021 at 02:11:45PM -0800, Andrew Hewus Fresh wrote: > > I've probably missed making it in for 6.9, but it is again time for > > testing a perl update so it can become /usr/bin/perl > > It was pointed out that there is still time to get this in for 6.9, and > sthen@ says it looks OK in a bulk ports build, so I'll be working on > preparing the import this weekend. Please let me know of any reasons > to hold off. Good news that ports can build. I had moved my amd64 and powerpc64 to perl 5.32.1, but have not yet built any ports. I will move my macppc to 5.32.1 sometime after your import it.--George
Re: [PATCH] Convert ddb_sysctl to sysctl_bounded_arr
On Fri, 04 Dec 2020 20:15:13 -0800 Greg Steuck wrote: > George Koehler writes: > > > If I do ddb set a bad value, then sysctl refuses to show the value: > > > > # sysctl ddb.console=1 > > ddb.console: 0 -> 1 > > # sysctl ddb.trigger=1 > > Stopped at ddb_sysctl+0x114: ori r0,r0,0x0 > > ddb{0}> set $radix = 0t2 > > ddb{0}> c > > ddb.trigger: 0 -> 1 > > # sysctl ddb.radix > > sysctl: ddb.radix: Invalid argument > > > > This diff might be better than doing nothing? I'm not sure. --George > > I'm game for changing the range of ddb.radix to [2..INT_MAX] if you > think that's better. I doubt it makes that much of a difference either > way. I prefer to keep [8..16]; your ddb_sysctl diff from Nov 30 is now ok gkoehler@ The only good $radix values are 8, 10, 16. I might want to restrict both "sysctl ddb.radix=" and "ddb> set $radix =" to 8, 10, 16; but I have not written the code to restrict them. Your diff restricts only "sysctl ddb.radix=" to [8..16], which is easy with sysctl_bounded_args and without my needing to write more code. Your diff is better than nothing. I'm sorry that I took so long to give the ok.
Re: [PATCH] Convert gre_sysctl to sysctl_bounded_arr
ok gkoehler@ with one comment change, see below. On Sat, 21 Nov 2020 15:36:58 -0800 Greg Steuck wrote: > diff --git sys/net/if_gre.c sys/net/if_gre.c > index cb5beeaad62..deacad9c755 100644 > --- sys/net/if_gre.c > +++ sys/net/if_gre.c > @@ -1109,7 +1109,7 @@ gre_input_key(struct mbuf **mp, int *offp, int type, > int af, uint8_t otos, > if (n == NULL) > goto decline; > if (n->m_data[off] >> 4 != IPVERSION) > - hlen += sizeof(gre_wccp); > + hlen += 4; /* four-octet GRE header */ > > /* FALLTHROUGH */ > } I want the comment to be, "four-octet Redirect header". This is my best guess after I glanced at section 4.12.1 of https://tools.ietf.org/html/draft-wilson-wrec-wccp-v2-00 The Redirect header would be after the GRE header; the sizeof(*gh) in "hlen = iphlen + sizeof(*gh);" was the GRE header. I agree that using sizeof(gre_wccp) is wrong, because that's the size of the sysctl net.inet.gre.wccp, not the size of any header. I don't have any way to test the wccp code. --George > @@ -4229,31 +4229,22 @@ drop: > return (NULL); > } > > +const struct sysctl_bounded_args gre_vars[] = { > + { GRECTL_ALLOW, &gre_allow, 0, 1 }, > + { GRECTL_WCCP, &gre_wccp, 0, 1 }, > +}; > + > int > gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, > size_t newlen) > { > int error; > > - /* All sysctl names at this level are terminal. */ > - if (namelen != 1) > - return (ENOTDIR); > - > - switch (name[0]) { > - case GRECTL_ALLOW: > - NET_LOCK(); > - error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow); > - NET_UNLOCK(); > - return (error); > - case GRECTL_WCCP: > - NET_LOCK(); > - error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp); > - NET_UNLOCK(); > - return (error); > - default: > - return (ENOPROTOOPT); > - } > - /* NOTREACHED */ > + NET_LOCK(); > + error = sysctl_bounded_arr(gre_vars, nitems(gre_vars), name, > + namelen, oldp, oldlenp, newp, newlen); > + NET_UNLOCK(); > + return error; > } > > static inline int > -- > 2.29.2 > -- George Koehler
Re: [PATCH] Convert ddb_sysctl to sysctl_bounded_arr
On Mon, 30 Nov 2020 20:04:10 -0800 Greg Steuck wrote: > Tested with a bunch of manual sysctl -w's. > > OK? I'm not sure about this diff. I'm more likely to do ddb{0}> set $radix = 0t2 and less likely to do # sysctl ddb.radix=2 but you enforce the range check (radix in 8..16) only in sysctl, not in ddb set. If I do ddb set a bad value, then sysctl refuses to show the value: # sysctl ddb.console=1 ddb.console: 0 -> 1 # sysctl ddb.trigger=1 Stopped at ddb_sysctl+0x114: ori r0,r0,0x0 ddb{0}> set $radix = 0t2 ddb{0}> c ddb.trigger: 0 -> 1 # sysctl ddb.radix sysctl: ddb.radix: Invalid argument This diff might be better than doing nothing? I'm not sure. --George > From 24ae202fd5d39c3c40c029fb878aa15eee33b709 Mon Sep 17 00:00:00 2001 > From: Greg Steuck > Date: Mon, 30 Nov 2020 19:42:19 -0800 > Subject: [PATCH] Convert ddb_sysctl to sysctl_bounded_arr > > --- > sys/ddb/db_usrreq.c | 22 ++ > 1 file changed, 10 insertions(+), 12 deletions(-) > > diff --git sys/ddb/db_usrreq.c sys/ddb/db_usrreq.c > index 546822459ca..259e7b56a8f 100644 > --- sys/ddb/db_usrreq.c > +++ sys/ddb/db_usrreq.c > @@ -36,6 +36,14 @@ > int db_log = 1; > int db_profile; /* Allow dynamic profiling */ > > +const struct sysctl_bounded_args ddb_vars[] = { > + { DBCTL_RADIX, &db_radix, 8, 16 }, > + { DBCTL_MAXWIDTH, &db_max_width, 0, INT_MAX }, > + { DBCTL_TABSTOP, &db_tab_stop_width, 1, 16 }, > + { DBCTL_MAXLINE, &db_max_line, 0, INT_MAX }, > + { DBCTL_LOG, &db_log, 0, 1 }, > +}; > + > int > ddb_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, > size_t newlen, struct proc *p) > @@ -47,15 +55,6 @@ ddb_sysctl(int *name, u_int namelen, void *oldp, size_t > *oldlenp, void *newp, > return (ENOTDIR); > > switch (name[0]) { > - > - case DBCTL_RADIX: > - return sysctl_int(oldp, oldlenp, newp, newlen, &db_radix); > - case DBCTL_MAXWIDTH: > - return sysctl_int(oldp, oldlenp, newp, newlen, &db_max_width); > - case DBCTL_TABSTOP: > - return sysctl_int(oldp, oldlenp, newp, newlen, > &db_tab_stop_width); > - case DBCTL_MAXLINE: > - return sysctl_int(oldp, oldlenp, newp, newlen, &db_max_line); > case DBCTL_PANIC: > if (securelevel > 0) > return (sysctl_int_lower(oldp, oldlenp, newp, newlen, > @@ -86,8 +85,6 @@ ddb_sysctl(int *name, u_int namelen, void *oldp, size_t > *oldlenp, void *newp, > return (0); > } > break; > - case DBCTL_LOG: > - return (sysctl_int(oldp, oldlenp, newp, newlen, &db_log)); > case DBCTL_TRIGGER: > if (newp && db_console) { > struct process *pr = curproc->p_p; > @@ -119,7 +116,8 @@ ddb_sysctl(int *name, u_int namelen, void *oldp, size_t > *oldlenp, void *newp, > break; > #endif /* DDBPROF */ > default: > - return (EOPNOTSUPP); > + return (sysctl_bounded_arr(ddb_vars, nitems(ddb_vars), name, > + namelen, oldp, oldlenp, newp, newlen)); > } > /* NOTREACHED */ > } > -- > 2.29.2 > -- George Koehler
Re: Dell XPS 9310 succesful install but bootloader can't read disk label
On Wed, 2 Dec 2020 01:59:00 +0100 Noth wrote: > Disk: sd0 Usable LBA: 34 to 4000797326 [4000797360 Sectors] > #: type [ start: size ] > > 0: EFI Sys [ 2048: 389120 ] > 1: e3c9e316-0b5c-4db8-817d-f92df00215ae [ 391168: 262144 ] > 2: FAT12 [ 653312: 1071679488 ] > 3: 516e7cba-6ecf-11d6-8ff8-00022d09712b [ 1072332800: 3905536 ] > 4: Linux files* [ 1076238336: 2692558848 ] > 5: OpenBSD [ 3768797184: 195311616 ] > 6: Win Recovery [ 3964108800: 2027520 ] > 7: Win Recovery [ 3966136320: 31772672 ] > 8: Win Recovery [ 3997911040: 2885632 ] OpenBSD offset 3768797184 is about 1797G. I believe that our EFI bootloader works only if OpenBSD partition 'a' is in the first 1024G of the drive. Back in January 2020, I suspected that a daddr32_t would overflow in /sys/arch/amd64/stand/efiboot/efidev.c, see https://marc.info/?l=openbsd-bugs&m=158007879212894&w=2 I have no drives larger than 1024G, so I have no way to reproduce the problem.--George
Re: lld for macppc kernel
On Mon, 23 Nov 2020 22:59:19 -0500 George Koehler wrote: > My macppc can now build, link, > and boot the GENERIC kernel with either ld.bfd or ld.lld. ld.lld with my diff can link macppc kernels GENERIC, GENERIC.MP, and RAMDISK, and I can boot and run them, but all macppc kernels from lld have &etext == 0x1034 which is far too high. $ nm bsd|grep etext 1034 T etext ofwboot with ld.lld doesn't work; I needed to relink ofwboot with ld.bfd to boot any kernel. ofwboot also had &etext == 0x1034, but I don't know what caused ofwboot to fail. I still ask if ld.lld diff for R_PPC_ADDR24 is ok.
lld for macppc kernel
With this diff, lld can link macppc's kernel. The first part of the diff adds R_PPC_ADDR24 to lld. I didn't find code in upstream lld's git for R_PPC_ADDR24. We need R_PPC_ADDR24 for "ba" and "bla" in locore.S. The second part edits the kernel's Makefile.macppc. I add gapdummy (like in Makefile.powerpc64) to avoid an lld error. I also adapt a part of Makefile.i386, so my powerpc64 can build and link the macppc kernel. (I didn't boot that kernel.) My macppc can now build, link, and boot the GENERIC kernel with either ld.bfd or ld.lld. ok to commit? Index: gnu/llvm/lld/ELF/Arch/PPC.cpp === RCS file: /cvs/src/gnu/llvm/lld/ELF/Arch/PPC.cpp,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 PPC.cpp --- gnu/llvm/lld/ELF/Arch/PPC.cpp 3 Aug 2020 14:32:29 - 1.1.1.1 +++ gnu/llvm/lld/ELF/Arch/PPC.cpp 23 Nov 2020 19:49:47 - @@ -220,6 +220,7 @@ RelExpr PPC::getRelExpr(RelType type, co case R_PPC_ADDR16_HA: case R_PPC_ADDR16_HI: case R_PPC_ADDR16_LO: + case R_PPC_ADDR24: case R_PPC_ADDR32: return R_ABS; case R_PPC_DTPREL16: @@ -344,6 +345,7 @@ void PPC::relocateOne(uint8_t *loc, RelT break; } case R_PPC_REL24: + case R_PPC_ADDR24: case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FC; Index: sys/arch/macppc/conf/Makefile.macppc === RCS file: /cvs/src/sys/arch/macppc/conf/Makefile.macppc,v retrieving revision 1.99 diff -u -p -r1.99 Makefile.macppc --- sys/arch/macppc/conf/Makefile.macppc7 Nov 2019 20:42:28 - 1.99 +++ sys/arch/macppc/conf/Makefile.macppc23 Nov 2020 19:49:49 - @@ -53,6 +53,13 @@ CFLAGS= ${DEBUG} ${CWARNFLAGS} ${CMACHF AFLAGS=-D_LOCORE ${CMACHFLAGS} LINKFLAGS= -N -Ttext 100114 -e start --warn-common -nopie +.if ${MACHINE} == "powerpc64" +CFLAGS+= -m32 +AFLAGS+= -m32 +LDFLAGS= -melf32ppc +LINKFLAGS+=${LDFLAGS} +.endif + HOSTCC?= ${CC} HOSTED_CPPFLAGS=${CPPFLAGS:S/^-nostdinc$//} HOSTED_CFLAGS= ${CFLAGS} @@ -123,12 +130,16 @@ ioconf.o: ioconf.c ld.script: ${_machdir}/conf/ld.script cp ${_machdir}/conf/ld.script $@ +gapdummy.o: + echo '__asm(".section .rodata,\"a\"");' > gapdummy.c + ${CC} -c ${CFLAGS} ${CPPFLAGS} gapdummy.c -o $@ + makegap.sh: cp $S/conf/makegap.sh $@ -MAKE_GAP = LD="${LD}" sh makegap.sh 0x # guaranteed illegal +MAKE_GAP = LD="${LD}" sh makegap.sh 0x gapdummy.o -gap.o: Makefile makegap.sh vers.o +gap.o: Makefile makegap.sh gapdummy.o vers.o ${MAKE_GAP} vers.o: ${SYSTEM_DEP:Ngap.o} @@ -137,7 +148,7 @@ vers.o: ${SYSTEM_DEP:Ngap.o} clean: rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* \ - gap.link ld.script lorder makegap.sh param.c + gap.link gapdummy.c ld.script lorder makegap.sh param.c cleandir: clean rm -f Makefile *.h ioconf.c options machine ${_mach} vers.c
Re: powerpc lld fix
On Sun, 15 Nov 2020 17:23:09 +0100 (CET) Mark Kettenis wrote: > This is the diff from: > > https://reviews.llvm.org/D75419 > > which is needed to be able to link llvm with lld on powerpc. > > Not enough to link code generated by gcc 4.x though, but if I dissable > building gcc a make build completes. > > What would the impact on ports of disabling base-gcc be on powerpc? > > ok? ok gkoehler@ This diff is in upstream llvm, so let's take it as is. Large PIC uses pointers at -32768(%r30), -32764(%r30), ..., where r30 points to .LTOC in the .got2 section. Each file can have a different .LTOC, so the addend like -32768 fits in a signed 16-bit integer. .got2 can hold pointers to symbols in other shared objects, but powerpc clang also uses .got2 for pointers to local symbols in the same file! For example, "int f(void){static int i; return i++;}" would put the address of "static int i" in .got2. In upstream's test case, lld links 2 copies of .text.foo, discards 1 copy, and gets stuck with an extra .got2 pointer to a discarded local symbol. After the diff, lld leaves an extra .got2 pointer with R_PPC_NONE: $ objdump -rsj .got2 exam2.o ... RELOCATION RECORDS FOR [.got2]: OFFSET TYPE VALUE R_PPC_ADDR32 .text.foo+0x0004 0004 R_PPC_NONE*ABS*+0x0004 lld can't link a macppc kernel, because it can't handle branch absolute "ba" instructions with R_PPC_ADDR24 = 2: | ld: error: locore.S:(.text+0x208): unknown relocation (2) against symbol s_trap After I link ld.lld to /usr/bin/ld, the build of base-gcc fails with | ld: error: unable to find library -lc because we haven't taught base-gcc to pass -L/usr/lib to ld, as we did on amd64. I don't use base-gcc on powerpc, and won't miss it if we disable it. I suspect that ports-gcc will need a patch to do -L/usr/lib.--George > Index: gnu/llvm/lld/ELF/InputSection.cpp > === > RCS file: /cvs/src/gnu/llvm/lld/ELF/InputSection.cpp,v > retrieving revision 1.1.1.2 > diff -u -p -r1.1.1.2 InputSection.cpp > --- gnu/llvm/lld/ELF/InputSection.cpp 9 Aug 2020 15:52:06 - 1.1.1.2 > +++ gnu/llvm/lld/ELF/InputSection.cpp 15 Nov 2020 16:15:46 - > @@ -438,11 +438,12 @@ void InputSection::copyRelocations(uint8 >// hopefully creates a frame that is ignored at runtime. Also, don't > warn >// on .gcc_except_table and debug sections. >// > - // See the comment in maybeReportUndefined for PPC64 .toc . > + // See the comment in maybeReportUndefined for PPC32 .got2 and PPC64 > .toc >auto *d = dyn_cast(&sym); >if (!d) { > if (!isDebugSection(*sec) && sec->name != ".eh_frame" && > -sec->name != ".gcc_except_table" && sec->name != ".toc") { > +sec->name != ".gcc_except_table" && sec->name != ".got2" && > +sec->name != ".toc") { >uint32_t secIdx = cast(sym).discardedSecIdx; >Elf_Shdr_Impl sec = >CHECK(file->getObj().sections(), file)[secIdx]; > Index: gnu/llvm/lld/ELF/Relocations.cpp > === > RCS file: /cvs/src/gnu/llvm/lld/ELF/Relocations.cpp,v > retrieving revision 1.1.1.1 > diff -u -p -r1.1.1.1 Relocations.cpp > --- gnu/llvm/lld/ELF/Relocations.cpp 3 Aug 2020 14:32:29 - 1.1.1.1 > +++ gnu/llvm/lld/ELF/Relocations.cpp 15 Nov 2020 16:15:46 - > @@ -926,8 +926,12 @@ static bool maybeReportUndefined(Symbol >// .toc and the .rela.toc are incorrectly not placed in the comdat. The ELF >// spec says references from outside the group to a STB_LOCAL symbol are > not >// allowed. Work around the bug. > - if (config->emachine == EM_PPC64 && > - cast(sym).discardedSecIdx != 0 && sec.name == ".toc") > + // > + // PPC32 .got2 is similar but cannot be fixed. Multiple .got2 is infeasible > + // because .LC0-.LTOC is not representable if the two labels are in > different > + // .got2 > + if (cast(sym).discardedSecIdx != 0 && > + (sec.name == ".got2" || sec.name == ".toc")) > return false; > >bool isWarning = > -- George Koehler
Re: macppc clang: fix va_arg in Objective-C
On Wed, 4 Nov 2020 00:21:15 -0500 George Koehler wrote: > Hello tech list, > > clang for 32-bit powerpc has a bug that breaks va_arg(3) when the > argument type is an object or block in Objective-C. This breaks > GNUstep on macppc. This clang diff fixes GNUstep. Objective-C uses > pointers to represent objects and blocks, so this diff tells clang's > va_arg to handle things with pointer representations as pointers. > > Anthony Richardby found and reported the bug, > https://bugs.llvm.org/show_bug.cgi?id=47921 I want this diff (also at https://reviews.llvm.org/D90329). The first change (isInt) fixes GNUstep, in a simpler way than my diff from 2 weeks ago. The second change (isIndirect) fixes va_arg(3) with some C++ types. The diff only affects 32-bit powerpc. ok to commit? Index: clang/lib/CodeGen/TargetInfo.cpp === RCS file: /cvs/src/gnu/llvm/clang/lib/CodeGen/TargetInfo.cpp,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 TargetInfo.cpp --- clang/lib/CodeGen/TargetInfo.cpp9 Aug 2020 15:51:11 - 1.1.1.2 +++ clang/lib/CodeGen/TargetInfo.cpp10 Nov 2020 11:41:53 - @@ -4248,13 +4248,12 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(Co // }; bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; - bool isInt = - Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); + bool isInt = !Ty->isFloatingType(); bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; // All aggregates are passed indirectly? That doesn't seem consistent // with the argument-lowering code. - bool isIndirect = Ty->isAggregateType(); + bool isIndirect = isAggregateTypeForABI(Ty); CGBuilderTy &Builder = CGF.Builder;
Re: Fix ilogb(3)
Your ilogb fix is ok gkoehler@ On Sat, 31 Oct 2020 16:09:07 +0100 (CET) Mark Kettenis wrote: > - Dropping the amd64 and i386 versions. Fixing the corner cases in > assembly is hard, and the C implementation should be fast enough for > regular floating-point values. The amd64 and i386 assembly uses the x87 fxtract instruction. I feel that x87 instructions are obsolete on amd64. One might want to use fxtract on i386, but software might call frexp(3) to get the exponent, and we have frexp(3) in C. I am ok with ilogb(3) in C.
Re: macppc clang: fix va_arg in Objective-C
On Wed, 4 Nov 2020 00:21:15 -0500 George Koehler wrote: > I posted this diff at https://reviews.llvm.org/D90329 ... > > ok to commit? --George No, I withdraw my diff. In about a week, I might have a better diff that also fixes va_arg with some C++ types. This only affects 32-bit powerpc; nothing will change on powerpc64 or elsewhere. --George > Index: clang/lib/CodeGen/TargetInfo.cpp > === > RCS file: /cvs/src/gnu/llvm/clang/lib/CodeGen/TargetInfo.cpp,v > retrieving revision 1.1.1.2 > diff -u -p -r1.1.1.2 TargetInfo.cpp > --- clang/lib/CodeGen/TargetInfo.cpp 9 Aug 2020 15:51:11 - 1.1.1.2 > +++ clang/lib/CodeGen/TargetInfo.cpp 28 Oct 2020 23:43:54 - > @@ -4248,8 +4248,8 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(Co >// }; > >bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; > - bool isInt = > - Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); > + bool isInt = Ty->isIntegerType() || Ty->hasPointerRepresentation() || > + Ty->isAggregateType(); >bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; > >// All aggregates are passed indirectly? That doesn't seem consistent -- George Koehler
macppc clang: fix va_arg in Objective-C
Hello tech list, clang for 32-bit powerpc has a bug that breaks va_arg(3) when the argument type is an object or block in Objective-C. This breaks GNUstep on macppc. This clang diff fixes GNUstep. Objective-C uses pointers to represent objects and blocks, so this diff tells clang's va_arg to handle things with pointer representations as pointers. Anthony Richardby found and reported the bug, https://bugs.llvm.org/show_bug.cgi?id=47921 I posted this diff at https://reviews.llvm.org/D90329 but upstream llvm might do something else. I used a clang with this diff to rebuild src and xenocara on macppc. ok to commit? --George Index: clang/lib/CodeGen/TargetInfo.cpp === RCS file: /cvs/src/gnu/llvm/clang/lib/CodeGen/TargetInfo.cpp,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 TargetInfo.cpp --- clang/lib/CodeGen/TargetInfo.cpp9 Aug 2020 15:51:11 - 1.1.1.2 +++ clang/lib/CodeGen/TargetInfo.cpp28 Oct 2020 23:43:54 - @@ -4248,8 +4248,8 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(Co // }; bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64; - bool isInt = - Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType(); + bool isInt = Ty->isIntegerType() || Ty->hasPointerRepresentation() || + Ty->isAggregateType(); bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64; // All aggregates are passed indirectly? That doesn't seem consistent
Re: [macppc] clang-10: issue with ppc dag to dag pattern instruction selection
Moving from bugs to tech. cwen reported that base-clang crashed on macppc in graphics/babl and emulators/mednafen [1]. I observed that clang crashed on powerpc64 in mednafen. I now propose to backport a commit in llvm 11.x git [2] to prevent these crashes. This change affects other arches. [1] https://marc.info/?l=openbsd-bugs&m=159775378014683&w=2 [2] https://github.com/llvm/llvm-project/commit/d4ce862 FreeBSD on powerpc64 found the problems earlier: https://bugs.llvm.org/show_bug.cgi?id=45237 (babl) https://bugs.llvm.org/show_bug.cgi?id=45274 (mednafen) The problem is with clang -fno-unsafe-math-optimizations, one of the new floating-point options in clang-10. The diff disables the new options on all targets except X86 (and SystemZ), so it also affects arches other than PowerPC. The disabled options cause a warning. With the diff in clang: - mednafen stops using -fno-unsafe-math-optimizations, because the new warning fails the configure test. I can build and package mednafen on macppc (where a single *m68k*.cpp takes over 2 hours), but when I try to play Super Mario World (snes), mednafen crashes with a SIGSEGV. The build on powerpc64 stops crashing clang, but fails at an altivec error. - babl uses -fno-unsafe-math-optimizations and ignores the new warning. I can build and package babl, and all 27 tests pass on both macppc and powerpc64. - I have built src and xenocara on powerpc64. The diff is in llvm 11.x. To backport the diff, I manually applied some parts (where the context had changed), and I renamed RoundingMode::NearestTiesToEven back to FPR_RoundToNearest. Apply the diff in /usr/src/gnu/llvm. If DiagnosticFrontendKinds.inc exists under /usr/obj/gnu/usr.bin/clang/, you must delete the .inc, because there is a missing dependency in our Makefiles. Is this OK? Should we do something else? --George Index: clang/docs/ClangCommandLineReference.rst === RCS file: /cvs/src/gnu/llvm/clang/docs/ClangCommandLineReference.rst,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 ClangCommandLineReference.rst --- clang/docs/ClangCommandLineReference.rst9 Aug 2020 15:51:07 - 1.1.1.2 +++ clang/docs/ClangCommandLineReference.rst30 Aug 2020 19:37:51 - @@ -818,6 +818,10 @@ Enables the experimental global instruct Enables an experimental new pass manager in LLVM. +.. option:: -fexperimental-strict-floating-point + +Enables the use of non-default rounding modes and non-default exception handling on targets that are not currently ready. + .. option:: -ffine-grained-bitfield-accesses, -fno-fine-grained-bitfield-accesses Use separate accesses for consecutive bitfield runs with legal widths and alignments. Index: clang/include/clang/Basic/CodeGenOptions.def === RCS file: /cvs/src/gnu/llvm/clang/include/clang/Basic/CodeGenOptions.def,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 CodeGenOptions.def --- clang/include/clang/Basic/CodeGenOptions.def3 Aug 2020 14:31:32 - 1.1.1.1 +++ clang/include/clang/Basic/CodeGenOptions.def30 Aug 2020 19:37:51 - @@ -58,6 +58,8 @@ CODEGENOPT(DisableLLVMPasses , 1, 0) /// ///< frontend. CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0 +CODEGENOPT(ExperimentalStrictFloatingPoint, 1, 0) ///< Enables the new, experimental + ///< strict floating point. CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental ///< pass manager. CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td === RCS file: /cvs/src/gnu/llvm/clang/include/clang/Basic/DiagnosticFrontendKinds.td,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 DiagnosticFrontendKinds.td --- clang/include/clang/Basic/DiagnosticFrontendKinds.td3 Aug 2020 14:31:32 - 1.1.1.1 +++ clang/include/clang/Basic/DiagnosticFrontendKinds.td30 Aug 2020 19:37:51 - @@ -37,6 +37,12 @@ def note_fe_backend_plugin: Note<"%0">, def warn_fe_override_module : Warning< "overriding the module target triple with %0">, InGroup>; +def warn_fe_backend_unsupported_fp_rounding : Warning< +"overriding currently unsupported rounding mode on this target">, +InGroup; +def warn_fe_backend_unsupported_fp_exceptions : Warning< +"overriding currently unsupported use of floating point exceptions " +"on this target">, InGroup; def remark_fe_backend_optimization_remark : Remark<"%0">, BackendInfo, InGroup; Index: clang/include/clang/Basic/DiagnosticGroups.td
Re: [PATCH 1/2] Add DMA remapping support for Intel VT-d and AMD Vi
On Tue, 11 Aug 2020 15:11:13 -0500 Your_Name wrote: > From 6990aec14f90638e1c433a71a30e4720da45ec86 Mon Sep 17 00:00:00 2001 > From: Jordan > Date: Tue, 11 Aug 2020 14:51:17 -0500 > Subject: [PATCH 1/2] Add DMA remapping support for Intel VT-d and AMD Vi > > I have been working on adding support for PCI Passthrough to VMM. This > initial patch adds support for the IOMMU for Intel and AMD processors. > It hooks the initial PCI probe and replaces the bus_dma_xxx functions > for remapping page I/O. I have tested this on my Intel and Ryzen > boxes but would like some more eyes and testing on this. My AMD Ryzen 5 3400G has so much trouble with your pci passthrough diffs that I need to reboot to a snap kernel without your diffs. One of the symptoms is an acpidmar "IOMMU Error" that appears at boot when I'm starting the network: starting network === IOMMU Error[]: io page fault ... re0: 192.168.0.104 lease... This time, I had unplugged my axe0, so my only network interfaces are the motherboard's re0 and the usual lo0, enc0, pflog0. I'm running dhclient on re0. Even with the error, dhclient succeeds and the re0 interface works normally. The error doesn't always appear: it seems more likely to appear if I power on (not reboot) after the computer has been powered off for more than a short amount of time. I also get some "ivhd command timeout"s while the system is running. These timeouts can appear on ttyC0 after the network starts and before xenodm starts, or in xconsole after xenodm starts. There are problems with my boot drive (sd1 at scsibus1 at ahci0). I tried to cvs up my /usr/src, got a warning that some CVS/Root wasn't an absolute path. Moments later, the kernel froze (maybe trying to enter ddb) and I forced a reboot into a snap kernel without acpidmar. Then cvs up worked and the CVS/Root warning didn't appear. All the previous symptoms happen without "vmctl start"ing any guests. vmd is running with only a disabled template vm in vm.conf. This dmesg includes the IOMMU Error and some timeouts. amdgpu is disabled. OpenBSD 6.7-current (GENERIC.MP) #1: Tue Aug 25 14:57:27 EDT 2020 r...@pehcehwahn.my.domain:/sys/arch/amd64/compile/GENERIC.MP real mem = 14961672192 (14268MB) avail mem = 14491107328 (13819MB) random: good seed from bootblocks mpath0 at root scsibus0 at mpath0: 256 targets mainbus0 at root bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xe6cf0 (59 entries) bios0: vendor American Megatrends Inc. version "M.30" date 09/18/2019 bios0: Micro-Star International Co., Ltd MS-7B86 acpi0 at bios0: ACPI 6.0 acpi0: sleep states S0 S3 S4 S5 acpi0: tables DSDT FACP APIC FPDT FIDT SSDT SSDT SSDT MCFG HPET UEFI IVRS CRAT CDIT BGRT SSDT SSDT SSDT SSDT WSMT acpi0: wakeup devices GPP0(S4) GPP2(S4) GPP3(S4) GPP4(S4) GPP5(S4) GPP6(S4) GP17(S4) XHC0(S4) XHC1(S4) GP18(S4) GPP1(S4) PT21(S3) PTXH(S4) acpitimer0 at acpi0: 3579545 Hz, 32 bits acpimadt0 at acpi0 addr 0xfee0: PC-AT compat cpu0 at mainbus0: apid 0 (boot processor) cpu0: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3700.62 MHz, 17-18-01 cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu0: smt 0, core 0, package 0 mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges cpu0: apic clock running at 25MHz cpu0: mwait min=64, max=64, C-substates=1.1, IBE cpu1 at mainbus0: apid 2 (application processor) cpu1: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.95 MHz, 17-18-01 cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu1: smt 0, core 1, package 0 cpu2 at mainbus0: apid 4 (application processor) cpu2: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.95 MHz, 17-18-01 cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA
Re: Clang floating-point alignment on powerpc
On Tue, 11 Aug 2020 03:52:10 -0400 Brad Smith wrote: > It looks like after this went in which is included with 10 that this local > diff can be removed.. > > https://reviews.llvm.org/D71954 Looks correct to me: the new Subtarget.allowsUnalignedFPAccess() check obsoletes the local diff that you are removing. I want to build your diff on macppc, but that will take a few days. After this removal, our other local diffs for PowerPC llvm will be: - -msvr4-struct-return: this is already in upstream git. - max atomic size in PPCISelLowering.cpp: this isn't upstream; I should send it upstream but haven't done so. - support for OpenBSD/powerpc64 > Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp > === > RCS file: /home/cvs/src/gnu/llvm/llvm/lib/Target/PowerPC/PPCISelLowering.cpp,v > retrieving revision 1.1.1.2 > diff -u -p -u -p -r1.1.1.2 PPCISelLowering.cpp > --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp 9 Aug 2020 15:50:15 > - 1.1.1.2 > +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp 11 Aug 2020 07:39:07 > - > @@ -15299,14 +15299,6 @@ bool PPCTargetLowering::allowsMisaligned >if (VT == MVT::ppcf128) > return false; > > - if (Subtarget.isTargetOpenBSD()) { > -// Traditional PowerPC does not support unaligned memory access > -// for floating-point and the OpenBSD kernel does not emulate > -// all possible floating-point load and store instructions. > -if (VT == MVT::f32 || VT == MVT::f64) > - return false; > - } > - >if (Fast) > *Fast = true; > > Index: llvm/lib/Target/PowerPC/PPCSubtarget.h > === > RCS file: /home/cvs/src/gnu/llvm/llvm/lib/Target/PowerPC/PPCSubtarget.h,v > retrieving revision 1.1.1.1 > diff -u -p -u -p -r1.1.1.1 PPCSubtarget.h > --- llvm/lib/Target/PowerPC/PPCSubtarget.h3 Aug 2020 14:30:25 - > 1.1.1.1 > +++ llvm/lib/Target/PowerPC/PPCSubtarget.h11 Aug 2020 07:38:26 - > @@ -320,7 +320,6 @@ public: >bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } >bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } >bool isTargetLinux() const { return TargetTriple.isOSLinux(); } > - bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); } > >bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } >bool isAIXABI() const { return TargetTriple.isOSAIX(); } -- George Koehler
Re: timekeep: tk_generation problem
On Tue, 14 Jul 2020 11:59:14 +0200 (CEST) Mark Kettenis wrote: > Yeah, one possible approach would be to increment ogen by two. A > little bit easier to check that they can never be the same since one > is always odd and the other is always even. > > Another possible approach would be to export both timehands. This > could help avoiding some of the looping int the bin*time() functions > in libc. Not sure that's worth it. And we should probably go for the > "quick" fix initially regardless. I suspect that the bin*time() functions almost never loop back. Here's a quick fix: read ogen from the other struct timehands, so there is only one sequence of generation values, giving odds to th0 and evens to th1 (until the generation overflows and skips zero, then it would give evens to th0 and odds to th1). I like this better than my first diff. OK? Index: kern/kern_tc.c === RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.62 diff -u -p -r1.62 kern_tc.c --- kern/kern_tc.c 6 Jul 2020 13:33:09 - 1.62 +++ kern/kern_tc.c 14 Jul 2020 21:58:57 - @@ -583,8 +583,8 @@ tc_windup(struct bintime *new_boottime, * the contents, the generation must be zero. */ tho = timehands; + ogen = tho->th_generation; th = tho->th_next; - ogen = th->th_generation; th->th_generation = 0; membar_producer(); memcpy(th, tho, offsetof(struct timehands, th_generation));
Re: timekeep: tk_generation problem
On Mon, 13 Jul 2020 01:18:14 -0500 Scott Cheloha wrote: > To review, the update protocol is: > > 1. Set tk_generation to zero; the update has begun. > > 2. Memory barrier. The side effects of step (1) are "visible" to >other CPUs before those of step (3). > > 3. Update the other tk_* members from the active timehands. > > 4. Memory barrier. The side effects of step (3) are "visible" before >those of step (5). > > 5. Set tk_generation to th_generation; the update is over. The >timekeep page is consistent once more. > > -- > > As far as I can tell, tk_generation always changes after an update, > during step (5). There are 2 values, th0.th_generation and th1.th_generation. When the kernel updates tk_generation, it switches between these 2 values, but the values might be equal. >From /sys/kern/kern_tc.c tc_windup(): tho = timehands; th = tho->th_next; ogen = th->th_generation; th->th_generation = 0; membar_producer(); ... /* update th */ if (++ogen == 0) ogen = 1; membar_producer(); th->th_generation = ogen; timehands is a circular list, where th0->th_next == th1 and th1->th_next == th0. Now suppose that we call tc_windup() with, timehands == th0 th0.th_generation == 2177 th1.th_generation == 2176 We assign tho = th0, th = th1, and ogen = 2176. After we update the clock, we increment ogen and set th1.th_generation = 2177. When we update the user timekeep page, we change tk_generation from 2177, the old th0.th_generation, to 2177, the new th1.th_generation; but libc sees the same value 2177 and can't detect the update. We update the clock 100 times per second (in a 100 Hz kernel), but were incrementing tk_generation only 50 times per second. I run the diff from my first mail (copied below). It moves apart the initial values of th_generation, so they don't become equal: th0.th_generation = UINT_MAX / 2/* changed from 1 */ th1.th_generation = 0 /* not changed */ It might be better to change how tc_windup() sets ogen.--George Index: kern/kern_tc.c === RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.62 diff -u -p -r1.62 kern_tc.c --- kern/kern_tc.c 6 Jul 2020 13:33:09 - 1.62 +++ kern/kern_tc.c 13 Jul 2020 02:59:58 - @@ -98,7 +98,8 @@ static struct timehands th0 = { .th_counter = &dummy_timecounter, .th_scale = UINT64_MAX / 100, .th_offset = { .sec = 1, .frac = 0 }, - .th_generation = 1, + /* Keep apart generations of th0, th1, for user timekeep. */ + .th_generation = UINT_MAX / 2, .th_next = &th1 };
Re: timekeep: tk_generation problem
On Mon, 13 Jul 2020 10:24:49 +0300 Paul Irofti wrote: > I am assuming you tested on amd64. Mind sharing the dmesg? Could this be due > to the lack of RDTSC serialization in userland? I got the problem on my 4-core amd64 and on a 1-cpu macppc. Here's my amd64 dmesg. In my next mail, I will describe the problem in more detail. OpenBSD 6.7-current (GENERIC.MP) #111: Sun Jul 12 22:50:35 EDT 2020 r...@pehcehwahn.my.domain:/sys/arch/amd64/compile/GENERIC.MP real mem = 14961672192 (14268MB) avail mem = 14493188096 (13821MB) random: boothowto does not indicate good seed mpath0 at root scsibus0 at mpath0: 256 targets mainbus0 at root bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xe6cf0 (59 entries) bios0: vendor American Megatrends Inc. version "M.30" date 09/18/2019 bios0: Micro-Star International Co., Ltd MS-7B86 acpi0 at bios0: ACPI 6.0 acpi0: sleep states S0 S3 S4 S5 acpi0: tables DSDT FACP APIC FPDT FIDT SSDT SSDT SSDT MCFG HPET UEFI IVRS CRAT CDIT BGRT SSDT SSDT SSDT SSDT WSMT acpi0: wakeup devices GPP0(S4) GPP2(S4) GPP3(S4) GPP4(S4) GPP5(S4) GPP6(S4) GP17(S4) XHC0(S4) XHC1(S4) GP18(S4) GPP1(S4) PT21(S3) PTXH(S4) acpitimer0 at acpi0: 3579545 Hz, 32 bits acpimadt0 at acpi0 addr 0xfee0: PC-AT compat cpu0 at mainbus0: apid 0 (boot processor) cpu0: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3700.62 MHz, 17-18-01 cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu0: smt 0, core 0, package 0 mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges cpu0: apic clock running at 24MHz cpu0: mwait min=64, max=64, C-substates=1.1, IBE cpu1 at mainbus0: apid 2 (application processor) cpu1: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.94 MHz, 17-18-01 cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu1: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu1: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu1: smt 0, core 1, package 0 cpu2 at mainbus0: apid 4 (application processor) cpu2: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.94 MHz, 17-18-01 cpu2: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu2: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu2: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu2: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu2: smt 0, core 2, package 0 cpu3 at mainbus0: apid 6 (application processor) cpu3: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.95 MHz, 17-18-01 cpu3: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,RDSEED,ADX,SMAP,CLFLUSHOPT,SHA,IBPB,XSAVEOPT,XSAVEC,XGETBV1,XSAVES cpu3: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache cpu3: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu3: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative cpu3: smt 0, core 3, package 0 cpu4 at mainbus0: apid 1 (application processor) cpu4: AMD Ryzen 5 3400G with Radeon Vega Graphics, 3699.95 MHz, 17-18-01 cpu4: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,M
timekeep: tk_generation problem
Hello tech list, My CLOCK_MONOTONIC can jump backwards. It looks like a problem with tk_generation in the user timekeep page. If tk_offset_count and tk_offset change but tk_generation doesn't change, then libc can mix old and new values and calculate a bogus time. This diff tries to fix it. The kernel has 2 sets of timehands, th0 and th1, but libc has only 1 timekeep page. If the kernel switches between th0 and th1 while they have the same generation, then libc can't see the switch. Is diff OK, or should we do something else? The attached monocheck.c can detect the problem. It loops until CLOCK_MONOTONIC decreases.--George Index: kern/kern_tc.c === RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.62 diff -u -p -r1.62 kern_tc.c --- kern/kern_tc.c 6 Jul 2020 13:33:09 - 1.62 +++ kern/kern_tc.c 13 Jul 2020 02:59:58 - @@ -98,7 +98,8 @@ static struct timehands th0 = { .th_counter = &dummy_timecounter, .th_scale = UINT64_MAX / 100, .th_offset = { .sec = 1, .frac = 0 }, - .th_generation = 1, + /* Keep apart generations of th0, th1, for user timekeep. */ + .th_generation = UINT_MAX / 2, .th_next = &th1 }; #include #include #include int main(void) { struct timespec diff, now, then; if (clock_gettime(CLOCK_MONOTONIC, &then) == -1) err(1, "clock_gettime"); for (;;) { if (clock_gettime(CLOCK_MONOTONIC, &now) == -1) err(1, "clock_gettime"); if (timespeccmp(&now, &then, <)) { timespecsub(&then, &now, &diff); errx(1, "then %lld.%09ld - now %lld.%09ld " "= diff %lld.%09ld", then.tv_sec, then.tv_nsec, now.tv_sec, now.tv_nsec, diff.tv_sec, diff.tv_nsec); } then = now; } }
Re: userland clock_gettime proof of concept
On Wed, 8 Jul 2020 14:26:02 +0200 (CEST) Mark Kettenis wrote: > > From: Paul Irofti > > Reads OK to me. Please make the adjustments to static functions that > > kettenis@ mentioned in the alpha thread. > > To add to that: > > * TC_LAST isn't needed, so kill that > * tc_get_timecount > > Also in the sparc64 I did an exact copy of the kernel implementation > of the functions to read the counter. I only made them static inline. > That makes it easier to verify that they are indeed identical. Here is the diff for macppc after I drop TC_LAST, recopy usertc.c from amd64 (so tc_get_timecount is now static), and copy ppc_mftbl from /sys/arch/powerpc/include/cpu.h OK to commit? Index: lib/libc/arch/powerpc/gen/usertc.c === RCS file: /cvs/src/lib/libc/arch/powerpc/gen/usertc.c,v retrieving revision 1.1 diff -u -p -r1.1 usertc.c --- lib/libc/arch/powerpc/gen/usertc.c 6 Jul 2020 13:33:05 - 1.1 +++ lib/libc/arch/powerpc/gen/usertc.c 9 Jul 2020 21:41:47 - @@ -1,4 +1,4 @@ -/* $OpenBSD: usertc.c,v 1.1 2020/07/06 13:33:05 pirofti Exp $ */ +/* $OpenBSD: usertc.c,v 1.2 2020/07/08 09:17:48 kettenis Exp $ */ /* * Copyright (c) 2020 Paul Irofti * @@ -18,4 +18,24 @@ #include #include -int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL; +static __inline u_int32_t +ppc_mftbl (void) +{ + int ret; + __asm volatile ("mftb %0" : "=r" (ret)); + return ret; +} + +static int +tc_get_timecount(struct timekeep *tk, u_int *tc) +{ + switch (tk->tk_user) { + case TC_TB: + *tc = ppc_mftbl(); + return 0; + } + + return -1; +} + +int (*const _tc_get_timecount)(struct timekeep *, u_int *) = tc_get_timecount; Index: sys/arch/macppc/include/timetc.h === RCS file: /cvs/src/sys/arch/macppc/include/timetc.h,v retrieving revision 1.1 diff -u -p -r1.1 timetc.h --- sys/arch/macppc/include/timetc.h6 Jul 2020 13:33:07 - 1.1 +++ sys/arch/macppc/include/timetc.h9 Jul 2020 21:41:48 - @@ -18,6 +18,6 @@ #ifndef _MACHINE_TIMETC_H_ #define _MACHINE_TIMETC_H_ -#defineTC_LAST 0 +#defineTC_TB 1 #endif /* _MACHINE_TIMETC_H_ */ Index: sys/arch/macppc/macppc/clock.c === RCS file: /cvs/src/sys/arch/macppc/macppc/clock.c,v retrieving revision 1.44 diff -u -p -r1.44 clock.c --- sys/arch/macppc/macppc/clock.c 6 Jul 2020 13:33:08 - 1.44 +++ sys/arch/macppc/macppc/clock.c 9 Jul 2020 21:41:48 - @@ -57,7 +57,7 @@ u_int32_t ns_per_tick = 320; static int32_t ticks_per_intr; static struct timecounter tb_timecounter = { - tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0 + tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, TC_TB }; /* calibrate the timecounter frequency for the listed models */
Re: userland clock_gettime proof of concept
On Wed, 8 Jul 2020 11:32:09 -0500 Scott Cheloha wrote: > On Wed, Jul 08, 2020 at 09:36:03AM -0600, Theo de Raadt wrote: > > Ugly test program (not showing it) seems to show syncronized clocks on > > powerpc, or at least closely sycronized. It might be one register. > > I guess I'll share mine, then. > > With this program I can consistently catch an offset of -2 to -1 on my > G5. Such a small offset is probably fine, right? I'm curious if it > gets any larger on other machines. /sys/arch/macppc/macppc/cpu.c has code to "Sync timebase" of the cpus. One cpu does h->tb = ppc_mftb() + 10; other does ppc_mttb(h->tb). Your -2 to -1 implies that time went backwards! I don't like this, but time also goes backwards in OpenBSD/amd64 and in other systems. (Microsoft Docs, "Acquiring high-resolution time stamps", says that stamps "from different threads", within a small range of uncertainty, have "ambiguous ordering".)
Re: powerpc64: 64-bit-ize memmove.S
On Sat, 27 Jun 2020 01:27:14 +0200 Christian Weisgerber wrote: > That function simply copies as many (double)words plus a tail of > bytes as the length argument specifies. Neither source nor destination > are checked for alignment, so this will happily run a loop of > unaligned accesses, which doesn't sound very optimal. I made a benchmark and concluded that unaligned word copies are slower than aligned word copies, but faster than byte copies. In most cases, memmove.S is faster than memmove.c, but if aligned word copies between unaligned buffers are possible, then memmove.c is faster. The benchmark was on a 32-bit macppc G3 with cpu0 at mainbus0: 750 (Revision 0x202): 400 MHz: 512KB backside cache The benchmark has 4 implementations of memmove, stbu => byte copy with lbzu,stbu loop stbx => byte copy with lbzx,stbx,addi loop C => aligned word copy or byte copy (libc/string/memmove.c) asm => unaligned word copy (libc/arch/powerpc/string/memmove.S) It shows time measured by mftb (move from timebase). 1st bench: move 1 bytes up by 4 bytes, then down by 4 bytes, in aligned buffer (offset 0). asm wins: $ ./bench 1 4 0 stbustbxC asm 26392814792 633 25022814784 628 25012814783 627 25012814784 626 2nd bench: unaligned buffer (offset 1), but (src & 3) == (dst & 3), so C does aligned word copies, while asm does misaligned. C wins: $ ./bench 1 4 1 stbustbxC asm 26383006795 961 25022814786 938 25012814786 939 25012813785 939 3rd bench: move up then down by 5 bytes, src & 3 != dst & 3, can't align word copies. C does byte copies. asm wins: $ ./bench 1 5 0 stbustbxC asm 267528152514809 250128132504782 250228152504782 250128142503782 I think that memmove.S is probably better than memmove.c on G3. I haven't run the bench on POWER9.
Re: powerpc64: 64-bit-ize memmove.S
On Sat, 27 Jun 2020 01:27:14 +0200 Christian Weisgerber wrote: > I'm also intrigued by this aside in the PowerPC ISA documentation: > | Moreover, Load with Update instructions may take longer to execute > | in some implementations than the corresponding pair of a non-update > | Load instruction and an Add instruction. > What does clang generate? clang likes load/store with update instructions. For example, the powerpc64 kernel has /sys/lib/libkern/memcpy.c, which copies bytes: while (n-- > 0) *t++ = *f++; clang uses lbzu and stbu: memcpy: cmpldi r5,0x0 memcpy+0x4: beqlr memcpy+0x8: addi r4,r4,-1 memcpy+0xc: addi r6,r3,-1 memcpy+0x10:mtspr ctr,r5 memcpy+0x14:lbzu r5,1(r4) memcpy+0x18:stbu r5,1(r6) memcpy+0x1c:bdnz 0x26cd0d4 (memcpy+0x14) memcpy+0x20:blr > I think we should consider dropping this "optimized" memmove.S on > both powerpc and powerpc64. I might want to benchmark memmove.S against memmove.c to check if those unaligned accesses are too slow. First I would have to write a benchmark.
Re: userland clock_gettime proof of concept
On Mon, 22 Jun 2020 19:12:22 +0300 Paul Irofti wrote: > New iteration: > > - ps_timekeep should not coredump, pointed by deraadt@ > - set ps_timekeep to 0 before user uvm_map for randomization > - map timekeep before fixup. confirmed by naddy@ that it fixes NULL init > - initialize va. clarified by kettenis@ Here's macppc again. My macppc isn't using your newest diff but does now need to define TC_TB in . The /sys/arch/powerpc/include/timetc.h in your diff never gets used, because there is no #include . On macppc, uname -m => macppc and uname -p => powerpcare different, and #include is /sys/arch/macppc/include/timetc.h. I suspect that is /sys/arch/$i/include/timetc.h if and only if /sys/arch/$i/compile exists. 10 days ago, naddy said, "You only need the lower register." That is correct, so this diff also stops using mftbu (the higher register). --- lib/libc/arch/powerpc/gen/usertc.c.before Wed Jun 24 16:42:36 2020 +++ lib/libc/arch/powerpc/gen/usertc.c Wed Jun 24 16:46:00 2020 @@ -18,4 +18,17 @@ #include #include -int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL; +int +tc_get_timecount(struct timekeep *tk, u_int *tc) +{ + u_int tb; + + if (tk->tk_user != TC_TB) + return -1; + + asm volatile("mftb %0" : "=r"(tb)); + *tc = tb; + return 0; +} +int (*const _tc_get_timecount)(struct timekeep *tk, u_int *tc) + = tc_get_timecount; --- sys/arch/macppc/include/timetc.h.before Wed Jun 24 16:36:03 2020 +++ sys/arch/macppc/include/timetc.hWed Jun 24 16:37:47 2020 @@ -18,6 +18,7 @@ #ifndef _MACHINE_TIMETC_H_ #define _MACHINE_TIMETC_H_ -#defineTC_LAST 0 +#defineTC_TB 1 +#defineTC_LAST 2 #endif /* _MACHINE_TIMETC_H_ */ --- sys/arch/macppc/macppc/clock.c.before Wed Jun 24 16:39:58 2020 +++ sys/arch/macppc/macppc/clock.c Wed Jun 24 16:40:08 2020 @@ -57,7 +57,7 @@ static int32_t ticks_per_intr; static struct timecounter tb_timecounter = { - tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0 + tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, TC_TB }; /* calibrate the timecounter frequency for the listed models */
Re: userland clock_gettime proof of concept
This is macppc, goes on top of your diff from Thu, 11 Jun 2020 16:27:03 +0300. Do integrate this small diff into your big diff. If you need to change the macppc code but can't build it, then do the change, and allow me or someone else to build it. I'm gkoehler@. macppc is a simple arch: there is only 1 timecounter, where tk->tk_user == 1; and we will probably never add another timecounter. The pointer _tc_get_timecount seems unnecessary, because macppc never sets the pointer to NULL. The function tc_get_timecount might work as a static inline function, but then it would need to be in a header (libc/arch/powerpc/usertc.h?) and not in usertc.c. I don't check tk->tk_count. If the count is 2 or 3, then we can ignore it, because we only handle tk->tk_user == 1. If the count is 0, then the kernel should not give us a timekeep page with tk->tk_user == 1. I don't know that tk->tk_count needs to exist. I don't check tc == NULL (but you do in amd64). The only caller tc_delta() in libc/sys/microtime.c never passes NULL. In my last mail (31 May), I wanted to #include and call ppc_mftb(). I change my mind. It is more difficult to change the kernel headers to expose ppc_mftb() outside _KERNEL, and easier to copy the __asm, so I copy the __asm.--George --- lib/libc/arch/powerpc/gen/usertc.c.before Sat Jun 13 21:28:50 2020 +++ lib/libc/arch/powerpc/gen/usertc.c Sat Jun 13 21:38:52 2020 @@ -18,4 +18,19 @@ #include #include -int (*const _tc_get_timecount)(struct timekeep *, uint64_t *) = NULL; +int +tc_get_timecount(struct timekeep *tk, uint64_t *tc) +{ + uint64_t tb; + uint32_t scratch; + + if (tk->tk_user != 1) + return -1; + + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;" + " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch)); + *tc = tb; + return 0; +} +int (*const _tc_get_timecount)(struct timekeep *, uint64_t *) + = tc_get_timecount; --- sys/arch/macppc/macppc/machdep.c.before Sat Jun 13 18:07:27 2020 +++ sys/arch/macppc/macppc/machdep.cSat Jun 13 18:07:38 2020 @@ -351,7 +351,7 @@ } /* timekeep number of user accesible clocks */ -int tk_nclocks = 0; +int tk_nclocks = 1; /* * safepri is a safe priority for sleep to set for a spin-wait --- sys/arch/macppc/macppc/clock.c.before Sat Jun 13 18:39:58 2020 +++ sys/arch/macppc/macppc/clock.c Sat Jun 13 18:40:10 2020 @@ -57,7 +57,7 @@ static int32_t ticks_per_intr; static struct timecounter tb_timecounter = { - tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0 + tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 1 }; /* calibrate the timecounter frequency for the listed models */
macppc PowerBook5,4: add missing volume control
This diff is for macppc "model PowerBook5,4". It adds the missing audio volume control by changing the driver from aoa(4) to snapper(4). Before the diff, I needed to put my ear near the speaker to check if audio was playing. After the diff, the speaker was so loud (about output.level=0.75) that I used sndioctl to turn it down. The diff uses strcmp(hw_prod, "PowerBook5,4") to attach the other driver. In OpenBSD, aoa(4) is i2s with no volume control. snapper(4) is i2s with TAS3004 volume control. In Mac OS X, system profiler shows TAS3004 audio on my PowerBook5,4. NetBSD doesn't check the "PowerBook5,4" model string. NetBSD's sys/arch/macppc/dev/snapper.c snapper_defer() looks for a "deq" in the device tree. No "deq" means no hardware volume control. OpenBSD defines a DEQaddr and doesn't check "deq". We do look for the kiic(4) on macobio(4) -- this device has "deq" -- but this happens late in snapper_defer(), after kiic(4) attaches, which is after aoa(4) or snapper(4) attaches. By checking "PowerBook5,4", I can pick aoa(4) or snapper(4) without knowing kiic(4). See part of my dmesg and eeprom -p, below the diff. OK to commit? Index: share/man/man4/man4.macppc/aoa.4 === RCS file: /cvs/src/share/man/man4/man4.macppc/aoa.4,v retrieving revision 1.9 diff -u -p -r1.9 aoa.4 --- share/man/man4/man4.macppc/aoa.42 Jul 2016 16:28:50 - 1.9 +++ share/man/man4/man4.macppc/aoa.44 Jun 2020 20:39:33 - @@ -46,8 +46,6 @@ driver include: .Pp .Bl -dash -offset indent -compact .It -PowerBook5,4 -.It PowerMac7,3 .It PowerMac9,1 Index: share/man/man4/man4.macppc/snapper.4 === RCS file: /cvs/src/share/man/man4/man4.macppc/snapper.4,v retrieving revision 1.16 diff -u -p -r1.16 snapper.4 --- share/man/man4/man4.macppc/snapper.415 Jan 2015 20:37:36 - 1.16 +++ share/man/man4/man4.macppc/snapper.44 Jun 2020 20:39:33 - @@ -57,6 +57,8 @@ PowerBook5,2 .It PowerBook5,3 .It +PowerBook5,4 +.It PowerBook5,5 .It PowerBook5,6 Index: sys/arch/macppc/dev/aoa.c === RCS file: /cvs/src/sys/arch/macppc/dev/aoa.c,v retrieving revision 1.9 diff -u -p -r1.9 aoa.c --- sys/arch/macppc/dev/aoa.c 19 Sep 2016 06:46:43 - 1.9 +++ sys/arch/macppc/dev/aoa.c 4 Jun 2020 20:39:33 - @@ -57,6 +57,8 @@ void aoa_attach(struct device *, struct void aoa_defer(struct device *); void aoa_set_volume(struct aoa_softc *, int, int); +extern char *hw_prod; + struct cfattach aoa_ca = { sizeof(struct aoa_softc), aoa_match, aoa_attach }; @@ -107,7 +109,8 @@ aoa_match(struct device *parent, void *m bzero(compat, sizeof compat); OF_getprop(soundchip, "compatible", compat, sizeof compat); - if (strcmp(compat, "AOAKeylargo") == 0) + if (strcmp(compat, "AOAKeylargo") == 0 && + strcmp(hw_prod, "PowerBook5,4") != 0) return (1); if (strcmp(compat, "AOAK2") == 0) return (1); Index: sys/arch/macppc/dev/snapper.c === RCS file: /cvs/src/sys/arch/macppc/dev/snapper.c,v retrieving revision 1.37 diff -u -p -r1.37 snapper.c --- sys/arch/macppc/dev/snapper.c 19 Sep 2016 06:46:43 - 1.37 +++ sys/arch/macppc/dev/snapper.c 4 Jun 2020 20:39:34 - @@ -70,6 +70,8 @@ void snapper_set_input(struct snapper_so int tas3004_write(struct snapper_softc *, u_int, const void *); int tas3004_init(struct snapper_softc *); +extern char *hw_prod; + struct cfattach snapper_ca = { sizeof(struct snapper_softc), snapper_match, snapper_attach }; @@ -442,6 +444,9 @@ snapper_match(struct device *parent, voi bzero(compat, sizeof compat); OF_getprop(soundchip, "compatible", compat, sizeof compat); + if (strcmp(compat, "AOAKeylargo") == 0 && + strcmp(hw_prod, "PowerBook5,4") == 0) + return (1); if (strcmp(compat, "snapper") == 0) return (1); Part of dmesg: [ using 1155980 bytes of bsd ELF symbol table ] console out [ATY,Jasper_A] console in [keyboard]USB and ADB found, using ADB using parent ATY,JasperParent:: memaddr b800, size 800 : consaddr b8008000 : ioaddr b002, size 2: width 1280 linebytes 1280 height 854 depth 8 Copyright (c) 1982, 1986, 1989, 1991, 1993 The Regents of the University of California. All rights reserved. Copyright (c) 1995-2020 OpenBSD. All rights reserved. https://www.OpenBSD.org OpenBSD 6.7-current (GENERIC) #1: Thu Jun 4 16:12:14 EDT 2020 kern...@wisconsin.my.domain:/usr/src/sys57/arch/macppc/compile/GENERIC real mem = 1073741824 (1024MB) avail mem = 1026486272 (978MB) random: good seed from bootblocks mpath0 at root scsibus0 at mpath0: 256 targets mainbus0 at root: model PowerBook5,4 cpu0 at mainbus0: 7447A (Revision
Re: userland clock_gettime proof of concept
On Sat, 30 May 2020 19:21:30 +0300 Paul Irofti wrote: > Here is an updated diff with no libc bump. Please use this one for > further testing. Your diff does amd64. Here is a diff to add macppc. Apply after your diff. I have only tested clock_gettime(2) with CLOCK_REALTIME, by doing loops in Ruby like, $ ruby27 -e '1.times{p Time.now}' The time increased steadily, and ktrace showed only a few system calls to clock_gettime(2). I copied ppc_mftb() from /sys/arch/powerpc/powerpc/cpu.h and renamed it to tc_get_timecount_md(), because #include doesn't provide ppc_mftb() if not _KERNEL. It would be better to edit the kernel headers to give ppc_mftb() if _LIBC, but I haven't done that. I changed __amd64 to __amd64__ because I didn't find __powerpc. I'm not sure, but one might move the list of arches to dlfcn/Makefile.inc and do -DTIMEKEEP, like how thread/Makefile.inc does -DFUTEX. One might drop the tc_get_timecount function pointer and just always call the function #ifdef TIMEKEEP. PowerPC Mac OS X had a userland gettimeofday(2) using the cpu's timebase and a "common page" from the kernel. Their common page also had executable code for gettimeofday, memcpy, pthread_self, and a few other functions. --George Index: lib/libc/arch/powerpc/gen/Makefile.inc === RCS file: /cvs/src/lib/libc/arch/powerpc/gen/Makefile.inc,v retrieving revision 1.14 diff -u -p -r1.14 Makefile.inc --- lib/libc/arch/powerpc/gen/Makefile.inc 12 Apr 2012 16:14:09 - 1.14 +++ lib/libc/arch/powerpc/gen/Makefile.inc 31 May 2020 03:20:58 - @@ -3,3 +3,4 @@ SRCS+= fabs.c SRCS+= fpgetmask.c fpsetmask.c SRCS+= fpgetround.c fpsetround.c SRCS+= fpgetsticky.c fpsetsticky.c +SRCS+= usertc.c --- /dev/null Sat May 30 23:21:20 2020 +++ lib/libc/arch/powerpc/gen/usertc.c Sat May 30 19:37:59 2020 @@ -0,0 +1,44 @@ +/* $OpenBSD$ */ +/* + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + *must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + *derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +uint64_t +tc_get_timecount_md(void) +{ + u_long scratch; + u_int64_t tb; + + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;" + " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch)); + return tb; +} --- lib/libc/dlfcn/init.c.beforeSat May 30 23:26:35 2020 +++ lib/libc/dlfcn/init.c Sat May 30 18:00:45 2020 @@ -70,7 +70,7 @@ /* provide definitions for these */ const dl_cb *_dl_cb __relro = NULL; -#if defined(__amd64) +#if defined(__amd64__) || defined(__powerpc__) uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md; #else uint64_t (*const tc_get_timecount)(void) = NULL; --- sys/arch/macppc/macppc/clock.c.before Sat May 30 23:28:00 2020 +++ sys/arch/macppc/macppc/clock.c Sat May 30 20:35:47 2020 @@ -57,7 +57,7 @@ static int32_t ticks_per_intr; static struct timecounter tb_timecounter = { - tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 0 + tb_get_timecount, NULL, 0x7fff, 0, "tb", 0, NULL, 1 }; /* calibrate the timecounter frequency for the listed models */
Re: [macppc 6.7-beta] clang backend error: "adde Constant" issue
On Wed, 29 Apr 2020 21:08:52 +0200 (CEST) Mark Kettenis wrote: > Upstream fixed this issue as well. Apparently only ADDE can't be > legalized (because it is "special") but ADDCARRY can. Do ypu want to > adjust your diff based on that information? > > Either way, ok kettenis@ This adjusted diff changes only ADDE and not ADDCARRY. I expect it to work as well as my previous diff (Tue 28 Apr) on PowerPC, because PowerPC doesn't use ADDCARRY. After I built macppc clang with the previous diff, I did a successful "make build" in /usr/src. I'm not doing another "make build" with this adjusted diff, but I will check that some other stuff builds. Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp === RCS file: /cvs/src/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp,v retrieving revision 1.1.1.8 diff -u -p -r1.1.1.8 DAGCombiner.cpp --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp23 Jun 2019 21:36:48 - 1.1.1.8 +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp30 Apr 2020 16:18:31 - @@ -9904,9 +9904,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. + // Don't make an illegal adde: LegalizeDAG can't expand nor promote it. if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && - (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { + ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || + TLI.isOperationLegal(N0.getOpcode(), VT))) { SDLoc SL(N); auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
Re: [macppc 6.7-beta] clang backend error: "adde Constant" issue
Moving from bugs@ to tech@, because some people might miss a clang diff on bugs@. This diff modifies LLVM's DAGCombiner to skip an optimization if it would make an illegal ISD::ADDE node. This fixes fatal errors from powerpc clang when building ports net/libtorrent-rasterbar and devel/avr/binutils on PowerPC. The fatal errors look like, On Sun, 19 Apr 2020 17:26:49 +0200 Charlene Wendling wrote: > fatal error: error in backend: Cannot select: 0x90a1c1b0: i1,glue = adde > Constant:i1<0>, Constant:i1<-1>, 0xa6a2b1b0:1 > 0xa6a2b828: i1 = Constant<0> > 0x90a1c630: i1 = Constant<-1> > 0xa6a2b1b0: i32,glue = addc 0x90a1c3a8, Constant:i32<-1> An example to cause this error, given unsigned 32-bit *p, is if (*p > ((*p + 15ULL) & ~15ULL)) ... The SelectionDAG goes through phases. The DAG starts with an i64 add *p + 15ULL, but 32-bit PowerPC has no i64 ops, so the legalize-types phase converts the i64 add to an i32 addc/adde pair. The DAG keeps some i1 ops because clang -mcrbits (the default) enables i1 ops on condition register bits. DAGCombiner is optimizing the i1 ops when it truncates the i32 adde to an i1 adde, but i1 adde is an illegal op. Legalize-ops runs next, and would convert illegal ops to legal ops, but it can't fix i1 adde, because it has no case for ISD::ADDE. LegalizeIntegerTypes.cpp:1966 knows this, and refuses to insert an illegal adde, but DAGCombiner::visitTRUNCATE didn't know. This diff might affect other arches. I guess that Aarch64, ARM, MIPS, Sparc don't have integer ops smaller than i32, and X86 doesn't use ISD::ADDE, so this diff might not affect them. OK to commit? Index: lib/CodeGen//SelectionDAG/DAGCombiner.cpp === RCS file: /cvs/src/gnu/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp,v retrieving revision 1.1.1.8 diff -u -p -r1.1.1.8 DAGCombiner.cpp --- lib/CodeGen//SelectionDAG/DAGCombiner.cpp 23 Jun 2019 21:36:48 - 1.1.1.8 +++ lib/CodeGen//SelectionDAG/DAGCombiner.cpp 27 Apr 2020 21:48:18 - @@ -9904,9 +9904,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. + // Don't make an illegal adde: LegalizeDAG can't expand nor promote it. if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && - (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { + TLI.isOperationLegal(N0.getOpcode(), VT)) { SDLoc SL(N); auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
Re: powerpc: mplock & WITNESS
On Thu, 9 Apr 2020 20:05:34 +0200 Martin Pieuchot wrote: > +void > +stacktrace_save_at(struct stacktrace *st, unsigned int skip) > +{ > + vaddr_t lr, sp, lastsp; > + > + sp = (vaddr_t)__builtin_frame_address(0); > + if (!INKERNEL(sp) && !ININTSTK(sp)) > + return; > + > + st->st_count = 0; > + while (st->st_count < STACKTRACE_MAX) { > + lr = *(vaddr_t *)(sp + 4) - 4; > + if (lr & 3) > + break; > + > + if (skip == 0) > + st->st_pc[st->st_count++] = lr; > + else > + skip--; > + > + lastsp = sp; > + sp = *(vaddr_t *)sp; > + > + if ((sp == 0) || (sp & 3) || (sp <= lastsp)) > + break; > + if (!INKERNEL(sp) && !ININTSTK(sp)) > + break; > + } > +} In the trace, #0 and #1 are wrong, but the rest of the trace looks good enough for WITNESS. I added an artificial lock order reversal to ums(4) for WITNESS to catch. I got this trace, #0 0xe4d764 #1 witness_checkorder+0x308 #2 mtx_enter+0x50 #3 ums_attach+0x68 #4 config_attach+0x270 ... "#0 0xe4d764" is stack garbage: a function saves its lr in its caller's frame, but stacktrace_save_at() first reads the lr slot in its own frame. "#1 witness_checkorder+0x308" is a dead value. In the disassembly (objdump -dlr db_trace.o), clang optimized stacktrace_save_at() to skip saving its lr on the stack, because it is a leaf function (that never calls other functions). The lr from the stack isn't the return address for stacktrace_save_at(), but might be a leftover return address from some other function (that needed to save lr). #2 and after seem to be correct; "#3 ums_attach+0x68" points exactly to where I am grabbing the second lock. This is enough for WITNESS, so we might want to use your diff now, and fix #0 and #1 later. Also know that a compiler may optimize stacktrace_save_at() to have no stack frame. Our clang 8.0.1 never does this (because it always sets r31 = stack pointer r1, so it always needs a stack frame to save the caller's r31), but gcc and newer clang might. I don't know how __builtin_frame_address(0) works if the stack frame is gone. --George
Re: [PATCH] dired-jump for mg
On Wed, 01 Apr 2020 13:47:58 +0200 phi...@warpmail.net (Philip K.) wrote: > George Koehler writes: > > > C-x C-j doesn't work for me: it does show the dired buffer, but > > doesn't jump to the file that I was editing. > > Looks like that was because I called gotofile before showbuffer. I > changed that before submitting it, and I remember it seeming to work, > but I guess I was mistaken :/ This patch should fix that though. I built your diff from 1 Apr. Now C-x C-j does jump. In emacs, if I visit /tmp/nowhere/new, but /tmp/nowhere doesn't exist, then C-x C-j shows an error: | Setting current directory: No such file or directory, /tmp/nowhere/ In mg with your diff, if I do the same, then C-x C-j does nothing, and doesn't show an error. I would like to see an error. I don't need the exact same error message as emacs; but I want to know that mg got my command, and the command failed. (For people trying emacs, C-x C-j doesn't work until you load dired-x. An easy way to load dired-x is to visit any dir, then M-x dired-jump.) I didn't review all your code, but I now think that you should call xbasename() instead of basename(), to be consistent with the existing dired code. --George
macppc libunwind without altivec
Hello tech, powerpc libunwind is broken on machines without altivec. It crashes SIGILL when code (built with base-clang++) throws a C++ exception, because libunwind always saves the altivec registers. You don't have altivec if sysctl machdep.altivec is 0. I believe that G3 cpus don't have altivec, and G4 and G5 do have it. This diff defers saving the altivec registers until we need to access them. I took the idea from arm, which defers saving VFP registers. The diff fixes a small C++ demo on my G3. I didn't try other C++ code; I was building ports with macppc base-clang but stopped at an error from lang/python/3.7. Registers_arm has members like "bool _saved_vfp_d0_d15;" to know if VFP got saved. I can't add a "bool _saved_vrs" to Registers_ppc, because some assertions would fail, because unw_context_t would be too small. I don't enlarge unw_context_t (an opaque struct of 117 * 8 bytes) because it is in /usr/include/c++/v1/libunwind.h and I don't want to change the ABI. I instead check if vrsave != 0; vrsave is a bitset of altivec registers in use (Altivec Programming Environments Manual, ALTIVECPEM.pdf, section 2.3.3). libunwind operates on a saved context, not real registers. If a compiler exists that would tell libunwind to set v31 when vrsave == 0, this diff would break it. I have no bool to check if I have already saved the vrs but vrsave == 0. I also stop using the red zone, because it doesn't exist: a signal handler may clobber anything below the stack pointer. Is the diff OK to commit? It applies to src/lib/libunwind but you build it in src/lib/libcxxabi $ cat thrown.cpp #include #include using std::cout; using std::runtime_error; int main() { try { throw runtime_error("ouch"); } catch(runtime_error e) { cout << "caught " << e.what() << "\n"; } } $ clang++ -o thrown thrown.cpp $ ./thrown # without the fix Illegal instruction (core dumped) $ ./thrown # with the fixed libc++abi.so.2.0 caught ouch Index: src/Registers.hpp === RCS file: /cvs/src/lib/libunwind/src/Registers.hpp,v retrieving revision 1.8 diff -u -p -r1.8 Registers.hpp --- src/Registers.hpp 17 Jun 2019 22:28:51 - 1.8 +++ src/Registers.hpp 2 Apr 2020 16:40:32 - @@ -630,7 +630,7 @@ private: unsigned int __lr; /* Link register */ unsigned int __ctr;/* Count register */ unsigned int __mq; /* MQ register (601 only) */ -unsigned int __vrsave; /* Vector Save Register */ +mutable unsigned int __vrsave; /* Vector Save Register */ }; struct ppc_float_state_t { @@ -640,9 +640,11 @@ private: unsigned int __fpscr; /* floating point status register */ }; + void saveVectorRegisters() const; + ppc_thread_state_t _registers; ppc_float_state_t _floatRegisters; - v128 _vectorRegisters[32]; // offset 424 + mutable v128 _vectorRegisters[32]; // offset 424 }; inline Registers_ppc::Registers_ppc(const void *registers) { @@ -657,10 +659,8 @@ inline Registers_ppc::Registers_ppc(cons sizeof(_floatRegisters)); static_assert(sizeof(ppc_thread_state_t) + sizeof(ppc_float_state_t) == 424, "expected vector register offset to be 424 bytes"); - memcpy(_vectorRegisters, - static_cast(registers) + sizeof(ppc_thread_state_t) + - sizeof(ppc_float_state_t), - sizeof(_vectorRegisters)); + // no values until saveVectorRegisters() + memset(&_vectorRegisters, 0, sizeof(_vectorRegisters)); } inline Registers_ppc::Registers_ppc() { @@ -780,6 +780,7 @@ inline uint32_t Registers_ppc::getRegist case UNW_PPC_CR7: return (_registers.__cr & 0x000F); case UNW_PPC_VRSAVE: +saveVectorRegisters(); return _registers.__vrsave; } _LIBUNWIND_ABORT("unsupported ppc register"); @@ -932,6 +933,7 @@ inline void Registers_ppc::setRegister(i _registers.__cr |= (value & 0x000F); return; case UNW_PPC_VRSAVE: +saveVectorRegisters(); _registers.__vrsave = value; return; // not saved @@ -976,12 +978,14 @@ inline bool Registers_ppc::validVectorRe inline v128 Registers_ppc::getVectorRegister(int regNum) const { assert(validVectorRegister(regNum)); + saveVectorRegisters(); v128 result = _vectorRegisters[regNum - UNW_PPC_V0]; return result; } inline void Registers_ppc::setVectorRegister(int regNum, v128 value) { assert(validVectorRegister(regNum)); + saveVectorRegisters(); _vectorRegisters[regNum - UNW_PPC_V0] = value; } Index: src/UnwindRegistersRestore.S === RCS file: /cvs/src/lib/libunwind/src/UnwindRegistersRestore.S,v retrieving revision 1.8 diff -u -p -r1.8 UnwindRegistersRestore.S --- src/UnwindRegistersRestore.S17 Jun 2019 22:28:51 - 1.8 +++ src/UnwindRegistersRestore.S2 Apr 2020 16:40:33 - @@ -476,11 +476,
Re: macppc kernel and clang
On Mon, 30 Mar 2020 22:39:48 +0200 (CEST) Mark Kettenis wrote: > P.S. Userland seems to be in good shape as well. I built and rebuilt > the world with clang. That was on a kernel built with gcc, so > I'm repeating that now with a kernel built with clang. That's good. My iMac G3 is too slow to build the base system, so it has been running a macppc snapshot where I rebuilt kernel, libc, and a few other things with clang. I know that clang can't build bsd.rd unless I use crunchgen -M; see my mails of 17 and 18 Mar. I recently learned that libunwind doesn't work on my G3. I built (with base-clang) a small program to throw a C++ exception; it crashes at an illegal instruction because libunwind uses altivec and my G3 doesn't have altivec. (G4 and G5 cpus probably have altivec.) I can run clang; LLVM and clang almost never throw C++ exceptions. I also noticed that some .S files in libc give a clang warning, but I didn't find why. --George
Re: [PATCH] dired-jump for mg
On Mon, 30 Mar 2020 15:11:14 +0200 phi...@warpmail.net (Philip K.) wrote: > I apologise in case there are any problems applying this patch, I'm > (sadly) not a OpenBSD user so I developed the change using the Linux > port and then copied the changes. I optimistically assumed that this > should work, but in case there are any issues, I'll spin up a VM and > debug it. Your patch causes error on OpenBSD: cc -O2 -pipe -Wall -DREGEX -Werror-implicit-function-declaration -MD -MP -c /usr/src/usr.bin/mg/dired.c /usr/src/usr.bin/mg/dired.c:280:13: error: implicit declaration of function 'basename' is invalid in C99 [-Werror,-Wimplicit-function-declaration] gotofile(basename(fname)); ^ /usr/src/usr.bin/mg/dired.c:280:13: note: did you mean 'xbasename'? /usr/src/usr.bin/mg/def.h:381:10: note: 'xbasename' declared here size_t xbasename(char *, const char *, size_t); ^ /usr/src/usr.bin/mg/dired.c:280:13: warning: incompatible integer to pointer conversion passing 'int' to parameter of type 'char *' [-Wint-conversion] gotofile(basename(fname)); ^~~ /usr/src/usr.bin/mg/dired.c:56:27: note: passing argument to parameter here static int gotofile(char*); ^ 1 warning and 1 error generated. *** Error 1 in /usr/src/usr.bin/mg (:87 'dired.o') I solved the error by adding #include , as in the manual https://man.openbsd.org/man3/basename.3 C-x C-j doesn't work for me: it does show the dired buffer, but doesn't jump to the file that I was editing. --George
Re: macppc kernel and clang
Here is a new diff for macppc's ofw_stack() problem, without using __attribute__((noinline)). I use this diff to build and run a macppc kernel with clang. It also works with gcc. The kernel did 3 steps to prepare an Open Firmware call: 1. turn off interrupts (EE and RI in msr) 2. move the stack pointer %r1 to firmstk 3. switch to Open Firmware's pmap? I don't understand these steps, but I tried to preserve all 3 steps as I shuffled the code. The diff doesn't touch step 3. The problem was at step 2: ofw_stack() copied the caller's stack frame to firmstk, and changed the caller's return address to ofw_back (which will restore the old %r1 and msr). If clang inlines the caller into another function, then ofw_back would run too late. I move step 2 into openfirmware(), so there is no more copying a stack frame nor hijacking a return address. (I claim that firmstk+NBPG-16 is a multiple of 16; that mtctr,bctrl is more idiomatic than mtlr,blrl.) ofw_stack() becomes s = ofw_msr() and only does step 1, turning off EE and RI in msr. ppc_mtmsr(s) restores the saved msr. I don't use intr_disable() because it turns off only EE, not RI. I changed OF_call_method*() to turn off EE (external interrupts) before they touch their static args. Some functions, like OF_boot() and OF_quiesce(), seem unused, so I can't know if my changes are correct. To build a kernel with clang, I do # make CC=clang COMPILER_VERSION=clang Is this OK to commit? Would it be better to use intr_disable() in OF_*() and turn off RI in ofwreal.S fwentry? Index: ofw_machdep.h === RCS file: /cvs/src/sys/arch/macppc/macppc/ofw_machdep.h,v retrieving revision 1.9 diff -u -p -r1.9 ofw_machdep.h --- ofw_machdep.h 7 Apr 2015 14:36:34 - 1.9 +++ ofw_machdep.h 29 Mar 2020 16:16:27 - @@ -26,6 +26,9 @@ * */ +#include +#include + extern int cons_backlight_available; void ofwconprobe(void); @@ -49,3 +52,12 @@ void of_setbrightness(int); void of_setcolors(const uint8_t *, unsigned int, unsigned int); void OF_quiesce(void); + +static inline uint32_t +ofw_msr(void) +{ + uint32_t s = ppc_mfmsr(); + + ppc_mtmsr(s & ~(PSL_EE|PSL_RI)); /* turn off interrupts */ + return s; +} Index: ofwreal.S === RCS file: /cvs/src/sys/arch/macppc/macppc/ofwreal.S,v retrieving revision 1.5 diff -u -p -r1.5 ofwreal.S --- ofwreal.S 3 Sep 2019 14:37:22 - 1.5 +++ ofwreal.S 29 Mar 2020 16:16:27 - @@ -355,96 +355,32 @@ _ENTRY(_C_LABEL(fwentry)) addi%r1,%r1,16 blr +.lcomm firmstk,NBPG,16 +.comm _C_LABEL(OF_buf),NBPG + /* * OpenFirmware entry point + * + * Note: caller has to set the machine state register (msr) + * to be correct for OpenFirmware. */ _ENTRY(_C_LABEL(openfirmware)) - stwu%r1,-16(%r1) - mflr%r0 /* save return address */ - stw %r0,20(%r1) + mflr%r0 + stw %r0,4(%r1) /* save return address */ + + /* switch to OpenFirmware real mode stack */ + lis %r7,firmstk+NBPG-16@ha + addi%r7,%r7,firmstk+NBPG-16@l + stw %r1,0(%r7) + mr %r1,%r7 lis %r4,fwcall@ha lwz %r4,fwcall@l(%r4) - mtlr%r4 - blrl - - lwz %r0,20(%r1) - mtlr%r0 - lwz %r1,0(%r1) - blr - -/* - * Switch to/from OpenFirmware real mode stack - * - * Note: has to be called as the very first thing in OpenFirmware interface routines. - * E.g.: - * int - * OF_xxx(arg1, arg2) - * type arg1, arg2; - * { - * static struct { - * char *name; - * int nargs; - * int nreturns; - * char *method; - * int arg1; - * int arg2; - * int ret; - * } args = { - * "xxx", - * 2, - * 1, - * }; - * - * ofw_stack(); - * args.arg1 = arg1; - * args.arg2 = arg2; - * if (openfirmware(&args) < 0) - * return -1; - * return args.ret; - * } - */ -.lcomm firmstk,NBPG,16 -.comm _C_LABEL(OF_buf),NBPG - -_ENTRY(_C_LABEL(ofw_stack)) - mfmsr %r8 /* turn off interrupts */ - andi. %r0,%r8,~(PSL_EE|PSL_RI)@l - mtmsr %r0 - stw %r8,4(%r1) /* abuse return address slot */ - - lwz %r5,0(%r1) /* get length of stack frame */ - subf%r5,%r1,%r5 - - lis %r7,firmstk+NBPG-8@ha - addi%r7,%r7,firmstk+NBPG-8@l - li %r6,0xf - andc%r7,%r7,%r6 - lis %r6,ofw_back@ha - addi%r6,%r6,ofw_back@l - subf%r4,%r5,%r7 /* make room for stack frame on new stack */ - stwu%r1,-16(%r7) - stw %r6,4(%r7) /* setup return pointer */ + mtctr %r4 + bctrl - stw
Re: macppc kernel and clang
On Tue, 17 Mar 2020 13:23:28 -0400 George Koehler wrote: > clang -static -L. -nopie -o instbin instbin.o dd.lo ... > /usr/bin/ld: dd.lo(.text+0x14): R_PPC_PLTREL24 reloc against local symbol > > ... > > Passing -M to crunchgen(8), as we do on {longsoon,octeon,sgi}, might > work around the problem, but I haven't tried it, and I don't know > whether this is the only problem. crunchgen -M worked and I got a bsd.rd from clang, while I had the %L0 and noinline stuff in /sys. (I didn't build a release. I did need to build a few other things before bsd.rd.) Index: Makefile === RCS file: /cvs/src/distrib/macppc/ramdisk/Makefile,v retrieving revision 1.46 diff -u -p -r1.46 Makefile --- Makefile3 May 2019 20:03:58 - 1.46 +++ Makefile19 Mar 2020 03:12:47 - @@ -55,7 +55,7 @@ mr.fs: instbin makefs ${MRMAKEFSARGS} $@ $@.d instbin.mk instbin.cache instbin.c: instbin.conf - crunchgen -E -D ${.CURDIR}/../../.. -L ${DESTDIR}/usr/lib \ + crunchgen -E -M -D ${.CURDIR}/../../.. -L ${DESTDIR}/usr/lib \ -c instbin.c -e instbin -m instbin.mk instbin.conf instbin: instbin.mk instbin.cache instbin.c
Re: macppc kernel and clang
On Mon, 16 Mar 2020 19:13:13 -0600 "Theo de Raadt" wrote: > How are the bootblocks faring? > > And userland? ofwboot with clang works for me. I failed to make bsd.rd (in src/distrib/macppc/ramdisk) with clang, but I don't remember exactly what I did, so I might have been making it the wrong way. My build failed at clang -static -L. -nopie -o instbin instbin.o dd.lo ... /usr/bin/ld: dd.lo(.text+0x14): R_PPC_PLTREL24 reloc against local symbol I was able to make bsd.rd with gcc. "$CC -fno-pie -c something.c; objdump -dlr something.o" shows that gcc -fno-pie uses R_PPC_REL24, but clang -fno-pie uses R_PPC_PLTREL24, for calls to external functions. The symbols would be global until crunchgen(8) hides them by marking them local. R_PPC_REL24 would branch directly to a function, and R_PPC_PLTREL24 would go through the PLT (for a function in a shared library). The compiler can't know whether the function is in a shared lib; the linker must decide. clang -fno-pie on amd64 uses R_X86_64_PLT32, so I guess that clang -fno-pie on powerpc isn't wrong to use R_PPC_PLTREL24. Passing -M to crunchgen(8), as we do on {longsoon,octeon,sgi}, might work around the problem, but I haven't tried it, and I don't know whether this is the only problem. --George
Re: macppc kernel and clang
On Mon, 16 Mar 2020 12:54:52 +0100 (CET) Mark Kettenis wrote: > I had a look at what NetBSD does, and they use '%L0' instead of > '%0+1'. As far as I can tell this works on both gcc and clang. The > diff below produces a working kernel when building with gcc. Not yet > in a position to test a clang-built kernel myself yet. But if this > produces a working kernel with clang as well, I'd prefer this diff > instead of yours. Yes, the clang kernel is working with your %L0 diff and the noinline stuff. I now prefer your %L0 diff, ok gkoehler@ "mftb %L0" becomes "mftb ${0:L}" in LLVM IR (clang -S -emit-llvm), then LLVM handles the 'L' in PPCAsmPrinter::PrintAsmOperand in /usr/src/gnu/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp > Index: arch/powerpc/include/cpu.h > === > RCS file: /cvs/src/sys/arch/powerpc/include/cpu.h,v > retrieving revision 1.65 > diff -u -p -r1.65 cpu.h > --- arch/powerpc/include/cpu.h23 Mar 2019 05:27:53 - 1.65 > +++ arch/powerpc/include/cpu.h16 Mar 2020 11:30:42 - > @@ -336,7 +336,7 @@ ppc_mftb(void) > u_long scratch; > u_int64_t tb; > > - __asm volatile ("1: mftbu %0; mftb %0+1; mftbu %1;" > + __asm volatile ("1: mftbu %0; mftb %L0; mftbu %1;" > " cmpw 0,%0,%1; bne 1b" : "=r"(tb), "=r"(scratch)); > return tb; > } -- George Koehler
macppc pdisk: keep Mac OS 9 drivers
Hello tech, This diff solves a problem with pdisk(8): it is disabling the Mac OS 9 drivers on the disk. I use pdisk(8) to share a disk with Mac OS 9 and OpenBSD macppc. After pdisk(8) disabled my drivers, my Mac OS 9 boot got stuck at the blinking '?' on the floppy icon. I fixed it by booting a CD and using Apple's Drive Setup to update the drivers. pdisk(8) overwrote the list of drivers in block 0 with malloc garbage: $ MALLOC_OPTIONS=S pdisk -l wd0 ... Drivers- 1: 56283 @ 3688618971, type=0xdbdb 2: 56283 @ 3688618971, type=0xdbdb 3: 56283 @ 3688618971, type=0xdbdb 4: 56283 @ 3688618971, type=0xdbdb read_block0() should read the bytes from block0_ondisk->sbDDMap, not the uninitialized garbage from map->sbDDMap; and write_block0() should write to block0_ondisk->sbDDMap. I also change some betoh32 to betoh16, but the macppc host is already big-endian. I am editing code that krw@ edited in 2016. Today, I redid my macppc partitions. I built a bsd.rd (with gcc) with this diff. I reinstalled Mac OS 9, then OpenBSD. While in bsd.rd, I used pdisk to change my free space (left from Apple's Drive Setup) to an OpenBSD partition. Mac OS 9 still boots. --George Index: file_media.c === RCS file: /cvs/src/sbin/pdisk/file_media.c,v retrieving revision 1.48 diff -u -p -r1.48 file_media.c --- file_media.c30 Jan 2016 17:21:10 - 1.48 +++ file_media.c15 Mar 2020 15:58:51 - @@ -152,7 +152,7 @@ read_block0(int fd, struct partition_map for (i = 0; i < 8; i++) { memcpy(&ddmap_ondisk, - map->sbDDMap+i*sizeof(struct ddmap_ondisk), + block0_ondisk->sbDDMap+i*sizeof(struct ddmap_ondisk), sizeof(ddmap_ondisk)); memcpy(&map->sbDDMap[i].ddBlock, &ddmap_ondisk.ddBlock, sizeof(map->sbDDMap[i].ddBlock)); @@ -163,7 +163,7 @@ read_block0(int fd, struct partition_map map->sbDDMap[i].ddSize = betoh16(map->sbDDMap[i].ddSize); memcpy(&map->sbDDMap[i].ddType, &ddmap_ondisk.ddType, sizeof(map->sbDDMap[i].ddType)); - map->sbDDMap[i].ddType = betoh32(map->sbDDMap[i].ddType); + map->sbDDMap[i].ddType = betoh16(map->sbDDMap[i].ddType); } free(block0_ondisk); @@ -212,10 +212,10 @@ write_block0(int fd, struct partition_ma tmp16 = htobe16(map->sbDDMap[i].ddSize); memcpy(&ddmap_ondisk.ddSize, &tmp16, sizeof(ddmap_ondisk.ddSize)); - tmp16 = betoh32(map->sbDDMap[i].ddType); + tmp16 = betoh16(map->sbDDMap[i].ddType); memcpy(&ddmap_ondisk.ddType, &tmp16, sizeof(ddmap_ondisk.ddType)); - memcpy(map->sbDDMap+i*sizeof(struct ddmap_ondisk), + memcpy(block0_ondisk->sbDDMap+i*sizeof(struct ddmap_ondisk), &ddmap_ondisk, sizeof(ddmap_ondisk)); }
macppc kernel and clang
Hello tech@ list! With this diff, it becomes possible to build macppc kernel with base-clang. The default compiler for macppc is base-gcc. rgc mailed the ppc@ list to report problems with clang kernel, and got a working kernel with clang -Oz (to stop clang inlining some functions in openfirm.c) and my modified ppc_mftb(). In this diff, I use __attribute__((noinline)) so I don't need -Oz. I built my kernel with # clang CC=clang COMPILER_VERSION=clang because someone has already prepared the Makefile for macppc to pass different flags when COMPILER_VERSION is clang. I booted my clang kernel and it worked well enough to build another kernel with the same diff, but with gcc. In the second half of the diff, I change ppc_mftb(), because "mftb %0+1" doesn't always work in clang. For example, clang can put "=r"(tb) in register pair r27:r29, but 27+1 isn't 29. I don't know the syntax for the 2nd register of a pair, so I instead split "=r"(tb) into 2 variables. We use ppc_mftb() as timecounter. (gcc and clang emit different machine code for "mftbu" and "mftb", but both forms work for me. "objdump -dlr obj/clock.o" ppc_mftb() would show "mftb" from gcc or "mfspr" from clang. Power ISA 2.03 says that mfspr time base can't work on 601 or POWER3. A few Old World Mac models had PowerPC 601, but we only run on New World models.) Is the ppc_mftb() change by itself OK to commit? I am less sure about the first half of the diff, the noinline part. I don't like the design of this code. These functions call ofw_stack() like noinline int OF_peer(int phandle) { static struct ... args = ...; ofw_stack(); args.phandle = phandle; if (openfirmware(&args) == -1) return 0; return args.sibling; } but ofw_stack() is assembly code that changes the cpu's msr, moves the stack pointer %r1 to a different stack, copies the stack frame of OF_peer() to this new stack, and hijacks the saved return address so that, when OF_peer() returns, it switches back to the old stack and restores the msr. If clang inlines a function like OF_peer(), it no longer has its own stack frame, so the return-hijack stops working. We don't have retguard on powerpc, but if OF_peer() would use retguard to protect its return address, then the return-hijack might become impossible. Perhaps the code should look like msr = do_something_to_msr(); args.phandle = phandle; if (openfirmware(&args) == -1) ret = 0; else ret = args.sibling; ppc_mtmsr(msr); return ret; and openfirmware() should do the stack-switching, but I have not yet tried to make such a change. (I suspect that we need the msr to disable interrupts and protect the static args.) After the diff, I put my dmesg; my iMac G3 isn't GENERIC.MP and doesn't have radeondrm. --George Index: arch/macppc/macppc/openfirm.c === RCS file: /cvs/src/sys/arch/macppc/macppc/openfirm.c,v retrieving revision 1.12 diff -u -p -r1.12 openfirm.c --- arch/macppc/macppc/openfirm.c 3 Sep 2019 17:51:52 - 1.12 +++ arch/macppc/macppc/openfirm.c 11 Mar 2020 22:21:50 - @@ -38,10 +38,13 @@ #include +/* Callers of ofw_stack() must not be inline. */ +#define noinline __attribute__((noinline)) + extern void ofw_stack(void); extern void ofbcopy(const void *, void *, size_t); -int +noinline int OF_peer(int phandle) { static struct { @@ -63,7 +66,7 @@ OF_peer(int phandle) return args.sibling; } -int +noinline int OF_child(int phandle) { static struct { @@ -85,7 +88,7 @@ OF_child(int phandle) return args.child; } -int +noinline int OF_parent(int phandle) { static struct { @@ -107,7 +110,7 @@ OF_parent(int phandle) return args.parent; } -int +noinline int OF_getproplen(int handle, char *prop) { static struct { @@ -131,7 +134,7 @@ OF_getproplen(int handle, char *prop) return args.size; } -int +noinline int OF_getprop(int handle, char *prop, void *buf, int buflen) { static struct { @@ -163,7 +166,7 @@ OF_getprop(int handle, char *prop, void return args.size; } -int +noinline int OF_setprop(int handle, char *prop, const void *buf, int buflen) { static struct { @@ -194,7 +197,7 @@ OF_setprop(int handle, char *prop, const return args.size; } -int +noinline int OF_nextprop(int handle, char *prop, void *nextprop) { static struct { @@ -221,7 +224,7 @@ OF_nextprop(int handle, char *prop, void return args.flag; } -int +noinline int OF_interpret(char *cmd, int nreturns, ...) { va_list ap; @@ -258,7 +261,7 @@ OF_interpret(char *cmd, int nreturns, .. } -int +noinline int OF_finddevice(char *name) { static struct { @@ -281,7 +284,7 @@ OF_finddevice(char *name) } static void OF_rboot(char *bootspec); -static void +static noinline void
Re: macppc base-clang -msvr4-struct-return
This is the everything diff: it includes the clang diff that I sent to tech@, plus the major version bumps for libLLVM libc++ libc++abi. - distrib/sets/lists: put the new versions in the base sets. - gnu/llvm/tools/clang: -msvr4-struct-return - gnu/usr.bin/clang/libLLVM, lib/libcxx, lib/libcxxabi: new versions Index: distrib/sets/lists/base/md.amd64 === RCS file: /cvs/src/distrib/sets/lists/base/md.amd64,v retrieving revision 1.875 diff -u -p -r1.875 md.amd64 --- distrib/sets/lists/base/md.amd6430 Dec 2019 02:20:33 - 1.875 +++ distrib/sets/lists/base/md.amd6412 Feb 2020 20:52:32 - @@ -81,9 +81,9 @@ ./usr/lib/gcc-lib/amd64-unknown-openbsd6.6/4.2.1/collect2 ./usr/lib/gcc-lib/amd64-unknown-openbsd6.6/4.2.1/libgcc.a ./usr/lib/gcc-lib/amd64-unknown-openbsd6.6/4.2.1/specs -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/lib/libstdc++.so.57.0 ./usr/libdata/perl5/amd64-openbsd Index: distrib/sets/lists/base/md.arm64 === RCS file: /cvs/src/distrib/sets/lists/base/md.arm64,v retrieving revision 1.22 diff -u -p -r1.22 md.arm64 --- distrib/sets/lists/base/md.arm6430 Dec 2019 02:20:34 - 1.22 +++ distrib/sets/lists/base/md.arm6412 Feb 2020 20:52:32 - @@ -37,9 +37,9 @@ ./usr/lib/crtbeginS.o ./usr/lib/crtend.o ./usr/lib/crtendS.o -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/libdata/perl5/aarch64-openbsd ./usr/libdata/perl5/aarch64-openbsd/.packlist Index: distrib/sets/lists/base/md.armv7 === RCS file: /cvs/src/distrib/sets/lists/base/md.armv7,v retrieving revision 1.284 diff -u -p -r1.284 md.armv7 --- distrib/sets/lists/base/md.armv730 Dec 2019 02:20:34 - 1.284 +++ distrib/sets/lists/base/md.armv712 Feb 2020 20:52:32 - @@ -37,9 +37,9 @@ ./usr/lib/crtbeginS.o ./usr/lib/crtend.o ./usr/lib/crtendS.o -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/libdata/perl5/arm-openbsd ./usr/libdata/perl5/arm-openbsd/.packlist Index: distrib/sets/lists/base/md.i386 === RCS file: /cvs/src/distrib/sets/lists/base/md.i386,v retrieving revision 1.1254 diff -u -p -r1.1254 md.i386 --- distrib/sets/lists/base/md.i386 30 Dec 2019 02:20:34 - 1.1254 +++ distrib/sets/lists/base/md.i386 12 Feb 2020 20:52:32 - @@ -78,9 +78,9 @@ ./usr/lib/crtbeginS.o ./usr/lib/crtend.o ./usr/lib/crtendS.o -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/libdata/perl5/i386-openbsd ./usr/libdata/perl5/i386-openbsd/.packlist Index: distrib/sets/lists/base/md.loongson === RCS file: /cvs/src/distrib/sets/lists/base/md.loongson,v retrieving revision 1.456 diff -u -p -r1.456 md.loongson --- distrib/sets/lists/base/md.loongson 30 Dec 2019 02:20:34 - 1.456 +++ distrib/sets/lists/base/md.loongson 12 Feb 2020 20:52:32 - @@ -48,9 +48,9 @@ ./usr/lib/gcc-lib/mips64el-unknown-openbsd6.6/4.2.1/collect2 ./usr/lib/gcc-lib/mips64el-unknown-openbsd6.6/4.2.1/libgcc.a ./usr/lib/gcc-lib/mips64el-unknown-openbsd6.6/4.2.1/specs -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/lib/libstdc++.so.57.0 ./usr/libdata/perl5/mips64el-openbsd Index: distrib/sets/lists/base/md.macppc === RCS file: /cvs/src/distrib/sets/lists/base/md.macppc,v retrieving revision 1.1023 diff -u -p -r1.1023 md.macppc --- distrib/sets/lists/base/md.macppc 30 Dec 2019 02:20:34 - 1.1023 +++ distrib/sets/lists/base/md.macppc 12 Feb 2020 20:52:32 - @@ -78,9 +78,9 @@ ./usr/lib/gcc-lib/powerpc-unknown-openbsd6.6/4.2.1/collect2 ./usr/lib/gcc-lib/powerpc-unknown-openbsd6.6/4.2.1/libgcc.a ./usr/lib/gcc-lib/powerpc-unknown-openbsd6.6/4.2.1/specs -./usr/lib/libLLVM.so.1.0 -./usr/lib/libc++.so.3.0 -./usr/lib/libc++abi.so.1.0 +./usr/lib/libLLVM.so.2.0 +./usr/lib/libc++.so.4.0 +./usr/lib/libc++abi.so.2.0 ./usr/lib/libcompiler_rt.a ./usr/lib/libstdc++.so.57.0 ./usr/libdata/perl5/powerpc-openbsd Index: distrib/sets/lists/base/md.octeon === RCS fil
Re: macppc base-clang -msvr4-struct-return
On Tue, 11 Feb 2020 18:58:48 +0100 Jeremie Courreges-Anglas wrote: > So the steps would be: > - build and install a new clang > - bump the major of libc++, build and install it > - rebuild and reinstall clang > - build new snap You are the first person to suggest bumping .so majors; I didn't think to do so. If libc++ needs a major bump, then libc++abi and libLLVM might also need them. These 3 are the only .so (that I can find) built by base-clang on powerpc. Across base + xenocara + ports, almost nothing uses these 3 .so on powerpc. The only users that I find are - /usr/bin/clang* using libc++.so.3.0 and libc++abi.so.1.0 - nothing using libLLVM.so.1.0 (xenocara/lib/mesa uses libLLVM only for i386 amd64 arm64. I looked for ports with LLVM in WANTLIB, found only devel/py-llvmlite, but it requires base-clang, and ports doesn't use base-clang for powerpc.) If we don't bump majors, then people who upgrade OpenBSD macppc would get the new libc++.so.3.0 and libc++abi.so.1.0 at the same time as the new clang; and people on other platforms won't see an unnecessary major bump. If we do bump majors, then people who had used /usr/bin/clang++ on OpenBSD macppc might be able to run their old binaries with the old libc++.so.3.0 and libc++abi.so.1.0, while the new clang would use libc++.so.4.0 and libc++abi.so.2.0. --George
Re: macppc base-clang -msvr4-struct-return
On Tue, 11 Feb 2020 15:20:00 +0100 Jeremie Courreges-Anglas wrote: > fwiw I'm already ok with the diff George sent for ports/devel/llvm. > I'm mostly ok with this one but it would be nice to know whether > base-clang can rebuild itself. :) base-clang can't rebuild itself in the normal way. I have been exchanging mails with Todd Mortimer, who has been testing my diff with a faster macppc machine. The diff changes the ABI between /usr/bin/clang and /usr/lib/libc++.so.3.0, so when we install a new libc++ built by clang -msvr4-struct-return, but still have a clang built as if by -maix-struct-return, then clang crashes and can't rebuild itself! It might be possible to use a static-link clang to cross the ABI change. A backtrace from clang pointed to a function in libc++ that returns a std::string::iterator, a small struct where sizeof(iterator) == 4. This was not a problem with ports-clang, because we use ports-gcc to build ports-clang; and ports-clang uses libestdc++ (from ports-gcc), not libc++ (from base). Both ports-clang and libestdc++ got built by gcc -msvr4-struct-return. --George
macppc base-clang -msvr4-struct-return
Hello tech list, This is a diff for base-clang. It would change the powerpc target to return small structs in registers r3 and r4. This would fix an incompatibility with gcc in OpenBSD macppc. I fear that if I commit the diff, I might break snapshot builds. I am looking for help. I posted the diff upstream: https://reviews.llvm.org/D73290 Then backported it to ports-clang and mailed it to ports@: https://marc.info/?l=openbsd-ports&m=158026852003182&w=2 This is the same diff for base-clang. Apply diff in /usr/src/gnu/llvm, then build clang like # cd /usr/src/gnu/usr.bin/clang # make obj # make # make install My macppc machine, an iMac G3 with 512M RAM, is too slow to build llvm and clang. The iMac is now in the 11th day of building ports-clang (but I turned it off overnight), and within the last 200 of over 4400 targets. After a .cpp file got stuck while using over 800M swap (on hard disk), I set vm.swapencrypt.enable=0 and switched to swap on NFS. (For contrast, my amd64 desktop built ports-clang in about 2 hours.) cwen@ built ports-clang with my diff on a G4 in "29 hours". rgc built base-clang with the diff in this mail. Both got good results from my small test programs in the attachment (sret-examples.tar.gz) to my ports@ mail. The diff affects all platforms (if they build clang), so if the diff would break clang, it might break arches other than macppc. I built and installed base-clang with this diff on amd64 (using make -j4 to speed up the build). Then, with the patched clang, I built an amd64 release(8) of base and xenocara. I used my new install66.iso to install a new amd64 virtual machine, including the patched clang. We build most of OpenBSD macppc with base-gcc, but we use base-clang for at least clang itself. I don't know whether the patched base-clang can still build base-clang, whether the patch would break the base build on macppc. The patch adds -maix-struct-return and -msvr4-struct-return: $ clang -msvr-struct-return -x c /dev/null clang: error: unknown argument '-msvr-struct-return', did you mean '-msvr4-struct-return'? $ clang -msvr4-struct-return -x c /dev/null clang: error: unsupported option '-msvr4-struct-return' for target 'amd64-unknown-openbsd6.6' On OpenBSD macppc, gcc defaults to -msvr4-struct-return (to return small structs in registers r3 and r4), but clang without the patch always acts like gcc -maix-struct-return (to return small structs in memory through a sret pointer). The patch adds the options and changes the default to -msvr4-struct-return on OpenBSD (and other ELF except Linux). The options work only on 32-bit powerpc. When the patch returns a struct in registers, it coerces the struct to an integer; this changes the LLVM IR from clang to not use a sret pointer. The option handling for -m{aix,svr4}-struct-return is mostly a copy of that for -f{pcc,reg}-struct-return on i386. --George Index: tools/clang/include/clang/Driver/Options.td === RCS file: /cvs/src/gnu/llvm/tools/clang/include/clang/Driver/Options.td,v retrieving revision 1.11 diff -u -p -r1.11 Options.td --- tools/clang/include/clang/Driver/Options.td 23 Jun 2019 22:05:15 - 1.11 +++ tools/clang/include/clang/Driver/Options.td 30 Jan 2020 01:13:29 - @@ -2238,6 +2238,12 @@ def mlongcall: Flag<["-"], "mlongcall">, Group; def mno_longcall : Flag<["-"], "mno-longcall">, Group; +def maix_struct_return : Flag<["-"], "maix-struct-return">, + Group, Flags<[CC1Option]>, + HelpText<"Return all structs in memory (PPC32 only)">; +def msvr4_struct_return : Flag<["-"], "msvr4-struct-return">, + Group, Flags<[CC1Option]>, + HelpText<"Return small structs in registers (PPC32 only)">; def mvx : Flag<["-"], "mvx">, Group; def mno_vx : Flag<["-"], "mno-vx">, Group; Index: tools/clang/lib/CodeGen/TargetInfo.cpp === RCS file: /cvs/src/gnu/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp,v retrieving revision 1.1.1.7 diff -u -p -r1.1.1.7 TargetInfo.cpp --- tools/clang/lib/CodeGen/TargetInfo.cpp 23 Jun 2019 21:37:39 - 1.1.1.7 +++ tools/clang/lib/CodeGen/TargetInfo.cpp 30 Jan 2020 01:13:30 - @@ -4092,12 +4092,24 @@ namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; + bool IsRetSmallStructInRegABI; CharUnits getParamTypeAlignment(QualType Ty) const; public: - PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) - : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} + PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), +IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + void computeInfo(CGFunct