[PATCH v2] Issue 64-bit versions of *XSAVE* for 64-bit amd64 programs

2020-10-16 Thread Michał Górny
When calling FXSAVE, XSAVE, FXRSTOR, ... for 64-bit programs on amd64
use the 64-suffixed variant in order to include the complete FIP/FDP
registers in the x87 area.

The difference between the two variants is that the FXSAVE64 (new)
variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64),
while the legacy FXSAVE variant uses split fields: 32-bit offset,
16-bit segment and 16-bit reserved field (union fp_addr.fa_32).
The latter implies that the actual addresses are truncated to 32 bits
which is insufficient in modern programs.

The change is applied only to 64-bit programs on amd64.  Plain i386
and compat32 continue using plain FXSAVE.  Similarly, NVMM is not
changed as I am not familiar with that code.

This is a potentially breaking change.  However, I don't think it likely
to actually break anything because the data provided by the old variant
were not meaningful (because of the truncated pointer).
---
 sys/arch/x86/include/cpufunc.h | 76 ++
 sys/arch/x86/include/fpu.h |  4 +-
 sys/arch/x86/x86/fpu.c | 30 ++
 sys/dev/nvmm/x86/nvmm_x86_svm.c|  6 +-
 sys/dev/nvmm/x86/nvmm_x86_vmx.c|  6 +-
 tests/lib/libc/sys/t_ptrace_x86_wait.h |  2 -
 6 files changed, 105 insertions(+), 19 deletions(-)

diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h
index 38534c305544..dd8b0c7dc375 100644
--- a/sys/arch/x86/include/cpufunc.h
+++ b/sys/arch/x86/include/cpufunc.h
@@ -485,6 +485,82 @@ xrstor(const void *addr, uint64_t mask)
);
 }
 
+#ifdef __x86_64__
+static inline void
+fxsave64(void *addr)
+{
+   uint8_t *area = addr;
+
+   __asm volatile (
+   "fxsave64   %[area]"
+   : [area] "=m" (*area)
+   :
+   : "memory"
+   );
+}
+
+static inline void
+fxrstor64(const void *addr)
+{
+   const uint8_t *area = addr;
+
+   __asm volatile (
+   "fxrstor64 %[area]"
+   :
+   : [area] "m" (*area)
+   : "memory"
+   );
+}
+
+static inline void
+xsave64(void *addr, uint64_t mask)
+{
+   uint8_t *area = addr;
+   uint32_t low, high;
+
+   low = mask;
+   high = mask >> 32;
+   __asm volatile (
+   "xsave64%[area]"
+   : [area] "=m" (*area)
+   : "a" (low), "d" (high)
+   : "memory"
+   );
+}
+
+static inline void
+xsaveopt64(void *addr, uint64_t mask)
+{
+   uint8_t *area = addr;
+   uint32_t low, high;
+
+   low = mask;
+   high = mask >> 32;
+   __asm volatile (
+   "xsaveopt64 %[area]"
+   : [area] "=m" (*area)
+   : "a" (low), "d" (high)
+   : "memory"
+   );
+}
+
+static inline void
+xrstor64(const void *addr, uint64_t mask)
+{
+   const uint8_t *area = addr;
+   uint32_t low, high;
+
+   low = mask;
+   high = mask >> 32;
+   __asm volatile (
+   "xrstor64 %[area]"
+   :
+   : [area] "m" (*area), "a" (low), "d" (high)
+   : "memory"
+   );
+}
+#endif
+
 /* -- 
*/
 
 #ifdef XENPV
diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h
index 3255a8ca39e0..bdd86abfdd39 100644
--- a/sys/arch/x86/include/fpu.h
+++ b/sys/arch/x86/include/fpu.h
@@ -14,8 +14,8 @@ struct trapframe;
 void fpuinit(struct cpu_info *);
 void fpuinit_mxcsr_mask(void);
 
-void fpu_area_save(void *, uint64_t);
-void fpu_area_restore(const void *, uint64_t);
+void fpu_area_save(void *, uint64_t, bool);
+void fpu_area_restore(const void *, uint64_t, bool);
 
 void fpu_save(void);
 
diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c
index baff8d008299..e16a64f9ff8a 100644
--- a/sys/arch/x86/x86/fpu.c
+++ b/sys/arch/x86/x86/fpu.c
@@ -156,7 +156,7 @@ fpu_save_lwp(struct lwp *l)
s = splvm();
if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
KASSERT((l->l_flag & LW_SYSTEM) == 0);
-   fpu_area_save(area, x86_xsave_features);
+   fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & 
PK_32));
l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
}
splx(s);
@@ -246,21 +246,27 @@ fpu_errata_amd(void)
fldummy();
 }
 
+#ifdef __x86_64__
+#define XS64(x) (is_64bit ? x##64 : x)
+#else
+#define XS64(x) x
+#endif
+
 void
-fpu_area_save(void *area, uint64_t xsave_features)
+fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
 {
switch (x86_fpu_save) {
case FPU_SAVE_FSAVE:
fnsave(area);
break;
case FPU_SAVE_FXSAVE:
-   fxsave(area);
+   XS64(fxsave)(area);
break;
case FPU_SAVE_XSAVE:
-   xsave(area, xsave_features);
+   XS64(xsave)(area, xsave_features);
break;
case FPU_SAVE_XSAVEO

[PATCH v2 2/4] Fix s87_tw reconstruction to correctly indicate register states

2020-10-12 Thread Michał Górny
Fix the code reconstructing s87_tw (full tag word) from fx_sw (abridged
tag word) to correctly represent all register states.  The previous code
only distinguished between empty/non-empty registers, and assigned
'regular value' to all non-empty registers.  The new code explicitly
distinguishes the two other tag word values: empty and special.
---
 sys/arch/x86/x86/convert_xmm_s87.c | 28 --
 tests/lib/libc/sys/t_ptrace_x86_wait.h |  2 --
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/sys/arch/x86/x86/convert_xmm_s87.c 
b/sys/arch/x86/x86/convert_xmm_s87.c
index 53116545769e..0734fab3943c 100644
--- a/sys/arch/x86/x86/convert_xmm_s87.c
+++ b/sys/arch/x86/x86/convert_xmm_s87.c
@@ -40,7 +40,7 @@ __KERNEL_RCSID(0, "$NetBSD: convert_xmm_s87.c,v 1.3 
2014/02/15 22:20:42 dsl Exp
 void
 process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
 {
-   unsigned int tag, ab_tag;
+   unsigned int tag, ab_tag, st;
const struct fpaccfx *fx_reg;
struct fpacc87 *s87_reg;
int i;
@@ -95,12 +95,28 @@ process_xmm_to_s87(const struct fxsave *sxmm, struct save87 
*s87)
return;
}
 
+   /* For ST(i), i = fpu_reg - top, we start with fpu_reg=7. */
+   st = 7 - ((sxmm->fx_sw >> 11) & 7);
tag = 0;
-   /* Separate bits of abridged tag word with zeros */
-   for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
-   tag |= ab_tag & i;
-   /* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
-   s87->s87_tw = (tag | tag >> 1) ^ 0x;
+   for (i = 0x80; i != 0; i >>= 1) {
+   tag <<= 2;
+   if (ab_tag & i) {
+   unsigned int exp;
+   /* Non-empty - we need to check ST(i) */
+   fx_reg = &sxmm->fx_87_ac[st];
+   exp = fx_reg->r.f87_exp_sign & 0x7fff;
+   if (exp == 0) {
+   if (fx_reg->r.f87_mantissa == 0)
+   tag |= 1; /* Zero */
+   else
+   tag |= 2; /* Denormal */
+   } else if (exp == 0x7fff)
+   tag |= 2; /* Infinity or NaN */
+   } else
+   tag |= 3; /* Empty */
+   st = (st - 1) & 7;
+   }
+   s87->s87_tw = tag;
 }
 
 void
diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h 
b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index d7e93fde55a3..6067066a36c1 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -3367,10 +3367,8 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
expected_fpu.cw);
ATF_CHECK_EQ(fpr.fstate.s87_sw,
expected_fpu.sw);
-#if 0 /* TODO: translation from FXSAVE is broken */
ATF_CHECK_EQ(fpr.fstate.s87_tw,
expected_fpu.tw);
-#endif
ATF_CHECK_EQ(fpr.fstate.s87_opcode,
expected_fpu.opcode);
ATF_CHECK_EQ(fpr.fstate.s87_ip.fa_32.fa_off,
-- 
2.28.0



[PATCH v2 4/4] Add tests for process_xmm_to_s87() and process_s87_to_xmm()

2020-10-12 Thread Michał Górny
---
 distrib/sets/lists/tests/mi   |   5 +
 etc/mtree/NetBSD.dist.tests   |   2 +
 tests/sys/Makefile|   3 +
 tests/sys/x86/Makefile|  11 ++
 tests/sys/x86/t_convert_xmm_s87.c | 211 ++
 5 files changed, 232 insertions(+)
 create mode 100644 tests/sys/x86/Makefile
 create mode 100644 tests/sys/x86/t_convert_xmm_s87.c

diff --git a/distrib/sets/lists/tests/mi b/distrib/sets/lists/tests/mi
index 007e6d91c7d0..c05e2a0a56e7 100644
--- a/distrib/sets/lists/tests/mi
+++ b/distrib/sets/lists/tests/mi
@@ -191,6 +191,7 @@
 ./usr/libdata/debug/usr/tests/sys/netatalk tests-sys-debug 
compattestfile,atf
 ./usr/libdata/debug/usr/tests/sys/netinet  tests-sys-debug 
compattestfile,atf
 ./usr/libdata/debug/usr/tests/sys/netinet6 tests-sys-debug 
compattestfile,atf
+./usr/libdata/debug/usr/tests/sys/x86  tests-sys-debug 
compattestfile,atf
 ./usr/libdata/debug/usr/tests/syscall  tests-obsolete  
obsolete
 ./usr/libdata/debug/usr/tests/usr.bin  tests-usr.bin-debug 
compattestfile,atf
 ./usr/libdata/debug/usr/tests/usr.bin/cpio tests-usr.bin-debug 
compattestfile,atf
@@ -4137,6 +4138,10 @@
 ./usr/tests/sys/rc/h_args  tests-sys-tests 
compattestfile,atf
 ./usr/tests/sys/rc/h_simpletests-sys-tests 
compattestfile,atf
 ./usr/tests/sys/rc/t_rc_d_cli  tests-sys-tests 
compattestfile,atf
+./usr/tests/sys/x86tests-sys-tests 
compattestfile,atf
+./usr/tests/sys/x86/Atffiletests-sys-tests 
compattestfile,atf
+./usr/tests/sys/x86/Kyuafile   tests-sys-tests 
compattestfile,atf,kyua
+./usr/tests/sys/x86/t_convert_xmm_s87  tests-sys-tests 
compattestfile,atf
 ./usr/tests/syscalltests-obsolete  
obsolete
 ./usr/tests/syscall/Atffiletests-obsolete  
obsolete
 ./usr/tests/syscall/t_access   tests-obsolete  
obsolete
diff --git a/etc/mtree/NetBSD.dist.tests b/etc/mtree/NetBSD.dist.tests
index 13365b71d317..0bd6742b2556 100644
--- a/etc/mtree/NetBSD.dist.tests
+++ b/etc/mtree/NetBSD.dist.tests
@@ -171,6 +171,7 @@
 ./usr/libdata/debug/usr/tests/sys/netatalk
 ./usr/libdata/debug/usr/tests/sys/netinet
 ./usr/libdata/debug/usr/tests/sys/netinet6
+./usr/libdata/debug/usr/tests/sys/x86
 ./usr/libdata/debug/usr/tests/usr.bin
 ./usr/libdata/debug/usr/tests/usr.bin/cpio
 ./usr/libdata/debug/usr/tests/usr.bin/id
@@ -402,6 +403,7 @@
 ./usr/tests/sys/netinet
 ./usr/tests/sys/netinet6
 ./usr/tests/sys/rc
+./usr/tests/sys/x86
 ./usr/tests/usr.bin
 ./usr/tests/usr.bin/awk
 ./usr/tests/usr.bin/basename
diff --git a/tests/sys/Makefile b/tests/sys/Makefile
index de28a0c258a5..0f0f0720fa18 100644
--- a/tests/sys/Makefile
+++ b/tests/sys/Makefile
@@ -10,5 +10,8 @@ TESTS_SUBDIRS+=   netatalk
 TESTS_SUBDIRS+=netinet
 TESTS_SUBDIRS+=netinet6
 TESTS_SUBDIRS+=rc
+.if ${MACHINE} == amd64 || ${MACHINE} == i386
+TESTS_SUBDIRS+=x86
+.endif
 
 .include 
diff --git a/tests/sys/x86/Makefile b/tests/sys/x86/Makefile
new file mode 100644
index ..8273b586429a
--- /dev/null
+++ b/tests/sys/x86/Makefile
@@ -0,0 +1,11 @@
+# $NetBSD$
+#
+
+.include 
+
+TESTSDIR=  ${TESTSBASE}/sys/x86
+CPPFLAGS+= -I${NETBSDSRCDIR}/sys
+
+TESTS_C=   t_convert_xmm_s87
+
+.include 
diff --git a/tests/sys/x86/t_convert_xmm_s87.c 
b/tests/sys/x86/t_convert_xmm_s87.c
new file mode 100644
index ..cefd74d6d572
--- /dev/null
+++ b/tests/sys/x86/t_convert_xmm_s87.c
@@ -0,0 +1,211 @@
+/* $NetBSD$*/
+
+/*-
+ * Copyright (c) 2020 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Michał Górny for Moritz Systems Technology Company Sp. z o.o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIAB

[PATCH v2 1/4] Revert "Merge convert_xmm_s87.c into fpu.c"

2020-10-12 Thread Michał Górny
I am going to add ATF tests for these two functions, and having them
in a separate file will make it more convenient to build and run them
in userspace.
---
 sys/arch/amd64/conf/files.amd64|   1 +
 sys/arch/i386/conf/files.i386  |   1 +
 sys/arch/x86/include/fpu.h |   3 +
 sys/arch/x86/x86/convert_xmm_s87.c | 156 +
 sys/arch/x86/x86/fpu.c | 120 --
 5 files changed, 161 insertions(+), 120 deletions(-)
 create mode 100644 sys/arch/x86/x86/convert_xmm_s87.c

diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 1ebf70169dc5..c7a12226af9a 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -51,6 +51,7 @@ file  arch/amd64/amd64/process_machdep.c  machdep
 file   arch/amd64/amd64/trap.c machdep
 file   arch/x86/x86/fpu.c  machdep
 file   arch/x86/x86/dbregs.c   machdep
+file   arch/x86/x86/convert_xmm_s87.c  machdep
 file   arch/x86/x86/spectre.c  machdep & !xenpv
 file   arch/amd64/amd64/lock_stubs.S   machdep
 file   dev/cons.c  machdep
diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386
index 5b9d3ef3017a..1cff33234412 100644
--- a/sys/arch/i386/conf/files.i386
+++ b/sys/arch/i386/conf/files.i386
@@ -69,6 +69,7 @@ file  arch/i386/i386/machdep.c
 file   arch/i386/i386/longrun.c
 file   arch/i386/i386/mtrr_k6.cmtrr
 file   arch/i386/i386/process_machdep.c
+file   arch/x86/x86/convert_xmm_s87.c
 file   arch/i386/i386/trap.c
 file   dev/cons.c
 file   arch/x86/x86/fpu.c
diff --git a/sys/arch/x86/include/fpu.h b/sys/arch/x86/include/fpu.h
index fe73037001f6..80ae18cb793b 100644
--- a/sys/arch/x86/include/fpu.h
+++ b/sys/arch/x86/include/fpu.h
@@ -24,6 +24,9 @@ void fpu_set_default_cw(struct lwp *, unsigned int);
 void fputrap(struct trapframe *);
 void fpudna(struct trapframe *);
 
+void process_xmm_to_s87(const struct fxsave *, struct save87 *);
+void process_s87_to_xmm(const struct save87 *, struct fxsave *);
+
 void fpu_clear(struct lwp *, unsigned int);
 void fpu_sigreset(struct lwp *);
 
diff --git a/sys/arch/x86/x86/convert_xmm_s87.c 
b/sys/arch/x86/x86/convert_xmm_s87.c
new file mode 100644
index ..53116545769e
--- /dev/null
+++ b/sys/arch/x86/x86/convert_xmm_s87.c
@@ -0,0 +1,156 @@
+/* $NetBSD: convert_xmm_s87.c,v 1.3 2014/02/15 22:20:42 dsl Exp $  */
+
+/*-
+ * Copyright (c) 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum; by Jason R. Thorpe of Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+__KERNEL_RCSID(0, "$NetBSD: convert_xmm_s87.c,v 1.3 2014/02/15 22:20:42 dsl 
Exp $");
+
+
+#include 
+#include 
+#include 
+
+void
+process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
+{
+   unsigned int tag, ab_tag;
+   const struct fpaccfx *fx_reg;
+   struct fpacc87 *s87_reg;
+   int i;
+
+   /*
+* For historic reasons core dumps and ptrace all use the old save87
+* layout.  Convert the important parts.
+* getucontext gets what we give it.
+* setucontext should return something given by getucontext, but
+* we are (at the moment) willing to change it.
+*
+* It really isn't worth setting the 'tag' bits to 01 (zero) or
+* 10 (NaN etc) since the processor will set any internal bits
+* correctly when the value is loaded (the 287 believed them).
+*
+* Additionally the s87_tw an

[PATCH v2 3/4] Remove unnecessary include

2020-10-12 Thread Michał Górny
---
 sys/arch/x86/x86/convert_xmm_s87.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/arch/x86/x86/convert_xmm_s87.c 
b/sys/arch/x86/x86/convert_xmm_s87.c
index 0734fab3943c..3a2932958ace 100644
--- a/sys/arch/x86/x86/convert_xmm_s87.c
+++ b/sys/arch/x86/x86/convert_xmm_s87.c
@@ -34,7 +34,6 @@ __KERNEL_RCSID(0, "$NetBSD: convert_xmm_s87.c,v 1.3 
2014/02/15 22:20:42 dsl Exp
 
 
 #include 
-#include 
 #include 
 
 void
-- 
2.28.0



[PATCH v2 0/4] Fix s87_tw reconstruction to correctly indicate register states

2020-10-12 Thread Michał Górny
Hi,

Here's the same as previously but with tests.  I have noticed that
the two relevant functions were in a separate file before, and I have
decided to revert the patch in order to separate them again (for tests).

The tests use similar approach as sys/netatalk tests, i.e. #include
the .c file containing utility functions in order to compile and test
them in userspace.

The first test issues series of FSAVE/FXSAVE calls for different test
vectors.  The result of each is converted to the other format,
and compared to the result of corresponding instruction.

The two next tests use fixed test data to populate appropriate structs,
and compare the result of conversion with known expected values.

---

Michał Górny (4):
  Revert "Merge convert_xmm_s87.c into fpu.c"
  Fix s87_tw reconstruction to correctly indicate register states
  Remove unnecessary  include
  Add tests for process_xmm_to_s87() and process_s87_to_xmm()

 distrib/sets/lists/tests/mi|   5 +
 etc/mtree/NetBSD.dist.tests|   2 +
 sys/arch/amd64/conf/files.amd64|   1 +
 sys/arch/i386/conf/files.i386  |   1 +
 sys/arch/x86/include/fpu.h |   3 +
 sys/arch/x86/x86/convert_xmm_s87.c | 171 
 sys/arch/x86/x86/fpu.c | 120 --
 tests/lib/libc/sys/t_ptrace_x86_wait.h |   2 -
 tests/sys/Makefile |   3 +
 tests/sys/x86/Makefile |  11 ++
 tests/sys/x86/t_convert_xmm_s87.c  | 211 +
 11 files changed, 408 insertions(+), 122 deletions(-)
 create mode 100644 sys/arch/x86/x86/convert_xmm_s87.c
 create mode 100644 tests/sys/x86/Makefile
 create mode 100644 tests/sys/x86/t_convert_xmm_s87.c

-- 
2.28.0



[PATCH] Fix s87_tw reconstruction to correctly indicate register states

2020-10-11 Thread Michał Górny
Fix the code reconstructing s87_tw (full tag word) from fx_sw (abridged
tag word) to correctly represent all register states.  The previous code
only distinguished between empty/non-empty registers, and assigned
'regular value' to all non-empty registers.  The new code explicitly
distinguishes the two other tag word values: empty and special.
---
 sys/arch/x86/x86/fpu.c | 28 --
 tests/lib/libc/sys/t_ptrace_x86_wait.h |  2 --
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/sys/arch/x86/x86/fpu.c b/sys/arch/x86/x86/fpu.c
index d89c78dcef47..c580f34b0615 100644
--- a/sys/arch/x86/x86/fpu.c
+++ b/sys/arch/x86/x86/fpu.c
@@ -668,7 +668,7 @@ fpu_sigreset(struct lwp *l)
 static void
 process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
 {
-   unsigned int tag, ab_tag;
+   unsigned int tag, ab_tag, st;
const struct fpaccfx *fx_reg;
struct fpacc87 *s87_reg;
int i;
@@ -723,12 +723,28 @@ process_xmm_to_s87(const struct fxsave *sxmm, struct 
save87 *s87)
return;
}
 
+   /* For ST(i), i = fpu_reg - top, we start with fpu_reg=7. */
+   st = 7 - ((sxmm->fx_sw >> 11) & 7);
tag = 0;
-   /* Separate bits of abridged tag word with zeros */
-   for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
-   tag |= ab_tag & i;
-   /* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
-   s87->s87_tw = (tag | tag >> 1) ^ 0x;
+   for (i = 0x80; i != 0; i >>= 1) {
+   tag <<= 2;
+   if (ab_tag & i) {
+   unsigned int exp;
+   /* Non-empty - we need to check ST(i) */
+   fx_reg = &sxmm->fx_87_ac[st];
+   exp = fx_reg->r.f87_exp_sign & 0x7fff;
+   if (exp == 0) {
+   if (fx_reg->r.f87_mantissa == 0)
+   tag |= 1; /* Zero */
+   else
+   tag |= 2; /* Denormal */
+   } else if (exp == 0x7fff)
+   tag |= 2; /* Infinity or NaN */
+   } else
+   tag |= 3; /* Empty */
+   st = (st - 1) & 7;
+   }
+   s87->s87_tw = tag;
 }
 
 static void
diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h 
b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index d7e93fde55a3..6067066a36c1 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -3367,10 +3367,8 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
expected_fpu.cw);
ATF_CHECK_EQ(fpr.fstate.s87_sw,
expected_fpu.sw);
-#if 0 /* TODO: translation from FXSAVE is broken */
ATF_CHECK_EQ(fpr.fstate.s87_tw,
expected_fpu.tw);
-#endif
ATF_CHECK_EQ(fpr.fstate.s87_opcode,
expected_fpu.opcode);
ATF_CHECK_EQ(fpr.fstate.s87_ip.fa_32.fa_off,
-- 
2.28.0



[PATCH] Issue 64-bit (REX.W=1) versions of *XSAVE* on amd64

2020-10-01 Thread Michał Górny
When calling FXSAVE, XSAVE, FXRSTOR, ... on amd64 use the "64"-suffixed
variant in order to prefix the operand with REX.W=1.  This causes them
to use the FXSAVE64 variant of x87 area that includes complete FIP/FDP
registers.

The difference between the two variants is that the FXSAVE64 (new)
variant represents FIP/FDP as 64-bit fields (union fp_addr.fa_64),
while the legacy FXSAVE variant uses split fields: 32-bit offset,
16-bit segment and 16-bit reserved field (union fp_addr.fa_32).
The latter implies that the actual addresses are truncated to 32 bits
which is insufficient in modern programs.

This is a potentially breaking change.  However, I don't think it likely
to actually break anything because the data provided by the old variant
were not meaningful (because of the truncated pointer).
---
 sys/arch/x86/include/cpufunc.h | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/sys/arch/x86/include/cpufunc.h b/sys/arch/x86/include/cpufunc.h
index 38534c305544..e0ea1e3210b8 100644
--- a/sys/arch/x86/include/cpufunc.h
+++ b/sys/arch/x86/include/cpufunc.h
@@ -411,13 +411,19 @@ frstor(const void *addr)
);
 }
 
+#ifdef __x86_64__
+#define XSAVE_SUFFIX "64"
+#else
+#define XSAVE_SUFFIX
+#endif
+
 static inline void
 fxsave(void *addr)
 {
uint8_t *area = addr;
 
__asm volatile (
-   "fxsave %[area]"
+   "fxsave" XSAVE_SUFFIX " %[area]"
: [area] "=m" (*area)
:
: "memory"
@@ -430,7 +436,7 @@ fxrstor(const void *addr)
const uint8_t *area = addr;
 
__asm volatile (
-   "fxrstor %[area]"
+   "fxrstor" XSAVE_SUFFIX " %[area]"
:
: [area] "m" (*area)
: "memory"
@@ -446,7 +452,7 @@ xsave(void *addr, uint64_t mask)
low = mask;
high = mask >> 32;
__asm volatile (
-   "xsave  %[area]"
+   "xsave" XSAVE_SUFFIX "  %[area]"
: [area] "=m" (*area)
: "a" (low), "d" (high)
: "memory"
@@ -462,7 +468,7 @@ xsaveopt(void *addr, uint64_t mask)
low = mask;
high = mask >> 32;
__asm volatile (
-   "xsaveopt %[area]"
+   "xsaveopt" XSAVE_SUFFIX " %[area]"
: [area] "=m" (*area)
: "a" (low), "d" (high)
: "memory"
@@ -478,7 +484,7 @@ xrstor(const void *addr, uint64_t mask)
low = mask;
high = mask >> 32;
__asm volatile (
-   "xrstor %[area]"
+   "xrstor" XSAVE_SUFFIX " %[area]"
:
: [area] "m" (*area), "a" (low), "d" (high)
: "memory"
-- 
2.28.0



Re: Proposal to enable WAPBL by default for 10.0

2020-07-22 Thread Michał Górny
On Thu, 2020-07-23 at 05:17 +, David Holland wrote:
> On Wed, Jul 22, 2020 at 11:24:16PM +0200, Kamil Rytarowski wrote:
>  > I propose to enable WAPBL ("log" in fstab(5)) by default for 10.0 and 
> newer.
>  > [...]
>  >
>  > Rationale: the default filesystem (FFS) without WAPBL is more prone to
>  > data loss.
> 
> It is not, unfortunately. We had WAPBL on by default some time back
> and eventually switched it off.
> 
> The problem is that because it still doesn't do anything about
> journaling or preserving file contents, but runs a lot faster, it
> loses more data when interrupted.

How does that compare to the level of damage non-journaled FFS takes?
 My VM was just bricked a second time because /etc/passwd was turned to
junk.  I dare say that a proper metadata journaling + proper writes
(i.e. using rename() -- haven't verified whether that's done correctly)
should prevent that from happening again.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v5 3/4] Combine x86 register tests into unified test function

2019-12-24 Thread Michał Górny
Reduce the code duplication and improve maintainability of x86 register
tests by combining all of them to a single base function.
---
 tests/lib/libc/sys/t_ptrace_amd64_wait.h |  406 +---
 tests/lib/libc/sys/t_ptrace_i386_wait.h  |  335 +--
 tests/lib/libc/sys/t_ptrace_x86_wait.h   | 2417 ++
 3 files changed, 1103 insertions(+), 2055 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_amd64_wait.h 
b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
index 1ea17ea1ec1a..0f410f3600d0 100644
--- a/tests/lib/libc/sys/t_ptrace_amd64_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
@@ -111,415 +111,11 @@ ATF_TC_BODY(x86_64_regs1, tc)
TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
 }
 
-ATF_TC(x86_64_regs_gp_read);
-ATF_TC_HEAD(x86_64_regs_gp_read, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values from debugged program and read "
-   "them via PT_GETREGS, comparing values against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_read, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi = 0x5051525354555657;
-   const uint64_t rsp = 0x6061626364656667;
-   const uint64_t rbp = 0x7071727374757677;
-
-   DPRINTF("Before forking process PID=%d\n", getpid());
-   SYSCALL_REQUIRE((child = fork()) != -1);
-   if (child == 0) {
-   DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
-   FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
-
-   DPRINTF("Before running assembly from child\n");
-
-   __asm__ __volatile__(
-   /* rbp & rbp are a bit tricky, we must not clobber them 
*/
-   "movq%%rsp, %%r8\n\t"
-   "movq%%rbp, %%r9\n\t"
-   "movq%6, %%rsp\n\t"
-   "movq%7, %%rbp\n\t"
-   "\n\t"
-   "int3\n\t"
-   "\n\t"
-   "movq%%r8, %%rsp\n\t"
-   "movq%%r9, %%rbp\n\t"
-   :
-   : "a"(rax), "b"(rbx), "c"(rcx), "d"(rdx), "S"(rsi), 
"D"(rdi),
- "i"(rsp), "i"(rbp)
-   : "%r8", "%r9"
-   );
-
-   DPRINTF("Before exiting of the child process\n");
-   _exit(exitval);
-   }
-   DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_stopped(status, sigval);
-
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RAX], rax);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBX], rbx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RCX], rcx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDX], rdx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSI], rsi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDI], rdi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSP], rsp);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBP], rbp);
-
-   DPRINTF("Before resuming the child process where it left off and "
-   "without signal to be sent\n");
-   SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_exited(status, exitval);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
-}
-
-ATF_TC(x86_64_regs_gp_write);
-ATF_TC_HEAD(x86_64_regs_gp_write, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values into a debugged program via "
-   "PT_SETREGS and compare the result against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_write, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi 

[PATCH v5 1/4] Include XSTATE note in x86 core dumps

2019-12-24 Thread Michał Górny
Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
API for injecting per-LWP notes into coredumps, and use it to append
PT_GETXSTATE note.

Since the XSTATE block uses the same format on i386 and amd64, the code
does not have to conditionalize between 32-bit and 64-bit ELF format
on that.  However, it does need to distinguish between 32-bit and 64-bit
PT_* values.  In order to do that, it reuses PT32_* constant already
present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
the cpp logic.
---
 sys/arch/amd64/include/ptrace.h | 16 
 sys/arch/i386/include/ptrace.h  | 12 
 sys/kern/core_elf32.c   |  6 +-
 3 files changed, 33 insertions(+), 1 deletion(-)

Changes in v5:
- used roundup() in patch 2

diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index f9a98a63c2b1..987a13923b85 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -105,6 +105,22 @@ int process_machdep_validfpu(struct proc *);
 MODULE_HOOK(netbsd32_process_doxmmregs_hook, int,
 (struct lwp *, struct lwp *, void *, bool));
 
+#ifdef EXEC_ELF32
+#include 
+#endif
+#define PT64_GETXSTATE PT_GETXSTATE
+#define COREDUMP_MACHDEP_LWP_NOTES(l, ns, name)
\
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   if (!process_read_xstate(l, &xstate))   \
+   {   \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name,  \
+   &xstate, sizeof(xstate));   \
+   }   \
+}
+
 #endif /* _KERNEL */
 
 #ifdef _KERNEL_OPT
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 41ad7a119f0b..561be6440ac0 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -159,6 +159,18 @@
{ DT_REG, N("xmmregs"), Pmachdep_xmmregs,   \
  procfs_machdep_validxmmregs },
 
+#define COREDUMP_MACHDEP_LWP_NOTES(l, ns, name)
\
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   if (!process_read_xstate(l, &xstate))   \
+   {   \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name,  \
+   &xstate, sizeof(xstate));   \
+   }   \
+}
+
 struct xmmregs;
 
 /* Functions used by both ptrace(2) and procfs. */
diff --git a/sys/kern/core_elf32.c b/sys/kern/core_elf32.c
index 7db43d0cc10b..95e5658053c8 100644
--- a/sys/kern/core_elf32.c
+++ b/sys/kern/core_elf32.c
@@ -506,7 +506,11 @@ ELFNAMEEND(coredump_note)(struct lwp *l, struct note_state 
*ns)
 
ELFNAMEEND(coredump_savenote)(ns, PT_GETFPREGS, name, &freg, freglen);
 #endif
-   /* XXX Add hook for machdep per-LWP notes. */
+
+#ifdef COREDUMP_MACHDEP_LWP_NOTES
+   COREDUMP_MACHDEP_LWP_NOTES(l, ns, name);
+#endif
+
return (0);
 }
 
-- 
2.24.1



[PATCH v5 4/4] Add tests for reading registers from x86 core dumps

2019-12-24 Thread Michał Górny
---
 tests/lib/libc/sys/t_ptrace_x86_wait.h | 187 +++--
 1 file changed, 141 insertions(+), 46 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h 
b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index 4572e3eb8b23..7867f1e8dd1c 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -2225,7 +2225,8 @@ enum x86_test_registers {
 
 enum x86_test_regmode {
TEST_GETREGS,
-   TEST_SETREGS
+   TEST_SETREGS,
+   TEST_COREDUMP
 };
 
 static __inline void get_gp32_regs(union x86_test_register out[])
@@ -2687,8 +2688,10 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 #endif
struct xstate xst;
struct iovec iov;
-   struct fxsave* fxs;
+   struct fxsave* fxs = NULL;
uint64_t xst_flags = 0;
+   char core_path[] = "/tmp/core.XX";
+   int core_fd;
 
const union x86_test_register expected[] __aligned(32) = {
{{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
@@ -2796,6 +2799,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
DPRINTF("Before running assembly from child\n");
switch (regmode) {
case TEST_GETREGS:
+   case TEST_COREDUMP:
switch (regs) {
case GPREGS_32:
set_gp32_regs(expected);
@@ -2969,40 +2973,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 
validate_status_stopped(status, sigval);
 
-   switch (regset) {
-   case TEST_GPREGS:
-   ATF_REQUIRE(regs < FPREGS_MM);
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-   break;
-   case TEST_XMMREGS:
-#if defined(__i386__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   DPRINTF("Call GETXMMREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0) != -1);
-   fxs = &xmm.fxstate;
-   break;
-#else
-   /*FALLTHROUGH*/
-#endif
-   case TEST_FPREGS:
-#if defined(__x86_64__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   fxs = &fpr.fxstate;
-#else
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
-#endif
-   DPRINTF("Call GETFPREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0) != -1);
-   break;
-   case TEST_XSTATE:
-   ATF_REQUIRE(regs >= FPREGS_MM);
-   iov.iov_base = &xst;
-   iov.iov_len = sizeof(xst);
-
-   DPRINTF("Call GETXSTATE for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
-
+   if (regset == TEST_XSTATE) {
switch (regs) {
case FPREGS_MM:
xst_flags |= XCR0_X87;
@@ -3020,21 +2991,117 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
__unreachable();
break;
}
+   }
 
-   ATF_REQUIRE((xst.xs_rfbm & xst_flags) == xst_flags);
-   switch (regmode) {
-   case TEST_SETREGS:
-   xst.xs_rfbm = xst_flags;
-   xst.xs_xstate_bv = xst_flags;
+   switch (regmode) {
+   case TEST_GETREGS:
+   case TEST_SETREGS:
+   switch (regset) {
+   case TEST_GPREGS:
+   ATF_REQUIRE(regs < FPREGS_MM);
+   DPRINTF("Call GETREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0)
+   != -1);
break;
-   case TEST_GETREGS:
+   case TEST_XMMREGS:
+#if defined(__i386__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   DPRINTF("Call GETXMMREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0)
+   != -1);
+   fxs = &xmm.fxstate;
+   break;
+#else
+   /*FALLTHROUGH*/
+#endif
+   case TEST_FPREGS:
+#if defined(__x86_64__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   fxs = &fpr.fxstate;
+#else
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
+#endif
+   DPRINTF("Call GETFPREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0)
+   != -1);
+   break;
+   case TEST_XSTATE:
+   A

[PATCH v5 2/4] Fix alignment when reading core notes

2019-12-24 Thread Michał Górny
Both desc and note header needs to be aligned.  Therefore, we need
to realign after skipping past desc as well.

While at it, fix the other alignment fix to use roundup() macro.
---
 tests/lib/libc/sys/t_ptrace_wait.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Changes in v5:
- use roundup() to align offsets.

diff --git a/tests/lib/libc/sys/t_ptrace_wait.c 
b/tests/lib/libc/sys/t_ptrace_wait.c
index 6fc43572e015..b20c94295326 100644
--- a/tests/lib/libc/sys/t_ptrace_wait.c
+++ b/tests/lib/libc/sys/t_ptrace_wait.c
@@ -7590,8 +7590,7 @@ static ssize_t core_find_note(const char *core_path,
 
offset += note_hdr.n_namesz;
/* fix to alignment */
-   offset = ((offset + core_hdr.p_align - 1)
-   / core_hdr.p_align) * core_hdr.p_align;
+   offset = roundup(offset, core_hdr.p_align);
 
/* if name & type matched above */
if (ret != -1) {
@@ -7603,6 +7602,8 @@ static ssize_t core_find_note(const char *core_path,
}
 
offset += note_hdr.n_descsz;
+   /* fix to alignment */
+   offset = roundup(offset, core_hdr.p_align);
}
}
 
-- 
2.24.1



Re: [PATCH v4 2/4] Fix alignment when reading core notes

2019-12-23 Thread Michał Górny
On Tue, 2019-12-24 at 06:57 +, Taylor R Campbell wrote:
> > Date: Tue, 24 Dec 2019 07:52:14 +0100
> > From: Micha�  Górny 
> > 
> > On Mon, 2019-12-23 at 16:17 +, Christos Zoulas wrote:
> > > In article <20191222164104.165346-2-mgo...@gentoo.org>,
> > > Micha�  Górny   wrote:
> > > > Both desc and note header needs to be aligned.  Therefore, we need
> > > > to realign after skipping past desc as well.
> > > 
> > > Should probably be done with a standard alignment macro?
> > 
> > Could you be more specific?  I don't see any macro that would accept
> > actual alignment value.  Well, besides STACK_ALIGN() but that seems to
> > do some weird casting, so I presume it's not semantically correct here.
> 
> Instead of
> 
> + offset = ((offset + core_hdr.p_align - 1)
> + / core_hdr.p_align) * core_hdr.p_align;
> 
> do
> 
> + offset = roundup(offset, core_hdr.p_align);

Thanks.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v4 2/4] Fix alignment when reading core notes

2019-12-23 Thread Michał Górny
On Mon, 2019-12-23 at 16:17 +, Christos Zoulas wrote:
> In article <20191222164104.165346-2-mgo...@gentoo.org>,
> Micha�  Górny   wrote:
> > Both desc and note header needs to be aligned.  Therefore, we need
> > to realign after skipping past desc as well.
> 
> Should probably be done with a standard alignment macro?
> 

Could you be more specific?  I don't see any macro that would accept
actual alignment value.  Well, besides STACK_ALIGN() but that seems to
do some weird casting, so I presume it's not semantically correct here.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v4 3/4] Combine x86 register tests into unified test function

2019-12-22 Thread Michał Górny
Reduce the code duplication and improve maintainability of x86 register
tests by combining all of them to a single base function.
---
 tests/lib/libc/sys/t_ptrace_amd64_wait.h |  406 +---
 tests/lib/libc/sys/t_ptrace_i386_wait.h  |  335 +--
 tests/lib/libc/sys/t_ptrace_x86_wait.h   | 2417 ++
 3 files changed, 1103 insertions(+), 2055 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_amd64_wait.h 
b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
index 1ea17ea1ec1a..0f410f3600d0 100644
--- a/tests/lib/libc/sys/t_ptrace_amd64_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
@@ -111,415 +111,11 @@ ATF_TC_BODY(x86_64_regs1, tc)
TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
 }
 
-ATF_TC(x86_64_regs_gp_read);
-ATF_TC_HEAD(x86_64_regs_gp_read, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values from debugged program and read "
-   "them via PT_GETREGS, comparing values against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_read, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi = 0x5051525354555657;
-   const uint64_t rsp = 0x6061626364656667;
-   const uint64_t rbp = 0x7071727374757677;
-
-   DPRINTF("Before forking process PID=%d\n", getpid());
-   SYSCALL_REQUIRE((child = fork()) != -1);
-   if (child == 0) {
-   DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
-   FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
-
-   DPRINTF("Before running assembly from child\n");
-
-   __asm__ __volatile__(
-   /* rbp & rbp are a bit tricky, we must not clobber them 
*/
-   "movq%%rsp, %%r8\n\t"
-   "movq%%rbp, %%r9\n\t"
-   "movq%6, %%rsp\n\t"
-   "movq%7, %%rbp\n\t"
-   "\n\t"
-   "int3\n\t"
-   "\n\t"
-   "movq%%r8, %%rsp\n\t"
-   "movq%%r9, %%rbp\n\t"
-   :
-   : "a"(rax), "b"(rbx), "c"(rcx), "d"(rdx), "S"(rsi), 
"D"(rdi),
- "i"(rsp), "i"(rbp)
-   : "%r8", "%r9"
-   );
-
-   DPRINTF("Before exiting of the child process\n");
-   _exit(exitval);
-   }
-   DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_stopped(status, sigval);
-
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RAX], rax);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBX], rbx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RCX], rcx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDX], rdx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSI], rsi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDI], rdi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSP], rsp);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBP], rbp);
-
-   DPRINTF("Before resuming the child process where it left off and "
-   "without signal to be sent\n");
-   SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_exited(status, exitval);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
-}
-
-ATF_TC(x86_64_regs_gp_write);
-ATF_TC_HEAD(x86_64_regs_gp_write, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values into a debugged program via "
-   "PT_SETREGS and compare the result against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_write, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi 

[PATCH v4 4/4] Add tests for reading registers from x86 core dumps

2019-12-22 Thread Michał Górny
---
 tests/lib/libc/sys/t_ptrace_x86_wait.h | 187 +++--
 1 file changed, 141 insertions(+), 46 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h 
b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index 4572e3eb8b23..7867f1e8dd1c 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -2225,7 +2225,8 @@ enum x86_test_registers {
 
 enum x86_test_regmode {
TEST_GETREGS,
-   TEST_SETREGS
+   TEST_SETREGS,
+   TEST_COREDUMP
 };
 
 static __inline void get_gp32_regs(union x86_test_register out[])
@@ -2687,8 +2688,10 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 #endif
struct xstate xst;
struct iovec iov;
-   struct fxsave* fxs;
+   struct fxsave* fxs = NULL;
uint64_t xst_flags = 0;
+   char core_path[] = "/tmp/core.XX";
+   int core_fd;
 
const union x86_test_register expected[] __aligned(32) = {
{{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
@@ -2796,6 +2799,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
DPRINTF("Before running assembly from child\n");
switch (regmode) {
case TEST_GETREGS:
+   case TEST_COREDUMP:
switch (regs) {
case GPREGS_32:
set_gp32_regs(expected);
@@ -2969,40 +2973,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 
validate_status_stopped(status, sigval);
 
-   switch (regset) {
-   case TEST_GPREGS:
-   ATF_REQUIRE(regs < FPREGS_MM);
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-   break;
-   case TEST_XMMREGS:
-#if defined(__i386__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   DPRINTF("Call GETXMMREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0) != -1);
-   fxs = &xmm.fxstate;
-   break;
-#else
-   /*FALLTHROUGH*/
-#endif
-   case TEST_FPREGS:
-#if defined(__x86_64__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   fxs = &fpr.fxstate;
-#else
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
-#endif
-   DPRINTF("Call GETFPREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0) != -1);
-   break;
-   case TEST_XSTATE:
-   ATF_REQUIRE(regs >= FPREGS_MM);
-   iov.iov_base = &xst;
-   iov.iov_len = sizeof(xst);
-
-   DPRINTF("Call GETXSTATE for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
-
+   if (regset == TEST_XSTATE) {
switch (regs) {
case FPREGS_MM:
xst_flags |= XCR0_X87;
@@ -3020,21 +2991,117 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
__unreachable();
break;
}
+   }
 
-   ATF_REQUIRE((xst.xs_rfbm & xst_flags) == xst_flags);
-   switch (regmode) {
-   case TEST_SETREGS:
-   xst.xs_rfbm = xst_flags;
-   xst.xs_xstate_bv = xst_flags;
+   switch (regmode) {
+   case TEST_GETREGS:
+   case TEST_SETREGS:
+   switch (regset) {
+   case TEST_GPREGS:
+   ATF_REQUIRE(regs < FPREGS_MM);
+   DPRINTF("Call GETREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0)
+   != -1);
break;
-   case TEST_GETREGS:
+   case TEST_XMMREGS:
+#if defined(__i386__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   DPRINTF("Call GETXMMREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0)
+   != -1);
+   fxs = &xmm.fxstate;
+   break;
+#else
+   /*FALLTHROUGH*/
+#endif
+   case TEST_FPREGS:
+#if defined(__x86_64__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   fxs = &fpr.fxstate;
+#else
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
+#endif
+   DPRINTF("Call GETFPREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0)
+   != -1);
+   break;
+   case TEST_XSTATE:
+   A

[PATCH v4 2/4] Fix alignment when reading core notes

2019-12-22 Thread Michał Górny
Both desc and note header needs to be aligned.  Therefore, we need
to realign after skipping past desc as well.
---
 tests/lib/libc/sys/t_ptrace_wait.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/lib/libc/sys/t_ptrace_wait.c 
b/tests/lib/libc/sys/t_ptrace_wait.c
index 6fc43572e015..1097a351fd92 100644
--- a/tests/lib/libc/sys/t_ptrace_wait.c
+++ b/tests/lib/libc/sys/t_ptrace_wait.c
@@ -7603,6 +7603,9 @@ static ssize_t core_find_note(const char *core_path,
}
 
offset += note_hdr.n_descsz;
+   /* fix to alignment */
+   offset = ((offset + core_hdr.p_align - 1)
+   / core_hdr.p_align) * core_hdr.p_align;
}
}
 
-- 
2.24.1



[PATCH v4 1/4] Include XSTATE note in x86 core dumps

2019-12-22 Thread Michał Górny
Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
API for injecting per-LWP notes into coredumps, and use it to append
PT_GETXSTATE note.

Since the XSTATE block uses the same format on i386 and amd64, the code
does not have to conditionalize between 32-bit and 64-bit ELF format
on that.  However, it does need to distinguish between 32-bit and 64-bit
PT_* values.  In order to do that, it reuses PT32_* constant already
present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
the cpp logic.
---
 sys/arch/amd64/include/ptrace.h | 16 
 sys/arch/i386/include/ptrace.h  | 12 
 sys/kern/core_elf32.c   |  6 +-
 3 files changed, 33 insertions(+), 1 deletion(-)

Changes since v3:
- instead of returning early on xstate getting errors, just skip
  the entry,
- pass all relevant variables as macro parameters.

No changes in the remaining patches, resending them due to long delay
from last submission.

diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index f9a98a63c2b1..987a13923b85 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -105,6 +105,22 @@ int process_machdep_validfpu(struct proc *);
 MODULE_HOOK(netbsd32_process_doxmmregs_hook, int,
 (struct lwp *, struct lwp *, void *, bool));
 
+#ifdef EXEC_ELF32
+#include 
+#endif
+#define PT64_GETXSTATE PT_GETXSTATE
+#define COREDUMP_MACHDEP_LWP_NOTES(l, ns, name)
\
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   if (!process_read_xstate(l, &xstate))   \
+   {   \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name,  \
+   &xstate, sizeof(xstate));   \
+   }   \
+}
+
 #endif /* _KERNEL */
 
 #ifdef _KERNEL_OPT
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 41ad7a119f0b..561be6440ac0 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -159,6 +159,18 @@
{ DT_REG, N("xmmregs"), Pmachdep_xmmregs,   \
  procfs_machdep_validxmmregs },
 
+#define COREDUMP_MACHDEP_LWP_NOTES(l, ns, name)
\
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   if (!process_read_xstate(l, &xstate))   \
+   {   \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name,  \
+   &xstate, sizeof(xstate));   \
+   }   \
+}
+
 struct xmmregs;
 
 /* Functions used by both ptrace(2) and procfs. */
diff --git a/sys/kern/core_elf32.c b/sys/kern/core_elf32.c
index 7db43d0cc10b..95e5658053c8 100644
--- a/sys/kern/core_elf32.c
+++ b/sys/kern/core_elf32.c
@@ -506,7 +506,11 @@ ELFNAMEEND(coredump_note)(struct lwp *l, struct note_state 
*ns)
 
ELFNAMEEND(coredump_savenote)(ns, PT_GETFPREGS, name, &freg, freglen);
 #endif
-   /* XXX Add hook for machdep per-LWP notes. */
+
+#ifdef COREDUMP_MACHDEP_LWP_NOTES
+   COREDUMP_MACHDEP_LWP_NOTES(l, ns, name);
+#endif
+
return (0);
 }
 
-- 
2.24.1



Re: [PATCH v3] Include XSTATE note in x86 core dumps

2019-12-22 Thread Michał Górny
Hi,

First of all, I'm sorry for not replying for this long.  I've got
distracted with other things, and I've completely forgotten about this.

On Tue, 2019-07-16 at 05:23 +1000, matthew green wrote:
> > Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
> > API for injecting per-LWP notes into coredumps, and use it to append
> > PT_GETXSTATE note.
> 
> this macro is pretty gross.  these are the problems i see:
> 
> - assumes 'int error'

This goes away if we're removing error handling.

> - assumes 'struct lwp *l = curlwp'
> - assumes 'name' is something
> - assumes 'ns' is something

Made into parameters.

> - returns from macro

Resolved as you suggested -- we're silently skipping this entry if it
fails.

I'm going to resend the whole series as v4 now.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: PT32_[GS]ETXMMREGS for amd64 (Was: netbsd32_{,u}int64 in sys/types.h for compat/sys/siginfo.h)

2019-11-21 Thread Michał Górny
On Fri, 2019-11-22 at 10:10 +0900, Rin Okuyama wrote:
> Hi, thank you for your review!
> 
> On 2019/11/22 0:48, Kamil Rytarowski wrote:
> > On 21.11.2019 10:49, Rin Okuyama wrote:
> ...
> > > I wrote a draft version of patch which adds PT32_[GS]ETXMMREGS support:
> > > 
> > > http://www.netbsd.org/~rin/amd64-PT32_GSETXMMREGS-20191121.patch
> > > 
> > > With this patch, XMM-related tests pass for COMPAT_NETBSD32 on amd64.
> > > 
> > > Some remarks:
> > > 
> > > (1) PT_[GS]ETXMMREGS ptrace(2) commands are added to .
> > >  These are only used for COMPAT_NETBSD32, and not exposed to userland.
> > > 
> > 
> > This is correct.
> > We don't want to export XMMREGS to amd64 users.
> > 
> > Pleae remove /* */ from this part:
> > 
> > +/*
> > +   "PT_GETXMMREGS", \
> > +   "PT_SETXMMREGS"
> > + */
> 
> Yes, I will.
> 
> > This will allow ktruss and related software to have meaningful form for
> > compat32 ptracing.
> > 
> > > (2) COMPAT_NETBSD32 codes are called from process_machdep.c via
> > >  module_hook framework. This may be too much though...
> > > 
> > 
> > I have no opinion here.
> 
> Now, Paul and I are discussed how to improve the usage of module_hook.
> I will update this part in accordance with his advice.
> 
> > > Comments?
> > > 
> > 
> > I will leave this to be reviewed by mgorny@. Adding him to CC.
> 
> I see. Hi, mgorny@. Please look into it!
> > > http://www.netbsd.org/~rin/amd64-PT32_GSETXMMREGS-20191121.patch
> > > > 

It seems correct at a first glance.  The hook logic is unknown to me, so
I can't comment on that.  If I was to be picky, I'd prefer if changes
in existing code that are not exactly relevant to adding this were split
into a separate patches (e.g. changing int to bool, renaming valid*
func).

-- 
Best regards,
Michał Górny



Re: [PATCH v3] Include XSTATE note in x86 core dumps

2019-07-16 Thread Michał Górny
On Tue, 2019-07-16 at 05:23 +1000, matthew green wrote:
> > Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
> > API for injecting per-LWP notes into coredumps, and use it to append
> > PT_GETXSTATE note.
> 
> this macro is pretty gross.  these are the problems i see:
> 
> - assumes 'int error'
> - assumes 'struct lwp *l = curlwp'
> - assumes 'name' is something
> - assumes 'ns' is something
> 

It also relies on ELFSIZE being defined in unit using the macro.  Should
I make that into a parameter as well?

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3] Include XSTATE note in x86 core dumps

2019-07-16 Thread Michał Górny
On Tue, 2019-07-16 at 05:23 +1000, matthew green wrote:
> > Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
> > API for injecting per-LWP notes into coredumps, and use it to append
> > PT_GETXSTATE note.
> 
> this macro is pretty gross.  these are the problems i see:
> 
> - assumes 'int error'
> - assumes 'struct lwp *l = curlwp'
> - assumes 'name' is something
> - assumes 'ns' is something
> - returns from macro
> 
> all of those should be resolved.  4 are simple, they can be easy
> inputs.  the return issue is more difficult, and perhaps should
> be revisited (ie, perhaps just skip over this entry instead of
> assuming return is right.)

Will do.  I have mixed feelings about ignoring errors but I guess it
won't hurt that much.

> 
> > Since the XSTATE block uses the same format on i386 and amd64, the code
> > does not have to conditionalize between 32-bit and 64-bit ELF format
> > on that.  However, it does need to distinguish between 32-bit and 64-bit
> > PT_* values.  In order to do that, it reuses PT32_* constant already
> > present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
> > the cpp logic.
> 
> i don't understand why the need to include netbsd32 headers or
> define these separately.
> 
> if the layout is the same, what's the benefit?  we already know
> to assume 32 or 64 bit from the ELF core header, right?
> 

That's because PT_GETXSTATE constant has different value on amd64
and i386.  We need to use appropriate value depending on whether
the running program is 64- or 32-bit.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v2] Include XSTATE note in x86 core dumps

2019-07-15 Thread Michał Górny
On Sun, 2019-07-14 at 16:17 +0200, Maxime Villard wrote:
> Le 05/07/2019 à 17:22, Michał Górny a écrit :
> > +#ifdef EXEC_ELF32
> > +#include 
> > +#endif
> > +#define PT64_GETXSTATE PT_GETXSTATE
> > +#define COREDUMP_MACHDEP_LWP_NOTES \
> > +{  \
> > +   struct xstate xstate;   \
> 
> memset needed, otherwise we leak stuff

Good catch, thanks.  I've sent v3 to the ml but accidentally forgot to
set in-reply-to.

> 
> > +   error = process_read_xstate(l, &xstate);\
> > +   if (error)  \
> > +   return (error); \
> > +   ELFNAMEEND(coredump_savenote)(ns,   \
> > +   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name, &xstate, \
> > +   sizeof(xstate));\
> > +}

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v3] Include XSTATE note in x86 core dumps

2019-07-15 Thread Michał Górny
Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
API for injecting per-LWP notes into coredumps, and use it to append
PT_GETXSTATE note.

Since the XSTATE block uses the same format on i386 and amd64, the code
does not have to conditionalize between 32-bit and 64-bit ELF format
on that.  However, it does need to distinguish between 32-bit and 64-bit
PT_* values.  In order to do that, it reuses PT32_* constant already
present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
the cpp logic.
---
 sys/arch/amd64/include/ptrace.h | 16 
 sys/arch/i386/include/ptrace.h  | 11 +++
 sys/kern/core_elf32.c   |  6 +-
 3 files changed, 32 insertions(+), 1 deletion(-)

/* CHANGED in v3:
 * added memset() on xstate to prevent leaking data
 */

diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index 44b38d750f9c..a8338b32fa58 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -90,6 +90,22 @@
 int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *);
 int process_machdep_validxstate(struct proc *);
 
+#ifdef EXEC_ELF32
+#include 
+#endif
+#define PT64_GETXSTATE PT_GETXSTATE
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name, &xstate, \
+   sizeof(xstate));\
+}
+
 #endif /* _KERNEL */
 
 #ifdef _KERNEL_OPT
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 41ad7a119f0b..c9ec102a12dc 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -159,6 +159,17 @@
{ DT_REG, N("xmmregs"), Pmachdep_xmmregs,   \
  procfs_machdep_validxmmregs },
 
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   memset(&xstate, 0, sizeof(xstate)); \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns, PT_GETXSTATE, name, &xstate,  \
+   sizeof(xstate));\
+}
+
 struct xmmregs;
 
 /* Functions used by both ptrace(2) and procfs. */
diff --git a/sys/kern/core_elf32.c b/sys/kern/core_elf32.c
index 5b69cab2ab74..ce90a1e44fa9 100644
--- a/sys/kern/core_elf32.c
+++ b/sys/kern/core_elf32.c
@@ -500,7 +500,11 @@ ELFNAMEEND(coredump_note)(struct lwp *l, struct note_state 
*ns)
 
ELFNAMEEND(coredump_savenote)(ns, PT_GETFPREGS, name, &freg, freglen);
 #endif
-   /* XXX Add hook for machdep per-LWP notes. */
+
+#ifdef COREDUMP_MACHDEP_LWP_NOTES
+   COREDUMP_MACHDEP_LWP_NOTES
+#endif
+
return (0);
 }
 
-- 
2.22.0



[PATCH v2] Include XSTATE note in x86 core dumps

2019-07-05 Thread Michał Górny
Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
API for injecting per-LWP notes into coredumps, and use it to append
PT_GETXSTATE note.

Since the XSTATE block uses the same format on i386 and amd64, the code
does not have to conditionalize between 32-bit and 64-bit ELF format
on that.  However, it does need to distinguish between 32-bit and 64-bit
PT_* values.  In order to do that, it reuses PT32_* constant already
present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
the cpp logic.
---
 sys/arch/amd64/include/ptrace.h | 15 +++
 sys/arch/i386/include/ptrace.h  | 10 ++
 sys/kern/core_elf32.c   |  6 +-
 3 files changed, 30 insertions(+), 1 deletion(-)

Changed in v2: added EXEC_ELF32 conditional to fix building exec_elf32
module.

diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index 44b38d750f9c..df78eaabd0fd 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -90,6 +90,21 @@
 int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *);
 int process_machdep_validxstate(struct proc *);
 
+#ifdef EXEC_ELF32
+#include 
+#endif
+#define PT64_GETXSTATE PT_GETXSTATE
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name, &xstate, \
+   sizeof(xstate));\
+}
+
 #endif /* _KERNEL */
 
 #ifdef _KERNEL_OPT
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 41ad7a119f0b..4edf7f09a38d 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -159,6 +159,16 @@
{ DT_REG, N("xmmregs"), Pmachdep_xmmregs,   \
  procfs_machdep_validxmmregs },
 
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns, PT_GETXSTATE, name, &xstate,  \
+   sizeof(xstate));\
+}
+
 struct xmmregs;
 
 /* Functions used by both ptrace(2) and procfs. */
diff --git a/sys/kern/core_elf32.c b/sys/kern/core_elf32.c
index 5b69cab2ab74..ce90a1e44fa9 100644
--- a/sys/kern/core_elf32.c
+++ b/sys/kern/core_elf32.c
@@ -500,7 +500,11 @@ ELFNAMEEND(coredump_note)(struct lwp *l, struct note_state 
*ns)
 
ELFNAMEEND(coredump_savenote)(ns, PT_GETFPREGS, name, &freg, freglen);
 #endif
-   /* XXX Add hook for machdep per-LWP notes. */
+
+#ifdef COREDUMP_MACHDEP_LWP_NOTES
+   COREDUMP_MACHDEP_LWP_NOTES
+#endif
+
return (0);
 }
 
-- 
2.22.0



[PATCH 3/4] Combine x86 register tests into unified test function

2019-07-05 Thread Michał Górny
Reduce the code duplication and improve maintainability of x86 register
tests by combining all of them to a single base function.
---
 tests/lib/libc/sys/t_ptrace_amd64_wait.h |  406 +---
 tests/lib/libc/sys/t_ptrace_i386_wait.h  |  335 +--
 tests/lib/libc/sys/t_ptrace_x86_wait.h   | 2417 ++
 3 files changed, 1103 insertions(+), 2055 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_amd64_wait.h 
b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
index 1ea17ea1ec1a..0f410f3600d0 100644
--- a/tests/lib/libc/sys/t_ptrace_amd64_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_amd64_wait.h
@@ -111,415 +111,11 @@ ATF_TC_BODY(x86_64_regs1, tc)
TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
 }
 
-ATF_TC(x86_64_regs_gp_read);
-ATF_TC_HEAD(x86_64_regs_gp_read, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values from debugged program and read "
-   "them via PT_GETREGS, comparing values against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_read, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi = 0x5051525354555657;
-   const uint64_t rsp = 0x6061626364656667;
-   const uint64_t rbp = 0x7071727374757677;
-
-   DPRINTF("Before forking process PID=%d\n", getpid());
-   SYSCALL_REQUIRE((child = fork()) != -1);
-   if (child == 0) {
-   DPRINTF("Before calling PT_TRACE_ME from child %d\n", getpid());
-   FORKEE_ASSERT(ptrace(PT_TRACE_ME, 0, NULL, 0) != -1);
-
-   DPRINTF("Before running assembly from child\n");
-
-   __asm__ __volatile__(
-   /* rbp & rbp are a bit tricky, we must not clobber them 
*/
-   "movq%%rsp, %%r8\n\t"
-   "movq%%rbp, %%r9\n\t"
-   "movq%6, %%rsp\n\t"
-   "movq%7, %%rbp\n\t"
-   "\n\t"
-   "int3\n\t"
-   "\n\t"
-   "movq%%r8, %%rsp\n\t"
-   "movq%%r9, %%rbp\n\t"
-   :
-   : "a"(rax), "b"(rbx), "c"(rcx), "d"(rdx), "S"(rsi), 
"D"(rdi),
- "i"(rsp), "i"(rbp)
-   : "%r8", "%r9"
-   );
-
-   DPRINTF("Before exiting of the child process\n");
-   _exit(exitval);
-   }
-   DPRINTF("Parent process PID=%d, child's PID=%d\n", getpid(), child);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_stopped(status, sigval);
-
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RAX], rax);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBX], rbx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RCX], rcx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDX], rdx);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSI], rsi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RDI], rdi);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RSP], rsp);
-   ATF_CHECK_EQ((uint64_t)gpr.regs[_REG_RBP], rbp);
-
-   DPRINTF("Before resuming the child process where it left off and "
-   "without signal to be sent\n");
-   SYSCALL_REQUIRE(ptrace(PT_CONTINUE, child, (void *)1, 0) != -1);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_SUCCESS(wpid = TWAIT_GENERIC(child, &status, 0), child);
-
-   validate_status_exited(status, exitval);
-
-   DPRINTF("Before calling %s() for the child\n", TWAIT_FNAME);
-   TWAIT_REQUIRE_FAILURE(ECHILD, wpid = TWAIT_GENERIC(child, &status, 0));
-}
-
-ATF_TC(x86_64_regs_gp_write);
-ATF_TC_HEAD(x86_64_regs_gp_write, tc)
-{
-   atf_tc_set_md_var(tc, "descr",
-   "Set general-purpose reg values into a debugged program via "
-   "PT_SETREGS and compare the result against expected.");
-}
-
-ATF_TC_BODY(x86_64_regs_gp_write, tc)
-{
-   const int exitval = 5;
-   pid_t child, wpid;
-#if defined(TWAIT_HAVE_STATUS)
-   const int sigval = SIGTRAP;
-   int status;
-#endif
-   struct reg gpr;
-
-   const uint64_t rax = 0x0001020304050607;
-   const uint64_t rbx = 0x1011121314151617;
-   const uint64_t rcx = 0x2021222324252627;
-   const uint64_t rdx = 0x3031323334353637;
-   const uint64_t rsi = 0x4041424344454647;
-   const uint64_t rdi 

[PATCH 4/4] Add tests for reading registers from x86 core dumps.

2019-07-04 Thread Michał Górny
---
 tests/lib/libc/sys/t_ptrace_x86_wait.h | 187 +++--
 1 file changed, 141 insertions(+), 46 deletions(-)

diff --git a/tests/lib/libc/sys/t_ptrace_x86_wait.h 
b/tests/lib/libc/sys/t_ptrace_x86_wait.h
index 4572e3eb8b23..7867f1e8dd1c 100644
--- a/tests/lib/libc/sys/t_ptrace_x86_wait.h
+++ b/tests/lib/libc/sys/t_ptrace_x86_wait.h
@@ -2225,7 +2225,8 @@ enum x86_test_registers {
 
 enum x86_test_regmode {
TEST_GETREGS,
-   TEST_SETREGS
+   TEST_SETREGS,
+   TEST_COREDUMP
 };
 
 static __inline void get_gp32_regs(union x86_test_register out[])
@@ -2687,8 +2688,10 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 #endif
struct xstate xst;
struct iovec iov;
-   struct fxsave* fxs;
+   struct fxsave* fxs = NULL;
uint64_t xst_flags = 0;
+   char core_path[] = "/tmp/core.XX";
+   int core_fd;
 
const union x86_test_register expected[] __aligned(32) = {
{{ 0x0706050403020100, 0x0F0E0D0C0B0A0908,
@@ -2796,6 +2799,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
DPRINTF("Before running assembly from child\n");
switch (regmode) {
case TEST_GETREGS:
+   case TEST_COREDUMP:
switch (regs) {
case GPREGS_32:
set_gp32_regs(expected);
@@ -2969,40 +2973,7 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
 
validate_status_stopped(status, sigval);
 
-   switch (regset) {
-   case TEST_GPREGS:
-   ATF_REQUIRE(regs < FPREGS_MM);
-   DPRINTF("Call GETREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0) != -1);
-   break;
-   case TEST_XMMREGS:
-#if defined(__i386__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   DPRINTF("Call GETXMMREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0) != -1);
-   fxs = &xmm.fxstate;
-   break;
-#else
-   /*FALLTHROUGH*/
-#endif
-   case TEST_FPREGS:
-#if defined(__x86_64__)
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
-   fxs = &fpr.fxstate;
-#else
-   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
-#endif
-   DPRINTF("Call GETFPREGS for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0) != -1);
-   break;
-   case TEST_XSTATE:
-   ATF_REQUIRE(regs >= FPREGS_MM);
-   iov.iov_base = &xst;
-   iov.iov_len = sizeof(xst);
-
-   DPRINTF("Call GETXSTATE for the child process\n");
-   SYSCALL_REQUIRE(ptrace(PT_GETXSTATE, child, &iov, 0) != -1);
-
+   if (regset == TEST_XSTATE) {
switch (regs) {
case FPREGS_MM:
xst_flags |= XCR0_X87;
@@ -3020,21 +2991,117 @@ x86_register_test(enum x86_test_regset regset, enum 
x86_test_registers regs,
__unreachable();
break;
}
+   }
 
-   ATF_REQUIRE((xst.xs_rfbm & xst_flags) == xst_flags);
-   switch (regmode) {
-   case TEST_SETREGS:
-   xst.xs_rfbm = xst_flags;
-   xst.xs_xstate_bv = xst_flags;
+   switch (regmode) {
+   case TEST_GETREGS:
+   case TEST_SETREGS:
+   switch (regset) {
+   case TEST_GPREGS:
+   ATF_REQUIRE(regs < FPREGS_MM);
+   DPRINTF("Call GETREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETREGS, child, &gpr, 0)
+   != -1);
break;
-   case TEST_GETREGS:
+   case TEST_XMMREGS:
+#if defined(__i386__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   DPRINTF("Call GETXMMREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETXMMREGS, child, &xmm, 0)
+   != -1);
+   fxs = &xmm.fxstate;
+   break;
+#else
+   /*FALLTHROUGH*/
+#endif
+   case TEST_FPREGS:
+#if defined(__x86_64__)
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_YMM);
+   fxs = &fpr.fxstate;
+#else
+   ATF_REQUIRE(regs >= FPREGS_MM && regs < FPREGS_XMM);
+#endif
+   DPRINTF("Call GETFPREGS for the child process\n");
+   SYSCALL_REQUIRE(ptrace(PT_GETFPREGS, child, &fpr, 0)
+   != -1);
+   break;
+   case TEST_XSTATE:
+   A

[PATCH 1/4] Include XSTATE note in x86 core dumps

2019-07-04 Thread Michał Górny
Introduce a simple COREDUMP_MACHDEP_LWP_NOTES logic to provide machdep
API for injecting per-LWP notes into coredumps, and use it to append
PT_GETXSTATE note.

Since the XSTATE block uses the same format on i386 and amd64, the code
does not have to conditionalize between 32-bit and 64-bit ELF format
on that.  However, it does need to distinguish between 32-bit and 64-bit
PT_* values.  In order to do that, it reuses PT32_* constant already
present for ptrace(), and adds a matching PT64_GETXSTATE to satisfy
the cpp logic.
---
 sys/arch/amd64/include/ptrace.h | 12 
 sys/arch/i386/include/ptrace.h  | 10 ++
 sys/kern/core_elf32.c   |  6 +-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index 44b38d750f9c..2c378260a6e3 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -90,6 +90,18 @@
 int process_machdep_doxstate(struct lwp *, struct lwp *, struct uio *);
 int process_machdep_validxstate(struct proc *);
 
+#define PT64_GETXSTATE PT_GETXSTATE
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns,   \
+   CONCAT(CONCAT(PT, ELFSIZE), _GETXSTATE), name, &xstate, \
+   sizeof(xstate));\
+}
+
 #endif /* _KERNEL */
 
 #ifdef _KERNEL_OPT
diff --git a/sys/arch/i386/include/ptrace.h b/sys/arch/i386/include/ptrace.h
index 41ad7a119f0b..4edf7f09a38d 100644
--- a/sys/arch/i386/include/ptrace.h
+++ b/sys/arch/i386/include/ptrace.h
@@ -159,6 +159,16 @@
{ DT_REG, N("xmmregs"), Pmachdep_xmmregs,   \
  procfs_machdep_validxmmregs },
 
+#define COREDUMP_MACHDEP_LWP_NOTES \
+{  \
+   struct xstate xstate;   \
+   error = process_read_xstate(l, &xstate);\
+   if (error)  \
+   return (error); \
+   ELFNAMEEND(coredump_savenote)(ns, PT_GETXSTATE, name, &xstate,  \
+   sizeof(xstate));\
+}
+
 struct xmmregs;
 
 /* Functions used by both ptrace(2) and procfs. */
diff --git a/sys/kern/core_elf32.c b/sys/kern/core_elf32.c
index 5b69cab2ab74..ce90a1e44fa9 100644
--- a/sys/kern/core_elf32.c
+++ b/sys/kern/core_elf32.c
@@ -500,7 +500,11 @@ ELFNAMEEND(coredump_note)(struct lwp *l, struct note_state 
*ns)
 
ELFNAMEEND(coredump_savenote)(ns, PT_GETFPREGS, name, &freg, freglen);
 #endif
-   /* XXX Add hook for machdep per-LWP notes. */
+
+#ifdef COREDUMP_MACHDEP_LWP_NOTES
+   COREDUMP_MACHDEP_LWP_NOTES
+#endif
+
return (0);
 }
 
-- 
2.22.0



[PATCH 2/4] Fix alignment when reading core notes

2019-07-04 Thread Michał Górny
Both desc and note header needs to be aligned.  Therefore, we need
to realign after skipping past desc as well.
---
 tests/lib/libc/sys/t_ptrace_wait.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/lib/libc/sys/t_ptrace_wait.c 
b/tests/lib/libc/sys/t_ptrace_wait.c
index 32e007d5e054..5ded0c311b7f 100644
--- a/tests/lib/libc/sys/t_ptrace_wait.c
+++ b/tests/lib/libc/sys/t_ptrace_wait.c
@@ -7728,6 +7728,9 @@ static ssize_t core_find_note(const char *core_path,
}
 
offset += note_hdr.n_descsz;
+   /* fix to alignment */
+   offset = ((offset + core_hdr.p_align - 1)
+   / core_hdr.p_align) * core_hdr.p_align;
}
}
 
-- 
2.22.0



[PATCH 0/4] XSTATE core dump support and tests

2019-07-04 Thread Michał Górny
Hi,

Here's a short patch set focused on adding PT_GETXSTATE note to core
dumps, and adding tests for register notes in core dumps.
For the former, I had to implement machdep interface for core dumps.
For the latter, I've decided to combine existing tests into a single
large function (alike functions used for other tests already),
to reduce code duplication.

The advantages of including PT_GETXSTATE in core dumps is that:

1. it includes YMM registers that are not present in any other state
   and is extensible to future register types,

2. it uses FXSAVE format on i386, filling missing support for XMM
   registers there (we could eventually implement PT_GETXMMREGS there
   but it would be redundant),

3. it provides state bitmasks, making it possible to determine which
   registers were supported by the CPU on which coredump was created.

The disadvantage is that it partially duplicates existing PT_GETFPREGS.
However, I don't think this is a major problem.

-- 
Best regards,
Michał Górny

Michał Górny (4):
  Include XSTATE note in x86 core dumps
  Fix alignment when reading core notes
  Combine x86 register tests into unified test function
  Add tests for reading registers from x86 core dumps.

 sys/arch/amd64/include/ptrace.h  |   12 +
 sys/arch/i386/include/ptrace.h   |   10 +
 sys/kern/core_elf32.c|6 +-
 tests/lib/libc/sys/t_ptrace_amd64_wait.h |  406 +---
 tests/lib/libc/sys/t_ptrace_i386_wait.h  |  335 +--
 tests/lib/libc/sys/t_ptrace_wait.c   |3 +
 tests/lib/libc/sys/t_ptrace_x86_wait.h   | 2508 ++
 7 files changed, 1226 insertions(+), 2054 deletions(-)

-- 
2.22.0



Re: re-enabling debugging of 32 bit processes with 64 bit debugger

2019-06-28 Thread Michał Górny
On Fri, 2019-06-28 at 16:44 -0400, Christos Zoulas wrote:
> Background:
> 
> Max disabled the 32 bit code paths in process_machdep.c and matchdep.c 
> so trying to debug 32 bit processes from a 64 bit debugger. From his commit
> message I think this was not intentional:
> 
> revision 1.36
> date: 2017-10-19 05:32:01 -0400;  author: maxv;  state: Exp;  lines: +35 
> -0;  commitid: 0ZqTTwMXhMd40EbA;
> Make sure we don't go farther with 32bit LWPs. There appears to be some
> confusion in the code - in part introduced by myself -, and clearly this
> place is not supposed to handle 32bit LWPs.
> 
> Right now we're returning EINVAL, but verily we would need to redirect
> these calls to their netbsd32 counterparts.
> 
> I've been asking him privately to re-add the code (I even gave him a patch),
> but after not responding for a few days we had the exchange (appended below)
> which leads me to believe that he does not believe the functionality is 
> useful.
> 
> I would like to have this functinality restored because as I explained below
> it is easier to use a 64 bit debugger on a 32 bit app (for various reasons),
> plus I want to add some unit-tests to make sure we don't break it in the
> future, since it is required for mips64 right now. It is harder to add
> a new testing platform than doing this on amd64.
> 
> What do you think? SHould we make the code work like before? Or this is
> functionality that we don't want to have because it is "dumb"? (I think
> that Max here means that it adds complexity and it could be dangerous,
> but I am just guessing)
> 

I'm also interested in this since it's going to affect LLDB long term. 
However, I am not sure how it should work exactly, especially in context
of registers.

AFAIU the old behavior was that ptrace() used 64-bit data types,
and padded unused parts.  I suppose this could be somewhat convenient
(since you have less variety to account for) but at the same time rather
weird.  And in the end, debuggers would have to know how to correctly
truncate those registers, and skip the unused ones.

The alternative would be to reuse compat32 codepaths more, effectively
switching to 32-bit data types.  However, this would mean that debuggers
would have to switch between structures passed to ptrace() based
on whether the tracee is 32- or 64-bit.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-25 Thread Michał Górny
On Tue, 2019-06-25 at 20:55 +0200, Maxime Villard wrote:
> Le 22/06/2019 à 09:38, Michał Górny a écrit :
> > Hi,
> > 
> > Attached is the next version of unified patch.  I've updated the boolean
> > logic as requested, and fixed indentations.
> 
> Looks good to me. Please don't use ()s when not needed, like
> 
> - memset(&(xstate->xs_fxsave), 0, sizeof(xstate->xs_fxsave));
> - process_s87_to_xmm(&fpu_save->sv_87, &(xstate->xs_fxsave));
> + memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
> + process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
> 
> but whatever, this is detail

Will do.  Thanks for the review.  Is it fine to commit then?

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-22 Thread Michał Górny
Hi,

Attached is the next version of unified patch.  I've updated the boolean
logic as requested, and fixed indentations.

-- 
Best regards,
Michał Górny

diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2
index 9cd99ac94bd1..2ea13872e421 100644
--- a/lib/libc/sys/ptrace.2
+++ b/lib/libc/sys/ptrace.2
@@ -1,7 +1,7 @@
 .\"	$NetBSD: ptrace.2,v 1.74 2019/06/12 12:33:42 wiz Exp $
 .\"
 .\" This file is in the public domain.
-.Dd June 12, 2019
+.Dd June 22, 2019
 .Dt PTRACE 2
 .Os
 .Sh NAME
@@ -771,6 +771,69 @@ The
 argument contains the LWP ID of the thread whose registers are to
 be written.
 If zero is supplied, the first thread of the process is written.
+.It Dv PT_GETXSTATE
+This request reads the traced process' FPU extended state into
+the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be read.
+If zero is supplied, the first thread of the process is read.
+The struct will be filled up to the specified
+.Fa iov_len .
+The caller needs to check
+.Fa xs_rfbm
+bitmap in order to determine which fields were provided by the CPU,
+and may check
+.Fa xs_xstate_bv
+to determine which component states were changed from the initial state.
+.It Dv PT_SETXSTATE
+This request is the converse of
+.Dv PT_GETXSTATE ;
+it loads the traced process' extended FPU state from the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be written.
+If zero is supplied, the first thread of the process is written.
+The
+.Fa xs_rfbm
+field of the supplied xstate specifies which state components are to
+be updated.  Other components (fields) will be ignored.  The
+.Fa xs_xstate_bv
+specifies whether component state should be set to provided values
+(when 1) or reset to unitialized (when 0).  The request
+will fail if
+.Fa xs_xstate_bv
+is not a subset of
+.Fa xs_rfbm ,
+or any of the specified components is not supported by the CPU or kernel
+(i.e. not returned by
+.Dv PT_GETXSTATE .
 .El
 .Sh ERRORS
 Some requests can cause
@@ -819,8 +882,10 @@ was neither 0 nor a legal signal number.
 .Dv PT_GETREGS ,
 .Dv PT_SETREGS ,
 .Dv PT_GETFPREGS ,
+.Dv PT_SETFPREGS ,
+.Dv PT_GETXSTATE ,
 or
-.Dv PT_SETFPREGS
+.Dv PT_SETXSTATE
 was attempted on a process with no valid register set.
 (This is normally true only of system processes.)
 .It
@@ -832,6 +897,13 @@ or
 with
 .Dv vm.user_va0_disable
 set to 1.
+.It
+.Dv PT_SETXSTATE
+attempted to set state components not supported by the kernel,
+or
+.Dv xs_xstate_bv
+was not a subset of
+.Dv xs_rfbm .
 .El
 .It Bq Er EPERM
 .Bl -bullet -compact
diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 81bf78f6ecc4..3e007c79761b 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -353,6 +353,8 @@ netbsd32_ptrace_translate_request(int req)
 	case PT32_SETDBREGS:		return PT_SETDBREGS;
 	case PT32_SETSTEP:		return PT_SETSTEP;
 	case PT32_CLEARSTEP:		return PT_CLEARSTEP;
+	case PT32_GETXSTATE:		return PT_GETXSTATE;
+	case PT32_SETXSTATE:		return PT_SETXSTATE;
 	default:			return -1;
 	}
 }
diff --git a/sys/arch/amd64/amd64/process_machdep.c b/sys/arch/amd64/amd64/process_machdep.c
index c204556c9168..d4e2c9a4009e 100644
--- a/sys/arch/amd64/amd64/process_machdep.c
+++ b/sys/arch/amd64/amd64/process_machdep.c
@@ -84,6 +84,9 @@ __KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.39 2019/02/11 14:59:32 cherry
 #include 
 #include 
 
+#include 
+
+#include 
 #include 
 #include 
 #include 
@@ -288,3 +291,131 @@ process_set_pc(struct lwp *l, void *addr)
 
 	return 0;
 }
+
+#ifdef __HAVE_PTRACE_MACHDEP
+static int
+process_machdep_read_xstate(struct lwp *l, struct xstate *regs)
+{
+	return process_read_xstate(l, regs);
+}
+
+static int
+process_machdep_write_xstate(struct lwp *l, const struct xstate *regs)
+{
+	int error;
+
+	/*
+	 * Check for security violations.
+	 */
+	error = process_verify_xstate(regs);
+	if (error != 0)
+		return error;
+
+	return process_write_xstate(l, regs);
+}
+
+int
+ptrace_machdep_dorequest(
+struct lwp *l,
+struct lwp *lt,
+int req,
+void *addr,
+int data
+)
+{
+	struct uio uio;
+	struct iovec iov;
+	struct vmspace *vm;
+	int error;
+	int write = 0;
+
+	switch (req) {
+	case PT_SETXSTATE:
+		write = 1;
+
+		/* FALLTHROUGH */
+	case PT_GETXSTATE:
+		/* write = 0 done above. */
+		if (!process_machdep_v

Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-21 Thread Michał Górny
On Sat, 2019-06-22 at 08:26 +0200, Maxime Villard wrote:
> Le 22/06/2019 à 07:42, Michał Górny a écrit :
> > On Sat, 2019-06-22 at 07:35 +0200, Maxime Villard wrote:
> > > Le 19/06/2019 à 14:20, Michał Górny a écrit :> On Wed, 2019-06-19 at 
> > > 06:32 +0200, Maxime Villard wrote:
> > > > > Can you provide a unified patch so I can review quickly? Sorry for 
> > > > > the delay
> > > > 
> > > > Here you are.  Note that the manpage is not updated yet, as I'm waiting
> > > > for your comments on the xs_rfbm approach.
> > > > +   memcpy(&(xstate -> field),  
> > > > \
> > > > +   (char*)fpu_save + 
> > > > x86_xsave_offsets[xsave_val], \
> > > > +   sizeof(xstate -> field));   
> > > > \
> > > 
> > > Please use proper KNF here and in other places
> > 
> > Are you talking of indentation?  It would be really helpful to be more
> > specific here.
> 
> Yes
> 
>   memcpy(&xstate->field,  \
>   (char*)fpu_save + x86_xsave_offsets[xsave_val], \
>   sizeof(xstate->field)); \
> 
> But that's just cosmetic
> 
> > > > +   fpu_save->sv_xsave_hdr.xsh_xstate_bv =
> > > > +   (~xstate->xs_rfbm | xstate->xs_xstate_bv) &
> > > > +   (xstate->xs_rfbm | 
> > > > fpu_save->sv_xsave_hdr.xsh_xstate_bv);
> > > 
> > > Can't we just do this? Seems simpler
> > > 
> > >   fpu_save->sv_xsave_hdr.xsh_xstate_bv =
> > >   (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
> > >   xstate->xs_xstate_bv;
> > 
> > Unless I'm mistaken, this would prevent the user from being able to
> > force xs_xstate_bv to 0 if it was set previously, and therefore clear
> > registers to 'unmodified' state.
> 
> If the user gives RFBM=all and XSTATE_BV=0, it does force the result to zero

I'm sorry, I read it the other way around.  Yes, given that we force
xs_xstate_bv to be a subset of xs_rfbm, this is going to work correctly.

I'm going to update the patch, the manpage and send you a new version.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-21 Thread Michał Górny
On Sat, 2019-06-22 at 07:35 +0200, Maxime Villard wrote:
> Le 19/06/2019 à 14:20, Michał Górny a écrit :> On Wed, 2019-06-19 at 06:32 
> +0200, Maxime Villard wrote:
> > > Can you provide a unified patch so I can review quickly? Sorry for the 
> > > delay
> > 
> > Here you are.  Note that the manpage is not updated yet, as I'm waiting
> > for your comments on the xs_rfbm approach.
> > +   memcpy(&(xstate -> field),  
> > \
> > +   (char*)fpu_save + x86_xsave_offsets[xsave_val], 
> > \
> > +   sizeof(xstate -> field));   
> > \
> 
> Please use proper KNF here and in other places

Are you talking of indentation?  It would be really helpful to be more
specific here.

> 
> > +   fpu_save->sv_xsave_hdr.xsh_xstate_bv =
> > +   (~xstate->xs_rfbm | xstate->xs_xstate_bv) &
> > +   (xstate->xs_rfbm | 
> > fpu_save->sv_xsave_hdr.xsh_xstate_bv);
> 
> Can't we just do this? Seems simpler
> 
>   fpu_save->sv_xsave_hdr.xsh_xstate_bv =
>   (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
>   xstate->xs_xstate_bv;

Unless I'm mistaken, this would prevent the user from being able to
force xs_xstate_bv to 0 if it was set previously, and therefore clear
registers to 'unmodified' state.

> Unfortunately, in this code you will likely hit a race; between the moment you
> set fpu_save->sv_xsave_hdr.xsh_xstate_bv and the moment you reach 
> COPY_COMPONENT,
> the thread could have been context-switched, and xstate_bv could have been
> clobbered.

Unless I'm mistaken, this code should only be executed on stopped
processes.  Or at least that was the idea Kamil suggested to me.  Do we
need extra guards for that?

> 
> I think you can work around this race by adding kpreempt_disable/enable at
> the beginning/end of the function. ISTR there is the same problem in the
> other fpregs ptrace functions. In fact, there are a lot of problems in our
> FPU code right now.
> 
> Otherwise the rfbm approach seems good to me.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-13 Thread Michał Górny
On Sun, 2019-06-09 at 13:17 +0200, Maxime Villard wrote:
>   * I think it would be easier if you hosted your patches online, because the
> repeated threads make it hard to follow what's going on

I've pushed updates commits on:
https://github.com/mgorny/netbsd-src/commits/xstate

Could you take a look, please?  Most notably, I've added 'xs_rfbm' field
and corrected uses of 'xs_xstate_bv' (note I haven't updated manpage
yet).  I've also fixed some of the other issues you've pointed, waiting
for answers on the other question.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-09 Thread Michał Górny
On Sun, 2019-06-09 at 13:17 +0200, Maxime Villard wrote:
> Le 09/06/2019 à 09:46, Michał Górny a écrit :
> > On Sun, 2019-06-09 at 09:08 +0200, Maxime Villard wrote:
> > > Le 07/06/2019 à 20:33, Michał Górny a écrit :
> > > > [...]
> > > > +int
> > > > +process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio 
> > > > *uio)
> > > > +   /* curl: tracer */
> > > > +   /* l:traced */
> > > > +{
> > > > +   int error;
> > > > +   struct xstate r;
> > > > +   char *kv;
> > > > +   ssize_t kl;
> > > > +
> > > > +   kl = MIN(uio->uio_iov->iov_len, sizeof(r));
> > > > +   kv = (char *) &r;
> > > > +
> > > > +   kv += uio->uio_offset;
> > > > +   kl -= uio->uio_offset;
> > > > +   if (kl > uio->uio_resid)
> > > > +   kl = uio->uio_resid;
> > > > +
> > > > +   if (kl < 0)
> > > > +   error = EINVAL;
> > > > +   else
> > > > +   error = process_machdep_read_xstate(l, &r);
> > > > +   if (error == 0)
> > > > +   error = uiomove(kv, kl, uio);
> > > > +   if (error == 0 && uio->uio_rw == UIO_WRITE) {
> > > > +   if (l->l_proc->p_stat != SSTOP)
> > > > +   error = EBUSY;
> > > 
> > > Isn't it supposed to always be the case?
> > 
> > To be honest, I've followed suit with all other getter-setters there.
> > I can't say if it's fail-safe that's never supposed to fire or if there
> > could be some mistaken use possible that would trigger this.
> 
> I don't see other places that do that. If it was needed there would be a
> race, because it doesn't seem to me we lock the proc, so what if the proc
> goes != SSTOP after the check.

I've copied it from process_machdep_doxmmregs()
in sys/arch/i386/i386/process_machdep.c.  I can remove it.  Should I
also remove the original occurrence?

> 
> > > > +   /* Copy MXCSR if either SSE or AVX state is requested */
> > > > +   if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
> > > > +   memcpy(&fpu_save->sv_xmm.fx_mxcsr, 
> > > > &xstate->xs_fxsave.fx_mxcsr, 8);
> > > > +
> > > > +   /*
> > > > +* Invalid bits in mxcsr or mxcsr_mask will cause 
> > > > faults.
> > > > +*/
> > > > +   fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
> > > > +   fpu_save->sv_xmm.fx_mxcsr &= 
> > > > fpu_save->sv_xmm.fx_mxcsr_mask;
> > > 
> > > Please use a simple assignment instead of memcpy, and also filter out 
> > > 'xstate'
> > > and not 'fpu_save'.
> > 
> > Will do.  Actually, simple assignment means I can filter it out while
> > assigning.
> > 
> > > Also, it would be nice to clarify the use case here. On x86, mxcsr gets 
> > > reloaded
> > > *regardless* of whether xstate_bv contains SSE|AVX. Should the ptrace api 
> > > also
> > > reload mxcsr regardless of whether the user requested SSE|AVX in 
> > > xstate_bv?
> > 
> > I was following the Intel programmer's manual.  XRSTOR uses mxcsr either
> > if SSE or AVX is requested via EAX.
> 
> Yes, but EAX is the RFBM, not XSTATE_BV.

I agree I could use a better term here.  The idea is that it is a field
stating which of the components in other fields are present.

> 
> > > > +   }
> > > > +
> > > > +   /* Copy SSE state if requested. */
> > > > +   if (xstate->xs_xstate_bv & XCR0_SSE) {
> > > > +   if (x86_fpu_save >= FPU_SAVE_XSAVE) {
> > > > +   KASSERT(fpu_save->sv_xsave_hdr.xsh_xstate_bv & 
> > > > XCR0_SSE);
> > > 
> > > Mmh, maybe it is possible that this KASSERT fires, if the LWP hasn't used 
> > > SSE.
> > 
> > I don't think that can actually happen with XSAVE.  I wouldn't be sure
> > with XSAVEOPT but we're not using it.
> > 
> > > In fact, what is the purpose?
> > 
> > The exact purpose is to verify that a request to write unsupported
> > component didn't get through pro

Re: [PATCH v3 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-09 Thread Michał Górny
On Sun, 2019-06-09 at 12:54 +0200, Maxime Villard wrote:
> Le 09/06/2019 à 09:33, Michał Górny a écrit :
> > On Sun, 2019-06-09 at 09:20 +0200, Maxime Villard wrote:
> > > In fact, the whole loop seems wrong: CPUID uses the XCR0_ constants, not 
> > > the
> > > XSAVE_ constants. Eg HDC is 13 in XCR0_ but 10 in XSAVE_, so you never 
> > > iterate
> > > over it.
> > 
> > It's intentional.  I only iterate up to XSAVE_MAX_COMPONENT,
> > i.e. the last component actually used by the kernel.  I don't skip
> > earlier unused components to avoid index madness but I see no purpose to
> > go beyond last component actually used.
> 
> Ok, that's fine for now and we can revisit that later for future states.
> However, maybe rename XSAVE_ -> XCRO_SHIFT_, or something else, to make it
> clear that this depends on the hardware layout.

It is meant to be 'XSAVE component index', to be precise.  As passed
e.g. to CPUID as ECX.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-09 Thread Michał Górny
On Sun, 2019-06-09 at 09:08 +0200, Maxime Villard wrote:
> Le 07/06/2019 à 20:33, Michał Górny a écrit :
> > [...]
> > +int
> > +process_machdep_doxstate(struct lwp *curl, struct lwp *l, struct uio *uio)
> > +   /* curl: tracer */
> > +   /* l:traced */
> > +{
> > +   int error;
> > +   struct xstate r;
> > +   char *kv;
> > +   ssize_t kl;
> > +
> > +   kl = MIN(uio->uio_iov->iov_len, sizeof(r));
> > +   kv = (char *) &r;
> > +
> > +   kv += uio->uio_offset;
> > +   kl -= uio->uio_offset;
> > +   if (kl > uio->uio_resid)
> > +   kl = uio->uio_resid;
> > +
> > +   if (kl < 0)
> > +   error = EINVAL;
> > +   else
> > +   error = process_machdep_read_xstate(l, &r);
> > +   if (error == 0)
> > +   error = uiomove(kv, kl, uio);
> > +   if (error == 0 && uio->uio_rw == UIO_WRITE) {
> > +   if (l->l_proc->p_stat != SSTOP)
> > +   error = EBUSY;
> 
> Isn't it supposed to always be the case?

To be honest, I've followed suit with all other getter-setters there.
I can't say if it's fail-safe that's never supposed to fire or if there
could be some mistaken use possible that would trigger this.

> 
> > +   /* Copy MXCSR if either SSE or AVX state is requested */
> > +   if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
> > +   memcpy(&fpu_save->sv_xmm.fx_mxcsr, &xstate->xs_fxsave.fx_mxcsr, 
> > 8);
> > +
> > +   /*
> > +* Invalid bits in mxcsr or mxcsr_mask will cause faults.
> > +*/
> > +   fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
> > +   fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
> 
> Please use a simple assignment instead of memcpy, and also filter out 'xstate'
> and not 'fpu_save'.

Will do.  Actually, simple assignment means I can filter it out while
assigning.

> Also, it would be nice to clarify the use case here. On x86, mxcsr gets 
> reloaded
> *regardless* of whether xstate_bv contains SSE|AVX. Should the ptrace api also
> reload mxcsr regardless of whether the user requested SSE|AVX in xstate_bv?

I was following the Intel programmer's manual.  XRSTOR uses mxcsr either
if SSE or AVX is requested via EAX.

> > +   }
> > +
> > +   /* Copy SSE state if requested. */
> > +   if (xstate->xs_xstate_bv & XCR0_SSE) {
> > +   if (x86_fpu_save >= FPU_SAVE_XSAVE) {
> > +   KASSERT(fpu_save->sv_xsave_hdr.xsh_xstate_bv & 
> > XCR0_SSE);
> 
> Mmh, maybe it is possible that this KASSERT fires, if the LWP hasn't used SSE.

I don't think that can actually happen with XSAVE.  I wouldn't be sure
with XSAVEOPT but we're not using it.

> In fact, what is the purpose?

The exact purpose is to verify that a request to write unsupported
component didn't get through process_verify_xstate().  It's an assert,
so it means to check for something-we-really-don't-expect-to-happen.

> Also general notes:
> 
>   * Please KNF
>   * Don't add too many comments when the code is simple enough

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v3 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-09 Thread Michał Górny
On Sun, 2019-06-09 at 09:20 +0200, Maxime Villard wrote:
> Le 07/06/2019 à 20:33, Michał Górny a écrit :
> > +#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM
> > [...]
> > +size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT] __read_mostly;
> > +size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT] __read_mostly;
> 
> Need +1, but see below

Right.

> 
> > +   /* Get component offsets and sizes for the save area */
> > +   for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
> > +   if (x86_xsave_features & ((uint64_t)1 << i)) {
> 
> I would use __BIT(i).

Makes sense.

> 
> In fact, the whole loop seems wrong: CPUID uses the XCR0_ constants, not the
> XSAVE_ constants. Eg HDC is 13 in XCR0_ but 10 in XSAVE_, so you never iterate
> over it.

It's intentional.  I only iterate up to XSAVE_MAX_COMPONENT,
i.e. the last component actually used by the kernel.  I don't skip
earlier unused components to avoid index madness but I see no purpose to
go beyond last component actually used.

Of course, if you prefer it that way, I can extend the array and go over
all of them.

> 
> I would drop XSAVE_* entirely and just use XCR0_*, it is forward compatible.

Unless I'm mistaken, that would require using log2() to get array
indices.

> 
> > -#define__NetBSD_Version__  899004200   /* NetBSD 8.99.42 */
> > +#define__NetBSD_Version__  899004300   /* NetBSD 8.99.43 */
> 
> No version bump needed for that

Ok.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v3 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-08 Thread Michał Górny
Introduce two new ptrace() requests: PT_GETXSTATE and PT_SETXSTATE,
that provide access to the extended (and extensible) set of FPU
registers on amd64 and i386.  At the moment, this covers AVX (YMM)
and AVX-512 (ZMM, opmask) registers.  It can be easily extended
to cover further register types without breaking backwards
compatibility.

PT_GETXSTATE issues the XSAVE instruction with all kernel-supported
extended components enabled.  The data is copied into 'struct xstate'
(which -- unlike the XSAVE area itself -- has stable format
and offsets).

PT_SETXSTATE issues the XRSTOR instruction to restore the register
values from user-provided 'struct xstate'.  The function replaces only
the specific XSAVE components that are listed in 'xs_xstate_bv' field,
making it possible to issue partial updates.

Both syscalls take a 'struct iovec' pointer rather than a direct
argument.  This requires the caller to explicitly specify the buffer
size.  As a result, existing code will continue to work correctly
when the structure is extended (performing partial reads/updates).

TODO:
- add more tests (zmm*).

Changed in v3:
- style corrections as requested by christos,
- added FSAVE & FXSAVE compatibility,
- updated ptrace(2) manpage,
- adjusted PT_* consts on i386 and used translation layer,
- added mm* tests that verify that x87 component works fine
  (I've used them to verify FSAVE compatibility).
---
 lib/libc/sys/ptrace.2 |  56 ++
 sys/arch/amd64/amd64/netbsd32_machdep.c   |   2 +
 sys/arch/amd64/amd64/process_machdep.c| 134 
 sys/arch/amd64/include/netbsd32_machdep.h |   2 +
 sys/arch/amd64/include/ptrace.h   |  23 +-
 sys/arch/i386/i386/process_machdep.c  | 128 +++-
 sys/arch/i386/include/ptrace.h|  13 +-
 sys/arch/x86/include/cpu_extended_state.h |  57 ++
 sys/arch/x86/include/fpu.h|   4 +
 sys/arch/x86/x86/fpu.c| 147 
 tests/lib/libc/sys/t_ptrace_wait.c|   2 +
 tests/lib/libc/sys/t_ptrace_x86_wait.h| 885 +-
 12 files changed, 1429 insertions(+), 24 deletions(-)

diff --git a/lib/libc/sys/ptrace.2 b/lib/libc/sys/ptrace.2
index fa3f31fc3336..5a996b3d9386 100644
--- a/lib/libc/sys/ptrace.2
+++ b/lib/libc/sys/ptrace.2
@@ -761,6 +761,59 @@ The
 argument contains the LWP ID of the thread whose registers are to
 be written.
 If zero is supplied, the first thread of the process is written.
+.It Dv PT_GETXSTATE
+This request reads the traced process' FPU extended state into
+the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be read.
+If zero is supplied, the first thread of the process is read.
+The struct will be filled up to the specified
+.Fa iov_len .
+The caller needs to check
+.Fa xs_xstate_bv
+bitmap in order to determine which fields were provided by the CPU.
+.It Dv PT_SETXSTATE
+This request is the converse of
+.Dv PT_GETXSTATE ;
+it loads the traced process' extended FPU state from the
+.Dq Li "struct xstate"
+(defined in
+.In machine/cpu_extended_state.h ) .
+.Fa addr
+should be a pointer to
+.Dq Li "struct iovec"
+(defined in
+.In sys/uio.h )
+specifying the pointer to the aforementioned struct as
+.Fa iov_base
+and its size as
+.Fa iov_len .
+The
+.Fa data
+argument contains the LWP ID of the thread whose registers are to
+be written.
+If zero is supplied, the first thread of the process is written.
+The
+.Fa xs_xstate_bv
+field of the supplied xstate specifies which state components are to
+be updated.  Other components (fields) will be ignored.  The request
+will fail if any of the specified components is not supported
+by the CPU or kernel (i.e. not returned by
+.Dv PT_GETXSTATE .
 .El
 .Sh ERRORS
 Some requests can cause
@@ -822,6 +875,9 @@ or
 with
 .Dv vm.user_va0_disable
 set to 1.
+.It
+.Dv PT_SETXSTATE
+attempted to set state components not supported by the kernel.
 .El
 .It Bq Er EPERM
 .Bl -bullet -compact
diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 81bf78f6ecc4..3e007c79761b 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -353,6 +353,8 @@ netbsd32_ptrace_translate_request(int req)
case PT32_SETDBREGS:return PT_SETDBREGS;
case PT32_SETSTEP:  return PT_SETSTEP;
case PT32_CLEARSTEP:return PT_CLEARSTEP;
+   case PT32_GETXSTATE:return PT_GETXSTATE;
+   case PT32_SETXSTATE:return PT_SETXSTATE;
default:return -1;
}
 }
diff --git a/sys/arch/amd64/amd64/process_machdep.c 
b/sys/arch/amd64/amd64/process_machdep.c
index c204556c9168..b43475e45c7f 100644
--

Re: [PATCH v2 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-07 Thread Michał Górny
On Thu, 2019-06-06 at 11:10 -0400, Christos Zoulas wrote:
> On Jun 6,  4:43pm, mgo...@gentoo.org (=?UTF-8?Q?Micha=C5=82_G=C3=B3rny?=) 
> wrote:
> -- Subject: Re: [PATCH v2 2/2] Implement PT_GETXSTATE and PT_SETXSTATE
> 
> > I presume you mean the one in ptrace_machdep_dorequest()?  I suppose it
> > was there (the code is based on xmmregs in i386) to clearly scope
> > variables.
> > 
> > I think having something like:
> > 
> >   if (foo)
> > return x;
> >   {
> > ...
> >   }
> > 
> > would be confusing (it would look like a misplaced '{').  Do you prefer
> > if I put the scope outside 'if', i.e. directly for 'case'?
> 
> Or move the variable declarations up on top "ol school way". Well, if the
> language does not do it right...
> 
> > I suppose this avoids duplicating 'uio->uio_offset =3D 0'.  However, it's
> > also code copied from elsewhere so I'm not sure what the original
> > motivation was.
> 
> It is better style to do avoid the extra checks and duplication so I would
> also fix the original :-).
> 

I think I've done what you requested in v3 (now submitted in reply to
the top mail of the thread).  When it reaches the archive, I'm going to
try to find someone to help me test AVX-512 before I commit it.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v3 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-07 Thread Michał Górny
Introduce two new arrays, x86_xsave_offsets and x86_xsave_sizes,
and initialize them with XSAVE area component offsets and sizes queried
via CPUID.  This will be needed to implement getters and setters for
additional register types.

While at it, add XSAVE_* constants corresponding to specific XSAVE
components.
---
 sys/arch/x86/include/cpu.h|  2 ++
 sys/arch/x86/include/specialreg.h | 20 
 sys/arch/x86/x86/identcpu.c   | 12 
 sys/sys/param.h   |  2 +-
 4 files changed, 35 insertions(+), 1 deletion(-)

Changed in v3: bumped kernel version

diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h
index 143ae3c5c5ec..589f179ce758 100644
--- a/sys/arch/x86/include/cpu.h
+++ b/sys/arch/x86/include/cpu.h
@@ -459,6 +459,8 @@ extern int x86_fpu_save;
 #defineFPU_SAVE_XSAVEOPT   3
 extern unsigned int x86_fpu_save_size;
 extern uint64_t x86_xsave_features;
+extern size_t x86_xsave_offsets[];
+extern size_t x86_xsave_sizes[];
 extern uint32_t x86_fpu_mxcsr_mask;
 extern bool x86_fpu_eager;
 
diff --git a/sys/arch/x86/include/specialreg.h 
b/sys/arch/x86/include/specialreg.h
index 4f8c4cca6db7..1c0e8c972b07 100644
--- a/sys/arch/x86/include/specialreg.h
+++ b/sys/arch/x86/include/specialreg.h
@@ -146,6 +146,26 @@
 #define XCR0_FPU   (XCR0_X87 | XCR0_SSE | XCR0_YMM_Hi128 | \
 XCR0_Opmask | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM)
 
+/*
+ * XSAVE component indices.
+ */
+#define XSAVE_X87  0
+#define XSAVE_SSE  1
+#define XSAVE_YMM_Hi1282
+#define XSAVE_BNDREGS  3
+#define XSAVE_BNDCSR   4
+#define XSAVE_Opmask   5
+#define XSAVE_ZMM_Hi2566
+#define XSAVE_Hi16_ZMM 7
+#define XSAVE_PT   8
+#define XSAVE_PKRU 9
+#define XSAVE_HDC  10
+
+/*
+ * Highest XSAVE component enabled by XCR0_FPU.
+ */
+#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM
+
 /*
  * CPUID "features" bits
  */
diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c
index 9037fb2673fd..9714865bfb43 100644
--- a/sys/arch/x86/x86/identcpu.c
+++ b/sys/arch/x86/x86/identcpu.c
@@ -74,6 +74,8 @@ char cpu_brand_string[49];
 int x86_fpu_save __read_mostly;
 unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
 uint64_t x86_xsave_features __read_mostly = 0;
+size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT] __read_mostly;
+size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT] __read_mostly;
 
 /*
  * Note: these are just the ones that may not have a cpuid instruction.
@@ -755,6 +757,7 @@ static void
 cpu_probe_fpu(struct cpu_info *ci)
 {
u_int descs[4];
+   int i;
 
x86_fpu_eager = true;
x86_fpu_save = FPU_SAVE_FSAVE;
@@ -816,6 +819,15 @@ cpu_probe_fpu(struct cpu_info *ci)
x86_fpu_save_size = descs[2];
 
x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
+
+   /* Get component offsets and sizes for the save area */
+   for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
+   if (x86_xsave_features & ((uint64_t)1 << i)) {
+   x86_cpuid2(0xd, i, descs);
+   x86_xsave_offsets[i] = descs[1];
+   x86_xsave_sizes[i] = descs[0];
+   }
+   }
 }
 
 void
diff --git a/sys/sys/param.h b/sys/sys/param.h
index c0c96d79fd49..59e22a240905 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -67,7 +67,7 @@
  * 2.99.9  (299000900)
  */
 
-#define__NetBSD_Version__  899004200   /* NetBSD 8.99.42 */
+#define__NetBSD_Version__  899004300   /* NetBSD 8.99.43 */
 
 #define __NetBSD_Prereq__(M,m,p) (M) * 1) + \
 (m) * 100) + (p) * 100) <= __NetBSD_Version__)
-- 
2.22.0.rc3



Re: [PATCH v2 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-06 Thread Michał Górny
On Wed, 2019-06-05 at 11:53 -0400, Christos Zoulas wrote:
> On Jun 5,  5:08pm, mgo...@gentoo.org (=?UTF-8?q?Micha=C5=82=20G=C3=B3rny?=) 
> wrote:
> -- Subject: [PATCH v2 2/2] Implement PT_GETXSTATE and PT_SETXSTATE
> 
> > Introduce two new ptrace() requests: PT_GETXSTATE and PT_SETXSTATE,
> > that provide access to the extended (and extensible) set of FPU
> > registers on amd64 and i386.  At the moment, this covers AVX (YMM)
> > and AVX-512 (ZMM, opmask) registers.  It can be easily extended
> > to cover further register types without breaking backwards
> > compatibility.
> > 
> > PT_GETXSTATE issues the XSAVE instruction with all kernel-supported
> > extended components enabled.  The data is copied into 'struct xstate'
> > (which -- unlike the XSAVE area itself -- has stable format
> > and offsets).
> > 
> > PT_SETXSTATE issues the XRSTOR instruction to restore the register
> > values from user-provided 'struct xstate'.  The function replaces only
> > the specific XSAVE components that are listed in 'xs_xstate_bv' field,
> > making it possible to issue partial updates.
> > 
> > Both syscalls take a 'struct iovec' pointer rather than a direct
> > argument.  This requires the caller to explicitly specify the buffer
> > size.  As a result, existing code will continue to work correctly
> > when the structure is extended (performing partial reads/updates).
> 
> LGTM:
> 
> - There is no need for '} else {' after return

I presume you mean the one in ptrace_machdep_dorequest()?  I suppose it
was there (the code is based on xmmregs in i386) to clearly scope
variables.

I think having something like:

  if (foo)
return x;
  {
...
  }

would be confusing (it would look like a misplaced '{').  Do you prefer
if I put the scope outside 'if', i.e. directly for 'case'?

> - Style says 'return a;' not 'return (a);'

Fixed.

> - While no early returns in 
>   +   if (kl < 0)
>   +   error = EINVAL;
>   and below?
> 

I suppose this avoids duplicating 'uio->uio_offset = 0'.  However, it's
also code copied from elsewhere so I'm not sure what the original
motivation was.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v2 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-06 Thread Michał Górny
On Wed, 2019-06-05 at 08:32 -0700, Paul Goyette wrote:
> Are there any userland programs that use cpu.h?  Or any kernel modules?
> 
> If so, these changes would create a change in the kernel API and would 
> therefore require a kernel version bump.
> 

I'm not really sure whether this breaks the API, so I'll bump
the version for a good measure.  Even if we aren't going to commit this
to 9.0, having the merge conflict will help me remember to update it.


-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v2 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-05 Thread Michał Górny
Introduce two new ptrace() requests: PT_GETXSTATE and PT_SETXSTATE,
that provide access to the extended (and extensible) set of FPU
registers on amd64 and i386.  At the moment, this covers AVX (YMM)
and AVX-512 (ZMM, opmask) registers.  It can be easily extended
to cover further register types without breaking backwards
compatibility.

PT_GETXSTATE issues the XSAVE instruction with all kernel-supported
extended components enabled.  The data is copied into 'struct xstate'
(which -- unlike the XSAVE area itself -- has stable format
and offsets).

PT_SETXSTATE issues the XRSTOR instruction to restore the register
values from user-provided 'struct xstate'.  The function replaces only
the specific XSAVE components that are listed in 'xs_xstate_bv' field,
making it possible to issue partial updates.

Both syscalls take a 'struct iovec' pointer rather than a direct
argument.  This requires the caller to explicitly specify the buffer
size.  As a result, existing code will continue to work correctly
when the structure is extended (performing partial reads/updates).

TODO:
- add more tests (zmm*),
- add compatibility with FXSAVE and FSAVE systems.
---

Changes in v2:
- updated for compat32 mapping,
- added ymm writing tests.

 sys/arch/amd64/amd64/netbsd32_machdep.c   |   2 +
 sys/arch/amd64/amd64/process_machdep.c| 137 +
 sys/arch/amd64/include/netbsd32_machdep.h |   2 +
 sys/arch/amd64/include/ptrace.h   |  23 +-
 sys/arch/i386/i386/process_machdep.c  |  97 +++
 sys/arch/i386/include/ptrace.h|  13 +-
 sys/arch/x86/include/cpu_extended_state.h |  52 ++
 sys/arch/x86/include/fpu.h|   4 +
 sys/arch/x86/x86/fpu.c| 120 
 tests/lib/libc/sys/t_ptrace_wait.c|   2 +
 tests/lib/libc/sys/t_ptrace_x86_wait.h| 688 +-
 11 files changed, 1135 insertions(+), 5 deletions(-)

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 81bf78f6ecc4..3e007c79761b 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -353,6 +353,8 @@ netbsd32_ptrace_translate_request(int req)
case PT32_SETDBREGS:return PT_SETDBREGS;
case PT32_SETSTEP:  return PT_SETSTEP;
case PT32_CLEARSTEP:return PT_CLEARSTEP;
+   case PT32_GETXSTATE:return PT_GETXSTATE;
+   case PT32_SETXSTATE:return PT_SETXSTATE;
default:return -1;
}
 }
diff --git a/sys/arch/amd64/amd64/process_machdep.c 
b/sys/arch/amd64/amd64/process_machdep.c
index c204556c9168..17c6328700c0 100644
--- a/sys/arch/amd64/amd64/process_machdep.c
+++ b/sys/arch/amd64/amd64/process_machdep.c
@@ -84,6 +84,9 @@ __KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.39 
2019/02/11 14:59:32 cherry
 #include 
 #include 
 
+#include 
+
+#include 
 #include 
 #include 
 #include 
@@ -288,3 +291,137 @@ process_set_pc(struct lwp *l, void *addr)
 
return 0;
 }
+
+#ifdef __HAVE_PTRACE_MACHDEP
+static int
+process_machdep_read_xstate(struct lwp *l, struct xstate *regs)
+{
+   return process_read_xstate(l, regs);
+}
+
+static int
+process_machdep_write_xstate(struct lwp *l, const struct xstate *regs)
+{
+   int error;
+
+   /*
+* Check for security violations.
+*/
+   error = process_verify_xstate(regs);
+   if (error != 0)
+   return error;
+
+   return process_write_xstate(l, regs);
+}
+
+int
+ptrace_machdep_dorequest(
+struct lwp *l,
+struct lwp *lt,
+int req,
+void *addr,
+int data
+)
+{
+   struct uio uio;
+   struct iovec iov;
+   int write = 0;
+
+   switch (req) {
+   case PT_SETXSTATE:
+   write = 1;
+
+   /* FALLTHROUGH */
+   case PT_GETXSTATE:
+   /* write = 0 done above. */
+   if (!process_machdep_validxstate(lt->l_proc))
+   return (EINVAL);
+   else {
+   struct vmspace *vm;
+   int error;
+
+   if (__predict_false(l->l_proc->p_flag & PK_32)) {
+   struct netbsd32_iovec *user_iov;
+   user_iov = (struct netbsd32_iovec*)addr;
+   iov.iov_base = 
NETBSD32PTR64(user_iov->iov_base);
+   iov.iov_len = user_iov->iov_len;
+   } else {
+   struct iovec *user_iov;
+   user_iov = (struct iovec*)addr;
+   iov.iov_base = user_iov->iov_base;
+   iov.iov_len = user_iov->iov_len;
+   }
+
+   error = proc_vmspace_getref(l->l_proc, &vm);
+   if (error)
+   return error;
+   if (iov.iov_len > sizeof(struct xstate))

[PATCH v2 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-05 Thread Michał Górny
Introduce two new arrays, x86_xsave_offsets and x86_xsave_sizes,
and initialize them with XSAVE area component offsets and sizes queried
via CPUID.  This will be needed to implement getters and setters for
additional register types.

While at it, add XSAVE_* constants corresponding to specific XSAVE
components.
---
 sys/arch/x86/include/cpu.h|  2 ++
 sys/arch/x86/include/specialreg.h | 20 
 sys/arch/x86/x86/identcpu.c   | 12 
 3 files changed, 34 insertions(+)

diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h
index 143ae3c5c5ec..589f179ce758 100644
--- a/sys/arch/x86/include/cpu.h
+++ b/sys/arch/x86/include/cpu.h
@@ -459,6 +459,8 @@ extern int x86_fpu_save;
 #defineFPU_SAVE_XSAVEOPT   3
 extern unsigned int x86_fpu_save_size;
 extern uint64_t x86_xsave_features;
+extern size_t x86_xsave_offsets[];
+extern size_t x86_xsave_sizes[];
 extern uint32_t x86_fpu_mxcsr_mask;
 extern bool x86_fpu_eager;
 
diff --git a/sys/arch/x86/include/specialreg.h 
b/sys/arch/x86/include/specialreg.h
index 4f8c4cca6db7..1c0e8c972b07 100644
--- a/sys/arch/x86/include/specialreg.h
+++ b/sys/arch/x86/include/specialreg.h
@@ -146,6 +146,26 @@
 #define XCR0_FPU   (XCR0_X87 | XCR0_SSE | XCR0_YMM_Hi128 | \
 XCR0_Opmask | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM)
 
+/*
+ * XSAVE component indices.
+ */
+#define XSAVE_X87  0
+#define XSAVE_SSE  1
+#define XSAVE_YMM_Hi1282
+#define XSAVE_BNDREGS  3
+#define XSAVE_BNDCSR   4
+#define XSAVE_Opmask   5
+#define XSAVE_ZMM_Hi2566
+#define XSAVE_Hi16_ZMM 7
+#define XSAVE_PT   8
+#define XSAVE_PKRU 9
+#define XSAVE_HDC  10
+
+/*
+ * Highest XSAVE component enabled by XCR0_FPU.
+ */
+#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM
+
 /*
  * CPUID "features" bits
  */
diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c
index 9037fb2673fd..9714865bfb43 100644
--- a/sys/arch/x86/x86/identcpu.c
+++ b/sys/arch/x86/x86/identcpu.c
@@ -74,6 +74,8 @@ char cpu_brand_string[49];
 int x86_fpu_save __read_mostly;
 unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
 uint64_t x86_xsave_features __read_mostly = 0;
+size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT] __read_mostly;
+size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT] __read_mostly;
 
 /*
  * Note: these are just the ones that may not have a cpuid instruction.
@@ -755,6 +757,7 @@ static void
 cpu_probe_fpu(struct cpu_info *ci)
 {
u_int descs[4];
+   int i;
 
x86_fpu_eager = true;
x86_fpu_save = FPU_SAVE_FSAVE;
@@ -816,6 +819,15 @@ cpu_probe_fpu(struct cpu_info *ci)
x86_fpu_save_size = descs[2];
 
x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
+
+   /* Get component offsets and sizes for the save area */
+   for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
+   if (x86_xsave_features & ((uint64_t)1 << i)) {
+   x86_cpuid2(0xd, i, descs);
+   x86_xsave_offsets[i] = descs[1];
+   x86_xsave_sizes[i] = descs[0];
+   }
+   }
 }
 
 void
-- 
2.22.0.rc3



[PATCH 2/2] Implement PT_GETXSTATE and PT_SETXSTATE

2019-06-04 Thread Michał Górny
Introduce two new ptrace() requests: PT_GETXSTATE and PT_SETXSTATE,
that provide access to the extended (and extensible) set of FPU
registers on amd64 and i386.  At the moment, this covers AVX (YMM)
and AVX-512 (ZMM, opmask) registers.  It can be easily extended
to cover further register types without breaking backwards
compatibility.

PT_GETXSTATE issues the XSAVE instruction with all kernel-supported
extended components enabled.  The data is copied into 'struct xstate'
(which -- unlike the XSAVE area itself -- has stable format
and offsets).

PT_SETXSTATE issues the XRSTOR instruction to restore the register
values from user-provided 'struct xstate'.  The function replaces only
the specific XSAVE components that are listed in 'xs_xstate_bv' field,
making it possible to issue partial updates.

Both syscalls take a 'struct iovec' pointer rather than a direct
argument.  This requires the caller to explicitly specify the buffer
size.  As a result, existing code will continue to work correctly
when the structure is extended (performing partial reads/updates).

TODO:
- add more tests (ymm write, zmm*),
- update for compat32 PT_* mapping once that patch is merged,
- add compatibility with FXSAVE and FSAVE systems.
---
 sys/arch/amd64/amd64/process_machdep.c| 137 +++
 sys/arch/amd64/include/ptrace.h   |  27 +-
 sys/arch/i386/i386/process_machdep.c  |  97 +
 sys/arch/i386/include/ptrace.h|  13 +-
 sys/arch/x86/include/cpu_extended_state.h |  52 +++
 sys/arch/x86/include/fpu.h|   4 +
 sys/arch/x86/x86/fpu.c| 120 ++
 tests/lib/libc/sys/t_ptrace_wait.c|   2 +
 tests/lib/libc/sys/t_ptrace_x86_wait.h| 448 +-
 9 files changed, 895 insertions(+), 5 deletions(-)

diff --git a/sys/arch/amd64/amd64/process_machdep.c 
b/sys/arch/amd64/amd64/process_machdep.c
index c204556c9168..17c6328700c0 100644
--- a/sys/arch/amd64/amd64/process_machdep.c
+++ b/sys/arch/amd64/amd64/process_machdep.c
@@ -84,6 +84,9 @@ __KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.39 
2019/02/11 14:59:32 cherry
 #include 
 #include 
 
+#include 
+
+#include 
 #include 
 #include 
 #include 
@@ -288,3 +291,137 @@ process_set_pc(struct lwp *l, void *addr)
 
return 0;
 }
+
+#ifdef __HAVE_PTRACE_MACHDEP
+static int
+process_machdep_read_xstate(struct lwp *l, struct xstate *regs)
+{
+   return process_read_xstate(l, regs);
+}
+
+static int
+process_machdep_write_xstate(struct lwp *l, const struct xstate *regs)
+{
+   int error;
+
+   /*
+* Check for security violations.
+*/
+   error = process_verify_xstate(regs);
+   if (error != 0)
+   return error;
+
+   return process_write_xstate(l, regs);
+}
+
+int
+ptrace_machdep_dorequest(
+struct lwp *l,
+struct lwp *lt,
+int req,
+void *addr,
+int data
+)
+{
+   struct uio uio;
+   struct iovec iov;
+   int write = 0;
+
+   switch (req) {
+   case PT_SETXSTATE:
+   write = 1;
+
+   /* FALLTHROUGH */
+   case PT_GETXSTATE:
+   /* write = 0 done above. */
+   if (!process_machdep_validxstate(lt->l_proc))
+   return (EINVAL);
+   else {
+   struct vmspace *vm;
+   int error;
+
+   if (__predict_false(l->l_proc->p_flag & PK_32)) {
+   struct netbsd32_iovec *user_iov;
+   user_iov = (struct netbsd32_iovec*)addr;
+   iov.iov_base = 
NETBSD32PTR64(user_iov->iov_base);
+   iov.iov_len = user_iov->iov_len;
+   } else {
+   struct iovec *user_iov;
+   user_iov = (struct iovec*)addr;
+   iov.iov_base = user_iov->iov_base;
+   iov.iov_len = user_iov->iov_len;
+   }
+
+   error = proc_vmspace_getref(l->l_proc, &vm);
+   if (error)
+   return error;
+   if (iov.iov_len > sizeof(struct xstate))
+   iov.iov_len = sizeof(struct xstate);
+   uio.uio_iov = &iov;
+   uio.uio_iovcnt = 1;
+   uio.uio_offset = 0;
+   uio.uio_resid = iov.iov_len;
+   uio.uio_rw = write ? UIO_WRITE : UIO_READ;
+   uio.uio_vmspace = vm;
+   error = process_machdep_doxstate(l, lt, &uio);
+   uvmspace_free(vm);
+   return error;
+   }
+   }
+
+#ifdef DIAGNOSTIC
+   panic("ptrace_machdep: impossible");
+#endif
+
+   return (0);
+}
+
+/*
+ * The following functions are used by both ptrace(2) and procfs.
+ */
+
+int
+process_machdep_doxstate(s

[PATCH 0/2] PT_{GET,SET}XSTATE implementation, WIP v1

2019-06-04 Thread Michał Górny
Hi,

Here's my first complete version of PT_GETXSTATE/PT_SETXSTATE impl.
It's still WIP but it's functionally complete for XSAVE systems,
and it works correctly on i386 and amd64.

It follows variant b from [1], simply because my previous work was
easier to convert to this variant.  If you really believe we should
be going for variant c instead, I can update the proposal but I honestly
see no gain from multiplicating ptrace requests over necessity.

Patches sent as replies (git format).  More info in patch bodies.

[1] http://mail-index.netbsd.org/tech-kern/2019/05/28/msg025062.html

--
Best regards,
Michał Górny


Michał Górny (2):
  Fetch XSAVE area component offsets and sizes when initializing x86 CPU
  Implement PT_GETXSTATE and PT_SETXSTATE

 sys/arch/amd64/amd64/process_machdep.c| 137 +++
 sys/arch/amd64/include/ptrace.h   |  27 +-
 sys/arch/i386/i386/process_machdep.c  |  97 +
 sys/arch/i386/include/ptrace.h|  13 +-
 sys/arch/x86/include/cpu.h|   2 +
 sys/arch/x86/include/cpu_extended_state.h |  52 +++
 sys/arch/x86/include/fpu.h|   4 +
 sys/arch/x86/include/specialreg.h |  20 +
 sys/arch/x86/x86/fpu.c| 120 ++
 sys/arch/x86/x86/identcpu.c   |  12 +
 tests/lib/libc/sys/t_ptrace_wait.c|   2 +
 tests/lib/libc/sys/t_ptrace_x86_wait.h| 448 +-
 12 files changed, 929 insertions(+), 5 deletions(-)

-- 
2.22.0.rc2



[PATCH 1/2] Fetch XSAVE area component offsets and sizes when initializing x86 CPU

2019-06-04 Thread Michał Górny
Introduce two new arrays, x86_xsave_offsets and x86_xsave_sizes,
and initialize them with XSAVE area component offsets and sizes queried
via CPUID.  This will be needed to implement getters and setters for
additional register types.

While at it, add XSAVE_* constants corresponding to specific XSAVE
components.
---
 sys/arch/x86/include/cpu.h|  2 ++
 sys/arch/x86/include/specialreg.h | 20 
 sys/arch/x86/x86/identcpu.c   | 12 
 3 files changed, 34 insertions(+)

diff --git a/sys/arch/x86/include/cpu.h b/sys/arch/x86/include/cpu.h
index 143ae3c5c5ec..589f179ce758 100644
--- a/sys/arch/x86/include/cpu.h
+++ b/sys/arch/x86/include/cpu.h
@@ -459,6 +459,8 @@ extern int x86_fpu_save;
 #defineFPU_SAVE_XSAVEOPT   3
 extern unsigned int x86_fpu_save_size;
 extern uint64_t x86_xsave_features;
+extern size_t x86_xsave_offsets[];
+extern size_t x86_xsave_sizes[];
 extern uint32_t x86_fpu_mxcsr_mask;
 extern bool x86_fpu_eager;
 
diff --git a/sys/arch/x86/include/specialreg.h 
b/sys/arch/x86/include/specialreg.h
index 4f8c4cca6db7..1c0e8c972b07 100644
--- a/sys/arch/x86/include/specialreg.h
+++ b/sys/arch/x86/include/specialreg.h
@@ -146,6 +146,26 @@
 #define XCR0_FPU   (XCR0_X87 | XCR0_SSE | XCR0_YMM_Hi128 | \
 XCR0_Opmask | XCR0_ZMM_Hi256 | XCR0_Hi16_ZMM)
 
+/*
+ * XSAVE component indices.
+ */
+#define XSAVE_X87  0
+#define XSAVE_SSE  1
+#define XSAVE_YMM_Hi1282
+#define XSAVE_BNDREGS  3
+#define XSAVE_BNDCSR   4
+#define XSAVE_Opmask   5
+#define XSAVE_ZMM_Hi2566
+#define XSAVE_Hi16_ZMM 7
+#define XSAVE_PT   8
+#define XSAVE_PKRU 9
+#define XSAVE_HDC  10
+
+/*
+ * Highest XSAVE component enabled by XCR0_FPU.
+ */
+#define XSAVE_MAX_COMPONENT XSAVE_Hi16_ZMM
+
 /*
  * CPUID "features" bits
  */
diff --git a/sys/arch/x86/x86/identcpu.c b/sys/arch/x86/x86/identcpu.c
index 9037fb2673fd..9714865bfb43 100644
--- a/sys/arch/x86/x86/identcpu.c
+++ b/sys/arch/x86/x86/identcpu.c
@@ -74,6 +74,8 @@ char cpu_brand_string[49];
 int x86_fpu_save __read_mostly;
 unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
 uint64_t x86_xsave_features __read_mostly = 0;
+size_t x86_xsave_offsets[XSAVE_MAX_COMPONENT] __read_mostly;
+size_t x86_xsave_sizes[XSAVE_MAX_COMPONENT] __read_mostly;
 
 /*
  * Note: these are just the ones that may not have a cpuid instruction.
@@ -755,6 +757,7 @@ static void
 cpu_probe_fpu(struct cpu_info *ci)
 {
u_int descs[4];
+   int i;
 
x86_fpu_eager = true;
x86_fpu_save = FPU_SAVE_FSAVE;
@@ -816,6 +819,15 @@ cpu_probe_fpu(struct cpu_info *ci)
x86_fpu_save_size = descs[2];
 
x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
+
+   /* Get component offsets and sizes for the save area */
+   for (i = XSAVE_YMM_Hi128; i < __arraycount(x86_xsave_offsets); i++) {
+   if (x86_xsave_features & ((uint64_t)1 << i)) {
+   x86_cpuid2(0xd, i, descs);
+   x86_xsave_offsets[i] = descs[1];
+   x86_xsave_sizes[i] = descs[0];
+   }
+   }
 }
 
 void
-- 
2.22.0.rc2



[PATCH v3 2/2] compat32: Implement PT_GETDBREGS and PT_SETDBREGS

2019-05-31 Thread Michał Górny
Uncomment and improve the implementation of compat32 support for
PT_GETDBREGS and PT_SETDBREGS requests.

The new implementation uses x86_dbregs_read() and x86_dbregs_write()
function instead of accessing pcb directly.  While this might be
a little slower, it guarantees that the needed pcb field is allocated
correctly.

Furthermore, the code introduces necessary sanity checks
for PT_SETDBREGS arguments.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c | 52 +
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 1b895e378c1e..d6b07ed6c0e0 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -395,23 +395,19 @@ netbsd32_process_read_fpregs(struct lwp *l, struct 
fpreg32 *regs, size_t *sz)
 int
 netbsd32_process_read_dbregs(struct lwp *l, struct dbreg32 *regs, size_t *sz)
 {
-#if notyet
-   struct pcb *pcb;
-
-   pcb = lwp_getpcb(l);
+   struct dbreg regs64;
 
-   regs->dr[0] = pcb->pcb_dbregs->dr[0] & 0x;
-   regs->dr[1] = pcb->pcb_dbregs->dr[1] & 0x;
-   regs->dr[2] = pcb->pcb_dbregs->dr[2] & 0x;
-   regs->dr[3] = pcb->pcb_dbregs->dr[3] & 0x;
+   x86_dbregs_read(l, ®s64);
+   memset(regs, 0, sizeof(*regs));
+   regs->dr[0] = regs64.dr[0] & 0x;
+   regs->dr[1] = regs64.dr[1] & 0x;
+   regs->dr[2] = regs64.dr[2] & 0x;
+   regs->dr[3] = regs64.dr[3] & 0x;
 
-   regs->dr[6] = pcb->pcb_dbregs->dr[6] & 0x;
-   regs->dr[7] = pcb->pcb_dbregs->dr[7] & 0x;
+   regs->dr[6] = regs64.dr[6] & 0x;
+   regs->dr[7] = regs64.dr[7] & 0x;
 
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
@@ -478,23 +474,29 @@ int
 netbsd32_process_write_dbregs(struct lwp *l, const struct dbreg32 *regs,
 size_t sz)
 {
-#if notyet
-   struct pcb *pcb;
+   size_t i;
+   struct dbreg regs64;
 
-   pcb = lwp_getpcb(l);
+   /* Check that DR0-DR3 contain user-space address */
+   for (i = 0; i < X86_DBREGS; i++) {
+   if (regs->dr[i] >= VM_MAXUSER_ADDRESS32)
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[0] = regs->dr[0];
-   pcb->pcb_dbregs->dr[1] = regs->dr[1];
-   pcb->pcb_dbregs->dr[2] = regs->dr[2];
-   pcb->pcb_dbregs->dr[3] = regs->dr[3];
+   if (regs->dr[7] & X86_DR7_GENERAL_DETECT_ENABLE) {
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[6] = regs->dr[6];
-   pcb->pcb_dbregs->dr[7] = regs->dr[7];
+   regs64.dr[0] = regs->dr[0];
+   regs64.dr[1] = regs->dr[1];
+   regs64.dr[2] = regs->dr[2];
+   regs64.dr[3] = regs->dr[3];
 
+   regs64.dr[6] = regs->dr[6];
+   regs64.dr[7] = regs->dr[7];
+
+   x86_dbregs_write(l, ®s64);
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
-- 
2.22.0.rc1



[PATCH v3 1/2] compat32: translate userland PT_* request values into kernel

2019-05-31 Thread Michał Górny
Currently, the compat32 passes PT_* request values to kernel functions
without translation.  This works fine for low PT_* requests that happen
to have the same values both on i386 and amd64.  However, for requests
higher than PT_SETFPREGS, the value passed from userland (matching i386
const) does not match the correct kernel (amd64) request.  As a result,
e.g. when compat32 process calls PT_GETDBREGS, kernel actually processes
it as PT_SETSTEP.

To resolve this, introduce support for compat32 PT_* request
translation.  The interface is based on PTRACE_TRANSLATE_REQUEST32 macro
that is defined to a mapping function on architectures needing it.
In case of amd64, this function maps userland i386 PT_* values into
appropriate amd64 PT_* values.

For the time being, the two additional PT_GETXMMREGS and PT_SETXMMREGS
requests are unsupported due to lack of matching free amd64 constant.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c   | 22 ++
 sys/arch/amd64/include/netbsd32_machdep.h | 19 +++
 sys/arch/amd64/include/ptrace.h   |  2 ++
 sys/compat/netbsd32/netbsd32_ptrace.c | 12 +++-
 4 files changed, 54 insertions(+), 1 deletion(-)

Changes in v3:
after all, I went for defining PT32_* constants.  This should make it
easier to sync them with i386/.

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 39d671a4b9b6..1b895e378c1e 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -335,6 +335,28 @@ cpu_coredump32(struct lwp *l, struct coredump_iostate 
*iocookie,
 }
 #endif
 
+int
+netbsd32_ptrace_translate_request(int req)
+{
+
+   switch (req)
+   {
+   case 0 ... PT_FIRSTMACH - 1:return req;
+   case PT32_STEP: return PT_STEP;
+   case PT32_GETREGS:  return PT_GETREGS;
+   case PT32_SETREGS:  return PT_SETREGS;
+   case PT32_GETFPREGS:return PT_GETFPREGS;
+   case PT32_SETFPREGS:return PT_SETFPREGS;
+   case PT32_GETXMMREGS:   return -1;
+   case PT32_SETXMMREGS:   return -1;
+   case PT32_GETDBREGS:return PT_GETDBREGS;
+   case PT32_SETDBREGS:return PT_SETDBREGS;
+   case PT32_SETSTEP:  return PT_SETSTEP;
+   case PT32_CLEARSTEP:return PT_CLEARSTEP;
+   default:return -1;
+   }
+}
+
 int
 netbsd32_process_read_regs(struct lwp *l, struct reg32 *regs)
 {
diff --git a/sys/arch/amd64/include/netbsd32_machdep.h 
b/sys/arch/amd64/include/netbsd32_machdep.h
index 51691eb7c90f..dd66e0b8962e 100644
--- a/sys/arch/amd64/include/netbsd32_machdep.h
+++ b/sys/arch/amd64/include/netbsd32_machdep.h
@@ -7,6 +7,22 @@
 #include 
 #include 
 
+/*
+ * i386 ptrace constants
+ * Please keep in sync with sys/arch/i386/include/ptrace.h.
+ */
+#definePT32_STEP   (PT_FIRSTMACH + 0)
+#definePT32_GETREGS(PT_FIRSTMACH + 1)
+#definePT32_SETREGS(PT_FIRSTMACH + 2)
+#definePT32_GETFPREGS  (PT_FIRSTMACH + 3)
+#definePT32_SETFPREGS  (PT_FIRSTMACH + 4)
+#definePT32_GETXMMREGS (PT_FIRSTMACH + 5)
+#definePT32_SETXMMREGS (PT_FIRSTMACH + 6)
+#definePT32_GETDBREGS  (PT_FIRSTMACH + 7)
+#definePT32_SETDBREGS  (PT_FIRSTMACH + 8)
+#definePT32_SETSTEP(PT_FIRSTMACH + 9)
+#definePT32_CLEARSTEP  (PT_FIRSTMACH + 10)
+
 #define NETBSD32_POINTER_TYPE uint32_t
 typedefstruct { NETBSD32_POINTER_TYPE i32; } netbsd32_pointer_t;
 
@@ -151,6 +167,9 @@ struct x86_64_set_mtrr_args32 {
 
 #define NETBSD32_MID_MACHINE MID_I386
 
+/* Translate ptrace() PT_* request from 32-bit userland to kernel. */
+int netbsd32_ptrace_translate_request(int);
+
 int netbsd32_process_read_regs(struct lwp *, struct reg32 *);
 int netbsd32_process_read_fpregs(struct lwp *, struct fpreg32 *, size_t *);
 int netbsd32_process_read_dbregs(struct lwp *, struct dbreg32 *, size_t *);
diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index c5cef941fd16..8d0f54dbad24 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -87,6 +87,8 @@
 #define process_reg32  struct reg32
 #define process_fpreg32struct fpreg32
 #define process_dbreg32struct dbreg32
+
+#define PTRACE_TRANSLATE_REQUEST32(x) netbsd32_ptrace_translate_request(x)
 #endif /* COMPAT_NETBSD32 */
 #endif /* _KERNEL_OPT */
 
diff --git a/sys/compat/netbsd32/netbsd32_ptrace.c 
b/sys/compat/netbsd32/netbsd32_ptrace.c
index 614d363c3126..ff5025ceea44 100644
--- a/sys/compat/netbsd32/netbsd32_ptrace.c
+++ b/sys/compat/netbsd32/netbsd32_ptrace.c
@@ -47,6 +47,10 @@ __KERNEL_RCSID(0, "$NetBSD: netbsd32_ptrace.c,v 1.6 
2019/01/27 02:08:40 pgoyette
 #include 
 #include 
 
+#ifndef PTRACE_TRANS

[PATCH v2 2/2] compat32: Implement PT_GETDBREGS and PT_SETDBREGS

2019-05-30 Thread Michał Górny
Uncomment and improve the implementation of compat32 support for
PT_GETDBREGS and PT_SETDBREGS requests.

The new implementation uses x86_dbregs_read() and x86_dbregs_write()
function instead of accessing pcb directly.  While this might be
a little slower, it guarantees that the needed pcb field is allocated
correctly.

Furthermore, the code introduces necessary sanity checks
for PT_SETDBREGS arguments.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c | 52 +
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 06f6cbdf3703..31940f0af146 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -410,23 +410,19 @@ netbsd32_process_read_fpregs(struct lwp *l, struct 
fpreg32 *regs, size_t *sz)
 int
 netbsd32_process_read_dbregs(struct lwp *l, struct dbreg32 *regs, size_t *sz)
 {
-#if notyet
-   struct pcb *pcb;
-
-   pcb = lwp_getpcb(l);
+   struct dbreg regs64;
 
-   regs->dr[0] = pcb->pcb_dbregs->dr[0] & 0x;
-   regs->dr[1] = pcb->pcb_dbregs->dr[1] & 0x;
-   regs->dr[2] = pcb->pcb_dbregs->dr[2] & 0x;
-   regs->dr[3] = pcb->pcb_dbregs->dr[3] & 0x;
+   x86_dbregs_read(l, ®s64);
+   memset(regs, 0, sizeof(*regs));
+   regs->dr[0] = regs64.dr[0] & 0x;
+   regs->dr[1] = regs64.dr[1] & 0x;
+   regs->dr[2] = regs64.dr[2] & 0x;
+   regs->dr[3] = regs64.dr[3] & 0x;
 
-   regs->dr[6] = pcb->pcb_dbregs->dr[6] & 0x;
-   regs->dr[7] = pcb->pcb_dbregs->dr[7] & 0x;
+   regs->dr[6] = regs64.dr[6] & 0x;
+   regs->dr[7] = regs64.dr[7] & 0x;
 
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
@@ -493,23 +489,29 @@ int
 netbsd32_process_write_dbregs(struct lwp *l, const struct dbreg32 *regs,
 size_t sz)
 {
-#if notyet
-   struct pcb *pcb;
+   size_t i;
+   struct dbreg regs64;
 
-   pcb = lwp_getpcb(l);
+   /* Check that DR0-DR3 contain user-space address */
+   for (i = 0; i < X86_DBREGS; i++) {
+   if (regs->dr[i] >= VM_MAXUSER_ADDRESS32)
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[0] = regs->dr[0];
-   pcb->pcb_dbregs->dr[1] = regs->dr[1];
-   pcb->pcb_dbregs->dr[2] = regs->dr[2];
-   pcb->pcb_dbregs->dr[3] = regs->dr[3];
+   if (regs->dr[7] & X86_DR7_GENERAL_DETECT_ENABLE) {
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[6] = regs->dr[6];
-   pcb->pcb_dbregs->dr[7] = regs->dr[7];
+   regs64.dr[0] = regs->dr[0];
+   regs64.dr[1] = regs->dr[1];
+   regs64.dr[2] = regs->dr[2];
+   regs64.dr[3] = regs->dr[3];
 
+   regs64.dr[6] = regs->dr[6];
+   regs64.dr[7] = regs->dr[7];
+
+   x86_dbregs_write(l, ®s64);
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
-- 
2.22.0.rc1



[PATCH v2 1/2] compat32: translate userland PT_* request values into kernel

2019-05-30 Thread Michał Górny
Currently, the compat32 passes PT_* request values to kernel functions
without translation.  This works fine for low PT_* requests that happen
to have the same values both on i386 and amd64.  However, for requests
higher than PT_SETFPREGS, the value passed from userland (matching i386
const) does not match the correct kernel (amd64) request.  As a result,
e.g. when compat32 process calls PT_GETDBREGS, kernel actually processes
it as PT_SETSTEP.

To resolve this, introduce support for compat32 PT_* request
translation.  The interface is based on PTRACE_TRANSLATE_REQUEST32 macro
that is defined to a mapping function on architectures needing it.
In case of amd64, this function maps userland i386 PT_* values into
appropriate amd64 PT_* values.

For the time being, the two additional PT_GETXMMREGS and PT_SETXMMREGS
requests are unsupported due to lack of matching free amd64 constant.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c   | 37 +++
 sys/arch/amd64/include/netbsd32_machdep.h |  3 ++
 sys/arch/amd64/include/ptrace.h   |  2 ++
 sys/compat/netbsd32/netbsd32_ptrace.c | 12 +++-
 4 files changed, 53 insertions(+), 1 deletion(-)

Changed in v2:
- PTRACE_TRANSLATE_REQUEST32(x) fallback definition has been moved
  from ptrace.h to netbsd32_ptrace.c

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 39d671a4b9b6..06f6cbdf3703 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -335,6 +335,43 @@ cpu_coredump32(struct lwp *l, struct coredump_iostate 
*iocookie,
 }
 #endif
 
+int
+netbsd32_ptrace_translate_request(int req)
+{
+
+   switch (req)
+   {
+   case 0 ... PT_FIRSTMACH - 1:
+   return req;
+   case PT_FIRSTMACH + 0:
+   return PT_STEP;
+   case PT_FIRSTMACH + 1:
+   return PT_GETREGS;
+   case PT_FIRSTMACH + 2:
+   return PT_SETREGS;
+   case PT_FIRSTMACH + 3:
+   return PT_GETFPREGS;
+   case PT_FIRSTMACH + 4:
+   return PT_SETFPREGS;
+   case PT_FIRSTMACH + 5:
+   /* PT_GETXMMREGS */
+   return -1;
+   case PT_FIRSTMACH + 6:
+   /* PT_SETXMMREGS */
+   return -1;
+   case PT_FIRSTMACH + 7:
+   return PT_GETDBREGS;
+   case PT_FIRSTMACH + 8:
+   return PT_SETDBREGS;
+   case PT_FIRSTMACH + 9:
+   return PT_SETSTEP;
+   case PT_FIRSTMACH + 10:
+   return PT_CLEARSTEP;
+   default:
+   return -1;
+   }
+}
+
 int
 netbsd32_process_read_regs(struct lwp *l, struct reg32 *regs)
 {
diff --git a/sys/arch/amd64/include/netbsd32_machdep.h 
b/sys/arch/amd64/include/netbsd32_machdep.h
index 51691eb7c90f..37019d29134d 100644
--- a/sys/arch/amd64/include/netbsd32_machdep.h
+++ b/sys/arch/amd64/include/netbsd32_machdep.h
@@ -151,6 +151,9 @@ struct x86_64_set_mtrr_args32 {
 
 #define NETBSD32_MID_MACHINE MID_I386
 
+/* Translate ptrace() PT_* request from 32-bit userland to kernel. */
+int netbsd32_ptrace_translate_request(int);
+
 int netbsd32_process_read_regs(struct lwp *, struct reg32 *);
 int netbsd32_process_read_fpregs(struct lwp *, struct fpreg32 *, size_t *);
 int netbsd32_process_read_dbregs(struct lwp *, struct dbreg32 *, size_t *);
diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index c5cef941fd16..8d0f54dbad24 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -87,6 +87,8 @@
 #define process_reg32  struct reg32
 #define process_fpreg32struct fpreg32
 #define process_dbreg32struct dbreg32
+
+#define PTRACE_TRANSLATE_REQUEST32(x) netbsd32_ptrace_translate_request(x)
 #endif /* COMPAT_NETBSD32 */
 #endif /* _KERNEL_OPT */
 
diff --git a/sys/compat/netbsd32/netbsd32_ptrace.c 
b/sys/compat/netbsd32/netbsd32_ptrace.c
index 614d363c3126..ff5025ceea44 100644
--- a/sys/compat/netbsd32/netbsd32_ptrace.c
+++ b/sys/compat/netbsd32/netbsd32_ptrace.c
@@ -47,6 +47,10 @@ __KERNEL_RCSID(0, "$NetBSD: netbsd32_ptrace.c,v 1.6 
2019/01/27 02:08:40 pgoyette
 #include 
 #include 
 
+#ifndef PTRACE_TRANSLATE_REQUEST32
+#define PTRACE_TRANSLATE_REQUEST32(x) x
+#endif
+
 /*
  * PTRACE methods
  */
@@ -243,6 +247,8 @@ int
 netbsd32_ptrace(struct lwp *l, const struct netbsd32_ptrace_args *uap,
 register_t *retval)
 {
+   int req;
+
/* {
syscallarg(int) req;
syscallarg(pid_t) pid;
@@ -250,7 +256,11 @@ netbsd32_ptrace(struct lwp *l, const struct 
netbsd32_ptrace_args *uap,
syscallarg(int) data;
} */
 
-   return do_ptrace(&netbsd32_ptm, l, SCARG(uap, req), SCARG(uap, pid),
+   req = PTRACE_TRANSLATE_REQUEST32(SCARG(uap, req));
+   if (req == -1)
+   return EOPNOTSUPP;
+
+   return do_ptrace(&netbsd32_ptm, l, req, SCARG(uap, pid),
SCARG_P32(uap, addr), 

Re: [PATCH 1/2] compat32: translate userland PT_* request values into kernel

2019-05-29 Thread Michał Górny
On Thu, 2019-05-30 at 03:15 +1000, matthew green wrote:
> thanks for working on this.
> 
> > +   case PT_FIRSTMACH + 0:
> > +   return PT_STEP;
> > +   case PT_FIRSTMACH + 1:
> > +   return PT_GETREGS;
> [ ... ]
> 
> these magic numbers are a little ugly.  can you avoid them?

I know they are, however I don't think it's really possible.  One option
is to declare additional set of constants in the amd64 headers but that
doesn't really gain us anything, and ends up in kinda-absurd:

  case PT32_STEP: return PT_STEP;

or alike.

> is there a way to have amd64 have direct access to the i386
> values?

I don't think so.

> > --- a/sys/sys/ptrace.h
> > +++ b/sys/sys/ptrace.h
> > @@ -283,6 +283,10 @@ intptrace_machdep_dorequest(struct lwp *, struct 
> > lwp *, int,
> >  #define FIX_SSTEP(p)
> >  #endif
> >  
> > +#ifndef PTRACE_TRANSLATE_REQUEST32
> > +#define PTRACE_TRANSLATE_REQUEST32(x) x
> > +#endif
> > +
> 
> can this part live in sys/compat(/netbsd32)?  no need for the
> public ptrace header to get this, right?
> 

I'll try and see.

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


Re: [RFC] Design considerations for XSAVE Extended Area getters/setters

2019-05-28 Thread Michał Górny
On Tue, 2019-05-28 at 20:08 +0200, Martin Husemann wrote:
> On Tue, May 28, 2019 at 07:50:34PM +0200, Micha? Górny wrote:
> > Well, if we are only to consider new registers, then we're talking about
> > 16 'pure' ymm registers + 32 zmm registers + 8 kN registers + 1 state
> > register, multiply by two... 114 PT_* requests?
> 
> Integers are plenty, but the core file format issue makes this aproach
> unusable anyway.
> 
> Still I think we should not create too many random processor register groups.
> 
> We already have very strange ones (XMMREGS and VECREGS). Maybe we should just
> have one ALLREGS thing (identical to the core note) and then discuss how
> to properly make that sanely versioned and self describing?
> 

That is somewhat the idea of option b., and what Intel&co seem to be
aiming it.  With the exception of the two old register groups being left
separate, PT_*XSTATE would cover everything else.


-- 
Best regards,
Michał Górny



Re: [RFC] Design considerations for XSAVE Extended Area getters/setters

2019-05-28 Thread Michał Górny
On Tue, 2019-05-28 at 19:37 +0200, Martin Husemann wrote:
> Stupid question: since this is all very rare and non-performance critical,
> why isn't it done as a single register per call? Adding more registers
> when thy arrive in newer cpu variants, and not worrying about how they
> are saved (XSAVE or similar) nor what format is used in the kernel?
> 
> So a debugger would do a single PT_GETREG_$REGISTER call from userland
> for each register it is interested in. The current state with register
> groups (regs, fpregs, dbregs, xmmmregs, vecregs, ...) looks like a hack
> to me.
> 
> The first two make some sense (across various cpu architectures), and
> maybe performance and debug registers could be a third reasonable group
> (especially given the security implications), but everything else is
> arbitrary and may just as well be individual registers.
> 

Well, if we are only to consider new registers, then we're talking about
16 'pure' ymm registers + 32 zmm registers + 8 kN registers + 1 state
register, multiply by two... 114 PT_* requests?

-- 
Best regards,
Michał Górny



Re: [RFC] Design considerations for XSAVE Extended Area getters/setters

2019-05-28 Thread Michał Górny
On Tue, 2019-05-28 at 19:26 +0200, Kamil Rytarowski wrote:
> On 28.05.2019 18:34, Michał Górny wrote:
> > There is no difference in internal layout or logic between b. and c.
> > In either case, we need to perform XSAVE, process it and copy the data
> > into internal structure.  The only difference is that in b. we handle it
> > all in one request, and in c. we do three requests copying different
> > parts of XSAVE to three different buffers.
> > 
> 
> I see. So (b) and (c) are the same except that XSAVE is a struct of a
> dynamic size with normalized registers instead of explicit AVX, AVX512
> calls.
> 

The only real difference is the amount of work when adding new register
types.

With b., you just add a new field to the struct and some code to x86
that copies data to that field.

With c., you need to add the new PT_* request with all associated
functions.


-- 
Best regards,
Michał Górny



Re: [RFC] Design considerations for XSAVE Extended Area getters/setters

2019-05-28 Thread Michał Górny
On Tue, 2019-05-28 at 18:08 +0200, Kamil Rytarowski wrote:
> On 28.05.2019 15:20, Michał Górny wrote:
> > Hi,
> > 
> > After implementing most of PT_GETXSTATE/PT_SETXSTATE and getting some
> > comments requiring major changes anyway, I'm starting to wonder whether
> > the approach I've followed is actually the best one.  This is especially
> > important now that I'm pretty sure that we can't rely on fixed offsets
> > in XSAVE area (courtesy of CPUID data from AVX512 CPU).  I'd like to
> > query your opinion on what would be the best API to provide.
> > 
> > I see three main options:
> > 
> > a. 'raw' XSAVE data,
> > 
> > b. 'normalized' XSAVE data,
> > 
> > c. split calls for individual components.
> > 
> > So far I've followed idea a.  I'll describe each one shortly.
> > 
> > 
> > The problem
> > ---
> > I'm trying to expose additional register types supported by newer x86
> > processors to the debugger.  Currently we're limited to SSE registers,
> > however extended XSAVE area provides support for AVX, AVX-512
> > and possible future extensions.
> > 
> > The XSAVE area is dynamically sized, with controllable extensions to
> > include.  We need to perform a bunch of CPUID calls to determine
> > the appropriate buffer size and offsets/sizes of individual data
> > components inside it.  The exact offsets depend on the processor
> > in question, and may be different depending on interim extensions
> > implemented.
> > 
> > 
> > a. 'raw' XSAVE data
> > ---
> > This is the solution used by Linux and FreeBSD.  It adds new PT_*XSTATE
> > requests that expose and operate on raw data used by XSAVE/XRSTOR
> > instructions.
> > 
> > Since it uses the processor's data format, userland can reuse the code
> > for other platforms.  In fact, if new XSAVE components are added,
> > the support in userland can be developed in parallel to kernel
> > development.
> > 
> > On the minus side, it shifts the whole burden of implementation
> > on userland.  This didn't go well for Linux and FreeBSD -- both of those
> > implementations did not account for providing component offsets via API,
> > and so require userland to establish them manually by CPUID.  This isn't
> > going to work well for storing XSAVE data in coredumps.
> > 
> > In my opinion, to implement this correctly we'd need another
> > dynamically-sized PT_* request exposing component offsets (and sizes?)
> > from CPUID.  However, at this point I start doubting whether the added
> > complexity is worth it given no clear advantage of sticking to pure
> > machine format.  Hence, option b.
> > 
> > 
> > b. 'normalized' XSAVE data
> > --
> > This is similar to the previous option, except that rather than exposing
> > the processor's raw XSAVE structure we use a fixed 'struct xstate'. 
> > Kernel is reponsible for establishing correct offsets, and moving data
> > to and from 'struct xstate'.  Userland can trivially access the struct
> > fields, and does not need to be concerned about internals.  Core dumps
> > use fixed offsets.
> > 
> > Similarly to the previous option, I'd keep the dynamic size for
> > the structure -- make it possible to add more fields in the future,
> > and expose them to userland when it provides a large enough buffer.
> > 
> > I don't see any significant disadvantages to option a., and the little
> > performance loss does not seem to be significant here.  We lose
> > the option to add support for new XSAVE types in userland before
> > the kernel but it's probably a moot point anyway.
> > 
> > 
> > c. Split calls for individual components
> > 
> > This is a completely different option which will probably make
> > the implementation a bit simpler at the cost of more repeated code.
> > In this model, we add PT_* requests for each interesting XSAVE
> > component.  At the moment, this would mean 4 new requests:
> > 
> > - PT_*AVXREGS, for the AVX component,
> > 
> > - PT_*AVX512REGS for the 3 AVX-512 components.
> > 
> > Unlike the other options, we don't use extensible buffer but just fixed
> > structs.  We need to add new requests when new components are added. 
> > Userland applications need to issue multiple PT_* calls if they need
> > specific data.  For example, t

[RFC] Design considerations for XSAVE Extended Area getters/setters

2019-05-28 Thread Michał Górny
r getting FPU registers.


Summary
---
In my opinion, the best solution here is b.  It's extensible to future
register types, easy to use from userland perspective and reduces
the number of ptrace() calls debugger needs to issue.  It also
circumvents compat32 limitations.

What do you think?

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH 2/2] compat32: Implement PT_GETDBREGS and PT_SETDBREGS

2019-05-27 Thread Michał Górny
Uncomment and improve the implementation of compat32 support for
PT_GETDBREGS and PT_SETDBREGS requests.

The new implementation uses x86_dbregs_read() and x86_dbregs_write()
function instead of accessing pcb directly.  While this might be
a little slower, it guarantees that the needed pcb field is allocated
correctly.

Furthermore, the code introduces necessary sanity checks
for PT_SETDBREGS arguments.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c | 52 +
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 06f6cbdf3703..31940f0af146 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -410,23 +410,19 @@ netbsd32_process_read_fpregs(struct lwp *l, struct 
fpreg32 *regs, size_t *sz)
 int
 netbsd32_process_read_dbregs(struct lwp *l, struct dbreg32 *regs, size_t *sz)
 {
-#if notyet
-   struct pcb *pcb;
-
-   pcb = lwp_getpcb(l);
+   struct dbreg regs64;
 
-   regs->dr[0] = pcb->pcb_dbregs->dr[0] & 0x;
-   regs->dr[1] = pcb->pcb_dbregs->dr[1] & 0x;
-   regs->dr[2] = pcb->pcb_dbregs->dr[2] & 0x;
-   regs->dr[3] = pcb->pcb_dbregs->dr[3] & 0x;
+   x86_dbregs_read(l, ®s64);
+   memset(regs, 0, sizeof(*regs));
+   regs->dr[0] = regs64.dr[0] & 0x;
+   regs->dr[1] = regs64.dr[1] & 0x;
+   regs->dr[2] = regs64.dr[2] & 0x;
+   regs->dr[3] = regs64.dr[3] & 0x;
 
-   regs->dr[6] = pcb->pcb_dbregs->dr[6] & 0x;
-   regs->dr[7] = pcb->pcb_dbregs->dr[7] & 0x;
+   regs->dr[6] = regs64.dr[6] & 0x;
+   regs->dr[7] = regs64.dr[7] & 0x;
 
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
@@ -493,23 +489,29 @@ int
 netbsd32_process_write_dbregs(struct lwp *l, const struct dbreg32 *regs,
 size_t sz)
 {
-#if notyet
-   struct pcb *pcb;
+   size_t i;
+   struct dbreg regs64;
 
-   pcb = lwp_getpcb(l);
+   /* Check that DR0-DR3 contain user-space address */
+   for (i = 0; i < X86_DBREGS; i++) {
+   if (regs->dr[i] >= VM_MAXUSER_ADDRESS32)
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[0] = regs->dr[0];
-   pcb->pcb_dbregs->dr[1] = regs->dr[1];
-   pcb->pcb_dbregs->dr[2] = regs->dr[2];
-   pcb->pcb_dbregs->dr[3] = regs->dr[3];
+   if (regs->dr[7] & X86_DR7_GENERAL_DETECT_ENABLE) {
+   return EINVAL;
+   }
 
-   pcb->pcb_dbregs->dr[6] = regs->dr[6];
-   pcb->pcb_dbregs->dr[7] = regs->dr[7];
+   regs64.dr[0] = regs->dr[0];
+   regs64.dr[1] = regs->dr[1];
+   regs64.dr[2] = regs->dr[2];
+   regs64.dr[3] = regs->dr[3];
 
+   regs64.dr[6] = regs->dr[6];
+   regs64.dr[7] = regs->dr[7];
+
+   x86_dbregs_write(l, ®s64);
return 0;
-#else
-   return ENOTSUP;
-#endif
 }
 
 int
-- 
2.22.0.rc1



[PATCH 1/2] compat32: translate userland PT_* request values into kernel

2019-05-27 Thread Michał Górny
Currently, the compat32 passes PT_* request values to kernel functions
without translation.  This works fine for low PT_* requests that happen
to have the same values both on i386 and amd64.  However, for requests
higher than PT_SETFPREGS, the value passed from userland (matching i386
const) does not match the correct kernel (amd64) request.  As a result,
e.g. when compat32 process calls PT_GETDBREGS, kernel actually processes
it as PT_SETSTEP.

To resolve this, introduce support for compat32 PT_* request
translation.  The interface is based on PTRACE_TRANSLATE_REQUEST32 macro
that is defined to a mapping function on architectures needing it.
In case of amd64, this function maps userland i386 PT_* values into
appropriate amd64 PT_* values.

For the time being, the two additional PT_GETXMMREGS and PT_SETXMMREGS
requests are unsupported due to lack of matching free amd64 constant.
---
 sys/arch/amd64/amd64/netbsd32_machdep.c   | 37 +++
 sys/arch/amd64/include/netbsd32_machdep.h |  3 ++
 sys/arch/amd64/include/ptrace.h   |  2 ++
 sys/compat/netbsd32/netbsd32_ptrace.c |  8 -
 sys/sys/ptrace.h  |  4 +++
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/sys/arch/amd64/amd64/netbsd32_machdep.c 
b/sys/arch/amd64/amd64/netbsd32_machdep.c
index 39d671a4b9b6..06f6cbdf3703 100644
--- a/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ b/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -335,6 +335,43 @@ cpu_coredump32(struct lwp *l, struct coredump_iostate 
*iocookie,
 }
 #endif
 
+int
+netbsd32_ptrace_translate_request(int req)
+{
+
+   switch (req)
+   {
+   case 0 ... PT_FIRSTMACH - 1:
+   return req;
+   case PT_FIRSTMACH + 0:
+   return PT_STEP;
+   case PT_FIRSTMACH + 1:
+   return PT_GETREGS;
+   case PT_FIRSTMACH + 2:
+   return PT_SETREGS;
+   case PT_FIRSTMACH + 3:
+   return PT_GETFPREGS;
+   case PT_FIRSTMACH + 4:
+   return PT_SETFPREGS;
+   case PT_FIRSTMACH + 5:
+   /* PT_GETXMMREGS */
+   return -1;
+   case PT_FIRSTMACH + 6:
+   /* PT_SETXMMREGS */
+   return -1;
+   case PT_FIRSTMACH + 7:
+   return PT_GETDBREGS;
+   case PT_FIRSTMACH + 8:
+   return PT_SETDBREGS;
+   case PT_FIRSTMACH + 9:
+   return PT_SETSTEP;
+   case PT_FIRSTMACH + 10:
+   return PT_CLEARSTEP;
+   default:
+   return -1;
+   }
+}
+
 int
 netbsd32_process_read_regs(struct lwp *l, struct reg32 *regs)
 {
diff --git a/sys/arch/amd64/include/netbsd32_machdep.h 
b/sys/arch/amd64/include/netbsd32_machdep.h
index 51691eb7c90f..37019d29134d 100644
--- a/sys/arch/amd64/include/netbsd32_machdep.h
+++ b/sys/arch/amd64/include/netbsd32_machdep.h
@@ -151,6 +151,9 @@ struct x86_64_set_mtrr_args32 {
 
 #define NETBSD32_MID_MACHINE MID_I386
 
+/* Translate ptrace() PT_* request from 32-bit userland to kernel. */
+int netbsd32_ptrace_translate_request(int);
+
 int netbsd32_process_read_regs(struct lwp *, struct reg32 *);
 int netbsd32_process_read_fpregs(struct lwp *, struct fpreg32 *, size_t *);
 int netbsd32_process_read_dbregs(struct lwp *, struct dbreg32 *, size_t *);
diff --git a/sys/arch/amd64/include/ptrace.h b/sys/arch/amd64/include/ptrace.h
index c5cef941fd16..8d0f54dbad24 100644
--- a/sys/arch/amd64/include/ptrace.h
+++ b/sys/arch/amd64/include/ptrace.h
@@ -87,6 +87,8 @@
 #define process_reg32  struct reg32
 #define process_fpreg32struct fpreg32
 #define process_dbreg32struct dbreg32
+
+#define PTRACE_TRANSLATE_REQUEST32(x) netbsd32_ptrace_translate_request(x)
 #endif /* COMPAT_NETBSD32 */
 #endif /* _KERNEL_OPT */
 
diff --git a/sys/compat/netbsd32/netbsd32_ptrace.c 
b/sys/compat/netbsd32/netbsd32_ptrace.c
index 614d363c3126..feb2b99c8d54 100644
--- a/sys/compat/netbsd32/netbsd32_ptrace.c
+++ b/sys/compat/netbsd32/netbsd32_ptrace.c
@@ -243,6 +243,8 @@ int
 netbsd32_ptrace(struct lwp *l, const struct netbsd32_ptrace_args *uap,
 register_t *retval)
 {
+   int req;
+
/* {
syscallarg(int) req;
syscallarg(pid_t) pid;
@@ -250,7 +252,11 @@ netbsd32_ptrace(struct lwp *l, const struct 
netbsd32_ptrace_args *uap,
syscallarg(int) data;
} */
 
-   return do_ptrace(&netbsd32_ptm, l, SCARG(uap, req), SCARG(uap, pid),
+   req = PTRACE_TRANSLATE_REQUEST32(SCARG(uap, req));
+   if (req == -1)
+   return EOPNOTSUPP;
+
+   return do_ptrace(&netbsd32_ptm, l, req, SCARG(uap, pid),
SCARG_P32(uap, addr), SCARG(uap, data), retval);
 }
 
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index b213b38f5c6b..484f0ff6d268 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -283,6 +283,10 @@ intptrace_machdep_dorequest(struct lwp *, struct 
lwp *, int,
 #define FIX_SSTEP(p)
 #endif
 
+#ifndef PTRACE_TRANS

Re: kernel frameworks for jumbo frame

2019-03-11 Thread Michał Górny
On Mon, 2019-03-11 at 04:37 -0400, John Franklin wrote:
> On Mar 10, 2019, at 15:33, Gert Doering  wrote:
> > Hi,
> > 
> > On Sun, Mar 10, 2019 at 12:14:54PM -0700, Brian Buhrow wrote:
> > >   hello.  I'm not saying anything that anyone here doesn't already know,
> > > but I'll add that Linux seems to have taken the position that all ethernet
> > > interfaces should be called eth0, eth1, etc. 
> > 
> > This argument surprises me a bit, as Linux has moved *away* from doing
> > exactly this a few years ago...
> 
> Depending on who you ask, this was either poorly received by the community or 
> “has mixed reviews.” I’ve yet to meet someone who thought it was the best 
> thing ever.
> 

As with everything, I'd expect some people to consider X better, some to
consider Y better, and a few to take this opportunity to remind everyone
that there's also an alternative of Z.  I'd also expect a major group of
neutral people who don't really care either way.

What really brought major 'poor reception' is *change* rather than
the idea itself.  I think it's meaningful to distinguish between people
who actually prefer Y over X because they think it's better, and people
who don't like change from Y to X because it means they have to do
'unnecessary' work to adjust their systems and/or change their habits. 
Then, of course there would be some people who would actually consider
this an opportunity to improve their setups.

What I'm saying is that you can't expect neither wholly negative or
wholly positive reception here.  People who are unhappy/angry tend to be
more verbose about it.  Most of the people won't go out their way to
praise interface naming solution even if they liked it.  Most of
the people won't really care as long as things work out of the box.

One fun fact about the new naming scheme is that if you have USB wifi
dongle, you can expect the name to depend on which port you plug it
into.  So much for consistent naming ;-).

-- 
Best regards,
Michał Górny



signature.asc
Description: This is a digitally signed message part


[PATCH v3] kern/tty_pty: Fix reporting EOF via kevent and add a test case

2019-02-07 Thread Michał Górny
Fix the kernel pty driver to report closed slave via master's kevent
EVFILT_READ.  This behavior matches the behavior for pipes, is
consistent with how FreeBSD implements it and is relied upon by LLDB's
main loop implementation.
---
 sys/kern/tty_pty.c  |  4 +++
 tests/kernel/kqueue/read/t_ttypty.c | 43 +++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
index 1d60596124d3..94270381b5ab 100644
--- a/sys/kern/tty_pty.c
+++ b/sys/kern/tty_pty.c
@@ -938,6 +938,10 @@ filt_ptcread(struct knote *kn, long hint)
((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl))
kn->kn_data++;
}
+   if (!ISSET(tp->t_state, TS_CARR_ON)) {
+   kn->kn_flags |= EV_EOF;
+   canread = 1;
+   }
 
if ((hint & NOTE_SUBMIT) == 0) {
mutex_spin_exit(&tty_lock);
diff --git a/tests/kernel/kqueue/read/t_ttypty.c 
b/tests/kernel/kqueue/read/t_ttypty.c
index 89b1dc2ec4ca..2e90aefbb5d4 100644
--- a/tests/kernel/kqueue/read/t_ttypty.c
+++ b/tests/kernel/kqueue/read/t_ttypty.c
@@ -1,7 +1,7 @@
 /* $NetBSD: t_ttypty.c,v 1.2 2017/01/13 21:30:41 christos Exp $ */
 
 /*-
- * Copyright (c) 2002, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2008, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -30,11 +30,12 @@
  */
 
 #include 
-__COPYRIGHT("@(#) Copyright (c) 2008\
+__COPYRIGHT("@(#) Copyright (c) 2008, 2019\
  The NetBSD Foundation, inc. All rights reserved.");
 __RCSID("$NetBSD: t_ttypty.c,v 1.2 2017/01/13 21:30:41 christos Exp $");
 
 #include 
+#include 
 #include 
 
 #include 
@@ -135,10 +136,48 @@ ATF_TC_BODY(slave, tc)
h_check(false);
 }
 
+ATF_TC(closed_slave);
+ATF_TC_HEAD(closed_slave, tc)
+{
+   atf_tc_set_md_var(tc, "descr",
+   "Checks EVFILT_READ reporting for slave tty being closed");
+}
+ATF_TC_BODY(closed_slave, tc)
+{
+   char slavetty[1024];
+   struct kevent event[1];
+   int amaster, aslave;
+   int kq, n;
+   struct timespec timeout = {5, 0};
+
+   RL(openpty(&amaster, &aslave, slavetty, NULL, NULL));
+
+   (void)printf("tty: openpty master %d slave %d tty '%s'\n",
+   amaster, aslave, slavetty);
+
+   RL(kq = kqueue());
+
+   EV_SET(&event[0], amaster, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
+   RL(kevent(kq, event, 1, NULL, 0, NULL));
+
+   RL(close(aslave));
+
+   RL(n = kevent(kq, NULL, 0, event, 1, &timeout));
+
+   (void)printf("kevent num %d filt %d flags: %#x, fflags: %#x, "
+   "data: %" PRId64 "\n", n, event[0].filter, event[0].flags,
+   event[0].fflags, event[0].data);
+
+   ATF_REQUIRE_EQ(n, 1);
+   ATF_REQUIRE_EQ(event[0].filter, EVFILT_READ);
+   ATF_REQUIRE_EQ(event[0].flags & EV_EOF, EV_EOF);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
ATF_TP_ADD_TC(tp, master);
ATF_TP_ADD_TC(tp, slave);
+   ATF_TP_ADD_TC(tp, closed_slave);
 
return atf_no_error();
 }
-- 
2.20.1



[PATCH v2] kern/tty_pty: Fix reporting EOF via kevent and add a test case

2019-02-07 Thread Michał Górny
Fix the kernel pty driver to report closed slave via master's kevent
EVFILT_READ.  This behavior matches the behavior for pipes, is
consistent with how FreeBSD implements it and is relied upon by LLDB's
main loop implementation.
---
 sys/kern/tty_pty.c|  4 +
 tests/kernel/kqueue/read/Makefile |  2 +
 tests/kernel/kqueue/read/t_pty_closed_slave.c | 94 +++
 3 files changed, 100 insertions(+)
 create mode 100644 tests/kernel/kqueue/read/t_pty_closed_slave.c

diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
index 1d60596124d3..94270381b5ab 100644
--- a/sys/kern/tty_pty.c
+++ b/sys/kern/tty_pty.c
@@ -938,6 +938,10 @@ filt_ptcread(struct knote *kn, long hint)
((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl))
kn->kn_data++;
}
+   if (!ISSET(tp->t_state, TS_CARR_ON)) {
+   kn->kn_flags |= EV_EOF;
+   canread = 1;
+   }
 
if ((hint & NOTE_SUBMIT) == 0) {
mutex_spin_exit(&tty_lock);
diff --git a/tests/kernel/kqueue/read/Makefile 
b/tests/kernel/kqueue/read/Makefile
index 510ed45c486c..d1a321aeb003 100644
--- a/tests/kernel/kqueue/read/Makefile
+++ b/tests/kernel/kqueue/read/Makefile
@@ -10,8 +10,10 @@ TESTS_C= t_fifo
 TESTS_C+=  t_file
 TESTS_C+=  t_file2
 TESTS_C+=  t_pipe
+TESTS_C+=  t_pty_closed_slave
 TESTS_C+=  t_ttypty
 
 LDADD.t_ttypty=-lutil
+LDADD.t_pty_closed_slave=-lutil
 
 .include 
diff --git a/tests/kernel/kqueue/read/t_pty_closed_slave.c 
b/tests/kernel/kqueue/read/t_pty_closed_slave.c
new file mode 100644
index ..1972103a2085
--- /dev/null
+++ b/tests/kernel/kqueue/read/t_pty_closed_slave.c
@@ -0,0 +1,94 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+__COPYRIGHT("@(#) Copyright (c) 2019\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD$");
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "h_macros.h"
+
+ATF_TC(kevent);
+ATF_TC_HEAD(kevent, tc)
+{
+   atf_tc_set_md_var(tc, "descr",
+   "Checks EVFILT_READ reporting for slave tty being closed");
+}
+ATF_TC_BODY(kevent, tc)
+{
+   char slavetty[1024];
+   struct kevent event[1];
+   int amaster, aslave;
+   int kq, n;
+#if 0
+   int fl;
+#endif
+   struct timespec timeout = {5, 0};
+
+   RL(openpty(&amaster, &aslave, slavetty, NULL, NULL));
+
+   (void)printf("tty: openpty master %d slave %d tty '%s'\n",
+   amaster, aslave, slavetty);
+
+   RL(kq = kqueue());
+
+   EV_SET(&event[0], amaster, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
+   RL(kevent(kq, event, 1, NULL, 0, NULL));
+
+   RL(close(aslave));
+
+   RL(n = kevent(kq, NULL, 0, event, 1, &timeout));
+
+   (void)printf("kevent num %d filt %d flags: %#x, fflags: %#x, "
+   "data: %" PRId64 "\n", n, event[0].filter, event[0].flags,
+   event[0].fflags, event[0].data);
+
+   ATF_REQUIRE_EQ(n, 1);
+   ATF_REQUIRE_EQ(event[0].filter, EVFILT_READ);
+   ATF_REQUIRE_EQ(event[0].flags & EV_EOF, EV_EOF);
+
+   (void)printf("tty: successful end\n");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+   ATF_TP_ADD_TC(tp, kevent);
+
+   return atf_no_error();
+}
-- 
2.20.1



Re: kevent() not reporting closed slave pty (via master pty)

2019-02-07 Thread Michał Górny
On Thu, 2019-02-07 at 08:58 +0700, Robert Elz wrote:
> Date:Wed, 06 Feb 2019 20:29:32 +0100
> From:=?UTF-8?Q?Micha=C5=82_G=C3=B3rny?= 
> Message-ID:  <1549481372.983.14.ca...@gentoo.org>
> 
> 
>   | + } else if (!ISSET(tp->t_state, TS_ISOPEN) &&
>   | +!ISSET(tp->t_state, TS_CARR_ON)) {
>   | + kn->kn_flags |= EV_EOF;
>   | + canread = 1;
>   |   }
> 
> The logic there looks incorrect, though I'd expect it to work
> for your test case.   It might always work, and yet still not
> really be correct (lead to readers of the code wondering why,
> and perhaps copying this, elsewhere.)
> 
> TS_ISOPEN and TS_CARR_ON are (in this scenario) essentially
> clones of each other.Testing both should be superfluous.
> 
> They're both cleared when the slave pts is closed, and as
> long as the master ptc remains open (which it does for your
> test) they're both set again when the pts opens again.
> 
> The one time they differ, is when the pts opens with the ptc
> closed (TS_ISOPEN is set, TS_CARR_ON is clear) but that should
> be irrelevant/impossible when you're testing using an open ptc
> fd (ie: we know the ptc is open.)
> 
> In ptcread() the read will return EOF (0) if !TS_CARR_ON
> regardless of TS_ISOPEN (unless there is actually data to
> be read).   The same is true in ptcpoll()
> where
>   revents |= POLLHUP;
> if !TS_CARR_ON, again regardless of TS_ISOPEN.
> 
> It will take someone with far more kqueue clue than I have to
> be sure (some pty and tty clues would help too) but I suspect
> that all that might be needed is:
> 
>   if (!ISSET(tp->t_state, TS_CARR_ON)) {
>   kn->kn_flags |= EV_EOF;
>   canread = 1;
>   }
> 
> (no "else" and no TS_ISOPEN test).Testing TS_CARR_ON rather
> that TS_ISOPEN merely for consistency with the other code.
> 
> Omitting the "else" is OK only if kqueue can handle the EOF
> return, along with "this much data can be read" in the same
> event.  Otherwise the "else" before the "if" should remain.

Thank you.  I will update the code as requested.

> ps: please indent the code in the body of the "if" to the same
> level as the code in the "if" that precedes it (the "if (canread)"
> not the one inside its block) (even if the "else" remains).

I'm sorry about this, looks like my vim misfired.  My mistake for not
looking at the diff close enough.


-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


[PATCH] kern/tty_pty: Fix reporting EOF via kevent and add a test case

2019-02-06 Thread Michał Górny
---
 sys/kern/tty_pty.c|  4 +
 tests/kernel/kqueue/read/Makefile |  2 +
 tests/kernel/kqueue/read/t_pty_closed_slave.c | 97 +++
 3 files changed, 103 insertions(+)
 create mode 100644 tests/kernel/kqueue/read/t_pty_closed_slave.c

diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
index 1d60596124d3..0a235cc41aaa 100644
--- a/sys/kern/tty_pty.c
+++ b/sys/kern/tty_pty.c
@@ -937,6 +937,10 @@ filt_ptcread(struct knote *kn, long hint)
if (((pti->pt_flags & PF_PKT) && pti->pt_send) ||
((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl))
kn->kn_data++;
+   } else if (!ISSET(tp->t_state, TS_ISOPEN) &&
+  !ISSET(tp->t_state, TS_CARR_ON)) {
+   kn->kn_flags |= EV_EOF;
+   canread = 1;
}
 
if ((hint & NOTE_SUBMIT) == 0) {
diff --git a/tests/kernel/kqueue/read/Makefile 
b/tests/kernel/kqueue/read/Makefile
index 510ed45c486c..d1a321aeb003 100644
--- a/tests/kernel/kqueue/read/Makefile
+++ b/tests/kernel/kqueue/read/Makefile
@@ -10,8 +10,10 @@ TESTS_C= t_fifo
 TESTS_C+=  t_file
 TESTS_C+=  t_file2
 TESTS_C+=  t_pipe
+TESTS_C+=  t_pty_closed_slave
 TESTS_C+=  t_ttypty
 
 LDADD.t_ttypty=-lutil
+LDADD.t_pty_closed_slave=-lutil
 
 .include 
diff --git a/tests/kernel/kqueue/read/t_pty_closed_slave.c 
b/tests/kernel/kqueue/read/t_pty_closed_slave.c
new file mode 100644
index ..f7bbcf9a9fa4
--- /dev/null
+++ b/tests/kernel/kqueue/read/t_pty_closed_slave.c
@@ -0,0 +1,97 @@
+/* $NetBSD$ */
+
+/*-
+ * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Luke Mewburn and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include 
+__COPYRIGHT("@(#) Copyright (c) 2019\
+ The NetBSD Foundation, inc. All rights reserved.");
+__RCSID("$NetBSD$");
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "h_macros.h"
+
+ATF_TC(kevent);
+ATF_TC_HEAD(kevent, tc)
+{
+   atf_tc_set_md_var(tc, "descr",
+   "Checks EVFILT_READ reporting for slave tty being closed");
+}
+ATF_TC_BODY(kevent, tc)
+{
+   char slavetty[1024];
+   struct kevent event[1];
+   int amaster, aslave;
+   int kq, n;
+#if 0
+   int fl;
+#endif
+   struct timespec timeout = {5, 0};
+
+   RL(openpty(&amaster, &aslave, slavetty, NULL, NULL));
+
+   (void)printf("tty: openpty master %d slave %d tty '%s'\n",
+   amaster, aslave, slavetty);
+
+   RL(kq = kqueue());
+
+   EV_SET(&event[0], amaster, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
+   RL(kevent(kq, event, 1, NULL, 0, NULL));
+
+   RL(close(aslave));
+
+   RL(n = kevent(kq, NULL, 0, event, 1, &timeout));
+
+   (void)printf("kevent num %d filt %d flags: %#x, fflags: %#x, "
+   "data: %" PRId64 "\n", n, event[0].filter, event[0].flags,
+   event[0].fflags, event[0].data);
+
+   ATF_REQUIRE_EQ(n, 1);
+   ATF_REQUIRE_EQ(event[0].filter, EVFILT_READ);
+   ATF_REQUIRE_EQ(event[0].flags & EV_EOF, EV_EOF);
+
+   (void)printf("tty: successful end\n");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+   ATF_TP_ADD_TC(tp, kevent);
+
+   return atf_no_error();
+}
-- 
2.20.1



Re: kevent() not reporting closed slave pty (via master pty)

2019-02-06 Thread Michał Górny
On Wed, 2019-02-06 at 17:42 +0100, Michał Górny wrote:
> Hi,
> 
> I've been debugging a hanging test in LLDB.  The particular test is
> meant to verify that LLDB handles EOF on pty correctly.  For this
> purpose, it creates a pty and monitors it via kevent, then opens a slave
> and closes it.  Apparently, it expects kevent to report that master pty
> has no longer any slaves open.
> 
> I've tested this a bit and verified that read() from master pty returns
> immediately if slave pty is closed.  However, it seems that closing
> the slave pty prevents kevent from reporting anything, therefore making
> it wait forever.
> 

And here's a proposed patch, based on kern/sys_pipe.c.  I still have to
write an ATF test for it.


diff --git a/sys/kern/tty_pty.c b/sys/kern/tty_pty.c
index 1d60596124d3..0a235cc41aaa 100644
--- a/sys/kern/tty_pty.c
+++ b/sys/kern/tty_pty.c
@@ -920,40 +920,44 @@ filt_ptcread(struct knote *kn, long hint)
 
if ((hint & NOTE_SUBMIT) == 0) {
mutex_spin_enter(&tty_lock);
}
 
canread = (ISSET(tp->t_state, TS_ISOPEN) &&
((tp->t_outq.c_cc > 0 && !ISSET(tp->t_state, TS_TTSTOP)) ||
 ((pti->pt_flags & PF_PKT) && pti->pt_send) ||
 ((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl)));
 
if (canread) {
/*
 * c_cc is number of characters after output post-processing;
 * the amount of data actually read(2) depends on
 * setting of input flags for the terminal.
 */
kn->kn_data = tp->t_outq.c_cc;
if (((pti->pt_flags & PF_PKT) && pti->pt_send) ||
((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl))
kn->kn_data++;
+   } else if (!ISSET(tp->t_state, TS_ISOPEN) &&
+  !ISSET(tp->t_state, TS_CARR_ON)) {
+   kn->kn_flags |= EV_EOF;
+   canread = 1;
}
 
if ((hint & NOTE_SUBMIT) == 0) {
mutex_spin_exit(&tty_lock);
}
 
return canread;
 }
 
 static void
 filt_ptcwdetach(struct knote *kn)
 {
struct pt_softc *pti;
 
    pti = kn->kn_hook;
 
mutex_spin_enter(&tty_lock);
SLIST_REMOVE(&pti->pt_selw.sel_klist, kn, knote, kn_selnext);
mutex_spin_exit(&tty_lock);
 }



-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


kevent() not reporting closed slave pty (via master pty)

2019-02-06 Thread Michał Górny
Hi,

I've been debugging a hanging test in LLDB.  The particular test is
meant to verify that LLDB handles EOF on pty correctly.  For this
purpose, it creates a pty and monitors it via kevent, then opens a slave
and closes it.  Apparently, it expects kevent to report that master pty
has no longer any slaves open.

I've tested this a bit and verified that read() from master pty returns
immediately if slave pty is closed.  However, it seems that closing
the slave pty prevents kevent from reporting anything, therefore making
it wait forever.

For comparison, on FreeBSD kevent() reports the fd with EOF flag:

  kevent(5,0x0,0,{ 3,EVFILT_READ,EV_EOF,0x0,0x0,0x0 },4,{ 2.0 }) = 1 
(0x1)

The relevant passage from kqueue(2) seems to be:

Fifos, Pipes
Returns when there is data to read; data contains the
number of bytes available.

When the last writer disconnects, the filter will set
EV_EOF in flags.  This may be cleared by passing in
EV_CLEAR, at which point the filter will resume
waiting for data to become available before returning.

Though I'm not sure if I'm interpreting this correctly.  Is returning
with EV_EOF expected behavior here?

'Minimal' reproducer:

#include 
#include 
#include 
#include 
#include 
#include 
#include 

int main() {
  int master_pty = posix_openpt(O_RDWR);
  int ret;
  assert(master_pty != -1);
  ret = grantpt(master_pty);
  assert(ret != -1);
  ret = unlockpt(master_pty);
  assert(ret != -1);

  const char* slave_name = ptsname(master_pty);
  assert(slave_name);
  int slave_pty = open(slave_name, O_RDWR | O_NOCTTY);
  assert(slave_pty != -1);

  int kq = kqueue();
  assert(kq != -1);

  struct kevent ev;
  struct kevent out_ev[4];
  const struct timespec timeout = {2,0};
  EV_SET(&ev, master_pty, EVFILT_READ, EV_ADD, 0, 0, 0);
  ret = kevent(kq, &ev, 1, out_ev, 4, &timeout);
  printf("kev = %d\n", ret);

  ret = close(slave_pty);
  assert(ret != -1);

  ret = kevent(kq, 0, 0, out_ev, 4, &timeout);
  // expected: 1
  printf("kev = %d\n", ret);

  return 0;
}

-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


Re: Page fault when reading EFI systbl on Asus X550C

2019-01-17 Thread Michał Górny
On Thu, 2019-01-17 at 20:34 +0100, Maxime Villard wrote:
> Le 17/01/2019 à 19:50, Michał Górny a écrit :
> > On Thu, 2019-01-17 at 18:51 +0100, Maxime Villard wrote:
> > > Le 17/01/2019 à 18:05, Michał Górny a écrit :
> > > > Hi,
> > > > 
> > > > [...]
> > > > 
> > > > After a lot of bisecting, and comparing stable 8 branch (which works)
> > > > with trunk, I've came to the conclusion that the commit 'Limit the size
> > > > of the direct map with a 2MB granularity' [2] is actually causing
> > > > the fault.  I haven't been able to revert it on top of current trunk;
> > > > however, I've been able to verify that with a later size fix included
> > > > ([3]), the commit preceding it works and this one starts failing.
> > > 
> > > Please add some printfs like these right after 'va = efi_getva(pa)':
> > > 
> > >   printf("pmap_direct_base = %p\n", (void *)pmap_direct_base);
> > >   printf("pmap_direct_end = %p\n", (void *)pmap_direct_end);
> > >   printf("va = %p\n", (void *)va);
> > >   printf("pa = %p\n", (void *)pa);
> > 
> > pmap_direct_base = 0xd4312780
> > pmap_direct_end  = 0xd431f1799000
> > va   = 0xd431f18d9f18
> > pa   = 0xca0d9f18
> 
> Obviously, you are outside of the direct map, that's why it crashes.
> 
> The reason you're outside is because the max number of segments is 32, 
> therefore
> the kernel truncates your physical memory. Because of the truncation, the efi
> table is not in the kernel phys map. And because the direct map has been sized
> down to chunks of 2MB, the associated VA is not mapped anymore. On NetBSD-8, 
> by
> luck, the direct map 1GB padding covers the table, that's why you don't see a
> crash.
> 
> I've increased the max number to 64, so now it should work, please test. I
> guess it is worth a pullup-8, relying on luck is not a particularly good idea.

Thank you a lot.  I can confirm that it works now.

-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


Re: Page fault when reading EFI systbl on Asus X550C

2019-01-17 Thread Michał Górny
On Thu, 2019-01-17 at 18:51 +0100, Maxime Villard wrote:
> Le 17/01/2019 à 18:05, Michał Górny a écrit :
> > Hi,
> > 
> > [...]
> > 
> > After a lot of bisecting, and comparing stable 8 branch (which works)
> > with trunk, I've came to the conclusion that the commit 'Limit the size
> > of the direct map with a 2MB granularity' [2] is actually causing
> > the fault.  I haven't been able to revert it on top of current trunk;
> > however, I've been able to verify that with a later size fix included
> > ([3]), the commit preceding it works and this one starts failing.
> 
> Please add some printfs like these right after 'va = efi_getva(pa)':
> 
>   printf("pmap_direct_base = %p\n", (void *)pmap_direct_base);
>   printf("pmap_direct_end = %p\n", (void *)pmap_direct_end);
>   printf("va = %p\n", (void *)va);
>   printf("pa = %p\n", (void *)pa);

pmap_direct_base = 0xd4312780
pmap_direct_end  = 0xd431f1799000
va   = 0xd431f18d9f18
pa   = 0xca0d9f18


-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


Page fault when reading EFI systbl on Asus X550C

2019-01-17 Thread Michał Górny
Hi,

I've been having trouble booting NetBSD-9 on my UEFI-enabled Asus
laptop.  Long story short, the boot procedure crashes early with
the following dmesg output:

  efi: systbl at pa ca0d9f18
  uvm_fault(0x81714c20, 0xe0f8b46d9000, 1) -> e
  fatal page fault in supervisor mode
  trap type 6 code 0 rip 0x8023cac7 cs 0x8 rflags 0x10246
cr2 0xe0f8b46d9f28 ilevel 0x8 rsp 0x81b06e50
  curlwp 0x816574a0 pid 0.1 lowest kstack 0x81b022c0

With help on IRC, we've been able to debug this a bit.  This rip points
out to line 350 of efi.c [1], that is the first aprint_debug()
in efi_is32x64 condition.  jak suggested that this condition is probably
evaluated wrongly as I'm using the 64-bit bootloader.  However, when I
explicitly forced it to false, the code faulted in the aprint_debug()
just below the conditional.  In any case, it looked like any access to
efi sysbtl faults.

After a lot of bisecting, and comparing stable 8 branch (which works)
with trunk, I've came to the conclusion that the commit 'Limit the size
of the direct map with a 2MB granularity' [2] is actually causing
the fault.  I haven't been able to revert it on top of current trunk;
however, I've been able to verify that with a later size fix included
([3]), the commit preceding it works and this one starts failing.

On jak's suggestion, I've obtained memmap for my system (from Linux).
Full map is included at the end of the mail; however, the relevant area
seems to be:

0xca035000  0xca5d9fff  Reserved

jak tipped that the fact that systbl resides in 'reserved' area of
memory may be the culprit.  Could you advise how to solve this or debug
it further?  TIA.


[1]:https://github.com/NetBSD/src/blob/b0e10a0335adcd63e293858cf20d17bcea7bfe2b/sys/arch/x86/x86/efi.c#L350
[2]:https://github.com/NetBSD/src/commit/c813be5ea6a9535bf7242ba0c10e0b1f64f225a9
[3]:https://github.com/NetBSD/src/commit/213657f3c015a98382dc30519d6206d116367f2f

Full memmap:

0x0 0x9dfff System RAM
0x9e000 0x9 Reserved
0x100x1fff  System RAM
0x2000  0x201f  Reserved
0x2020  0x40003fff  System RAM
0x40004000  0x40004fff  Reserved
0x40005000  0xc9713fff  System RAM
0xc9714000  0xc9715fff  Reserved
0xc9716000  0xc9721fff  System RAM
0xc9722000  0xc9725fff  Reserved
0xc9726000  0xc972afff  System RAM
0xc972b000  0xc9d2bfff  ACPI Non-volatile Storage
0xc9d2c000  0xc9d2efff  Reserved
0xc9d2f000  0xc9d3efff  System RAM
0xc9d3f000  0xc9d42fff  Reserved
0xc9d43000  0xc9d43fff  System RAM
0xc9d44000  0xc9d49fff  Reserved
0xc9d4a000  0xc9ef4fff  System RAM
0xc9ef5000  0xc9ef8fff  Reserved
0xc9ef9000  0xc9f41fff  System RAM
0xc9f42000  0xc9f43fff  Reserved
0xc9f44000  0xc9f45fff  System RAM
0xc9f46000  0xc9f59fff  Reserved
0xc9f5a000  0xc9f5cfff  System RAM
0xc9f5d000  0xc9f5efff  Reserved
0xc9f5f000  0xc9f75fff  System RAM
0xc9f76000  0xc9f7bfff  Reserved
0xc9f7c000  0xc9f89fff  System RAM
0xc9f8a000  0xc9f8afff  Reserved
0xc9f8b000  0xc9f8cfff  System RAM
0xc9f8d000  0xc9f8dfff  Reserved
0xc9f8e000  0xc9f98fff  System RAM
0xc9f99000  0xc9f9dfff  Reserved
0xc9f9e000  0xc9fc9fff  System RAM
0xc9fca000  0xc9fcafff  Reserved
0xc9fcb000  0xc9fdafff  System RAM
0xc9fdb000  0xca000fff  Reserved
0xca001000  0xca013fff  System RAM
0xca014000  0xca014fff  Reserved
0xca015000  0xca015fff  System RAM
0xca016000  0xca017fff  Reserved
0xca018000  0xca018fff  System RAM
0xca019000  0xca01dfff  Reserved
0xca01e000  0xca034fff  System RAM
0xca035000  0xca5d9fff  Reserved
0xca5da000  0xca859fff  ACPI Non-volatile Storage
0xca85a000  0xca85efff  ACPI Tables
0xca85f000  0xca85  System RAM
0xca86  0xca8a2fff  ACPI Non-volatile Storage
0xca8a3000  0xcacbafff  System RAM
0xcacbb000  0xcaff3fff  Reserved
0xcaff4000  0xcaff  System RAM
0xcbc0  0xcfdf  Reserved
0xf800  0xfbff  Reserved
0xfec0  0xfec00fff  Reserved
0xfed0  0xfed03fff  Reserved
0xfed1c000  0xfed1  Reserved
0xfee0  0xfee00fff  Reserved
0xff00  0x  Reserved
0x10000 0x12f1f System RAM


-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


Support for tv_sec=-1 (one second before the epoch) timestamps?

2018-12-12 Thread Michał Górny
Hi,

While researching libc++ test failures, I've discovered that NetBSD
suffers from the same issue as FreeBSD -- that is, both the userspace
tooling and the kernel have problems with (time_t)-1 timestamp,
i.e. one second before the epoch.

For example:

$ TZ=UTC touch -t 196912312359.59 test
touch: out of range or illegal time specification: [[CC]YY]MMDDhhmm[.SS]

The problem is that -1 is being used as a special value (error return
here) while it is also a technically valid time_t value.  While I don't
think this is a major issue, I feel it's a bad design to exclude
a single-second period in the middle of allowed time range.

The particular issue in touch(1) is easy to fix -- I can set 'errno = 0'
before mktime() value, and add a check for non-zero errno to verify
whether -1 indicates an error or just happens to be the return value.

Sadly, this does not solve the problem entirely since kernel also
special-cases the value of '-1'.  FWICS, the vattr structure used to
update filesystem information uses the value of VNOVAL or -1 to indicate
that the particular attribute should not be changed.  As a result, any
attempt to set a timestamp with tv_sec=-1 is silently ignored
by the kernel.

I can think of a few possible ways out of this:

a. Use a different tv_sec value to indicate 'no change', e.g. largest
negative integer.  This doesn't exactly solve all the problems but moves
it to the edge of allowed range rather than in the middle of it.

b. Use out-of-range tv_nsec values to indicate 'no change', like
utimensat() does.

c. Add an extra flag to va_vaflags that explicitly indicates timestamps
are supposed to be changed; always ignore tv_sec when it's unset,
otherwise respect tv_sec independently of value.

What do you think?

-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part


noatime mounts inhibiting atime updates via utime()

2018-12-04 Thread Michał Górny
Hello,

Today me and Kamil noticed some more LLVM tests failing due to the host
filesystem being mounted with 'noatime' option.  Upon closer
investigation, it turned out that on NetBSD noatime inhibits not only
implicit atime updates but also prevents the utime() family of functions
from updating it.

Is there a specific reason for this behavior?  I'm not saying it's
incorrect (in fact, I'm pretty sure it's allowed by POSIX) but it's
somewhat surprising and contrary to what other tested systems do (Linux,
FreeBSD).

FWIU the main purpose of using 'noatime' is to inhibit inode updates
on file accesses and therefore avoid spurious writes.  However, I don't
think this applies if the program sets atime explicitly; especially if
it simultaneously updates mtime, therefore requiring an inode write
anyway.

-- 
Best regards,
Michał Górny


signature.asc
Description: This is a digitally signed message part