On Fri, 27 Feb 2015, Philip Guenther wrote:
> I don't have a system to test this, but I suspect this diff will fix the 
> problem: the i386_space_copy() macro can set the direction flag but 
> doesn't clear it then, so functions using this macro can violate the ABI 
> by returning with the direction flag set.

The insane^Wdaring will also try this diff, which deletes 44 cld's to 
match the one added.  Not for 5.7--not enough time to exercise all the 
cases before then--but afterwards we should go the rest of the way.  
amd64 will get a similar diff after unlock...


Philip


Index: i386/bus_space.c
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/bus_space.c,v
retrieving revision 1.8
diff -u -p -r1.8 bus_space.c
--- i386/bus_space.c    17 Oct 2014 20:37:57 -0000      1.8
+++ i386/bus_space.c    27 Feb 2015 09:44:25 -0000
@@ -303,7 +303,7 @@ i386_bus_space_io_read_region_1(bus_spac
        void *_addr = a;
        int _port = h + o;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     inb %w2,%%al                            ;"
        "       stosb                                   ;"
        "       incl %2                                 ;"
@@ -320,7 +320,7 @@ i386_bus_space_io_read_region_2(bus_spac
        void *_addr = a;
        int _port = h + o;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     inw %w2,%%ax                            ;"
        "       stosw                                   ;"
        "       addl $2,%2                              ;"
@@ -337,7 +337,7 @@ i386_bus_space_io_read_region_4(bus_spac
        void *_addr = a;
        int _port = h + o;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     inl %w2,%%eax                           ;"
        "       stosl                                   ;"
        "       addl $4,%2                              ;"
@@ -393,7 +393,7 @@ i386_bus_space_io_write_region_1(bus_spa
        const void *_addr = a;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsb                                   ;"
        "       outb %%al,%w0                           ;"
        "       incl %0                                 ;"
@@ -410,7 +410,7 @@ i386_bus_space_io_write_region_2(bus_spa
        const void *_addr = a;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsw                                   ;"
        "       outw %%ax,%w0                           ;"
        "       addl $2,%0                              ;"
@@ -427,7 +427,7 @@ i386_bus_space_io_write_region_4(bus_spa
        const void *_addr = a;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsl                                   ;"
        "       outl %%eax,%w0                          ;"
        "       addl $4,%0                              ;"
@@ -442,7 +442,7 @@ i386_bus_space_io_set_multi_1(bus_space_
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     outb %b2, %w1                           ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "d" (h + o), "a" (v)          :
@@ -455,7 +455,7 @@ i386_bus_space_io_set_multi_2(bus_space_
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     outw %w2, %w1                           ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "d" (h + o), "a" (v)  :
@@ -468,7 +468,7 @@ i386_bus_space_io_set_multi_4(bus_space_
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     outl %2,%w1                             ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "d" (h + o), "a" (v)  :
@@ -617,7 +617,7 @@ i386_bus_space_mem_read_multi_1(bus_spac
 {
        void *_addr=a;
        int _cnt=cnt;
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movb (%2),%%al                          ;"
        "       stosb                                   ;"
        "       loop 1b"                                :
@@ -631,7 +631,7 @@ i386_bus_space_mem_read_multi_2(bus_spac
 {
        void *_addr=a;
        int _cnt=cnt;
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movw (%2),%%ax                          ;"
        "       stosw                                   ;"
        "       loop 1b"                                :
@@ -645,7 +645,7 @@ i386_bus_space_mem_read_multi_4(bus_spac
 {
        void *_addr=a;
        int _cnt=cnt;
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movl (%2),%%eax                         ;"
        "       stosl                                   ;"
        "       loop 1b"                                :
@@ -711,7 +711,7 @@ i386_bus_space_mem_write_multi_1(bus_spa
        const void *_addr=a;
        int _cnt=cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsb                                   ;"
        "       movb %%al,(%2)                          ;"
        "       loop 1b"                                :
@@ -726,7 +726,7 @@ i386_bus_space_mem_write_multi_2(bus_spa
        const void *_addr = a;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsw                                   ;"
        "       movw %%ax,(%2)                          ;"
        "       loop 1b"                                :
@@ -741,7 +741,7 @@ i386_bus_space_mem_write_multi_4(bus_spa
        const void *_addr=a;
        int _cnt=cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     lodsl                                   ;"
        "       movl %%eax,(%2)                         ;"
        "       loop 1b"                                :
@@ -788,7 +788,7 @@ i386_bus_space_mem_set_multi_1(bus_space
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movb %b2, (%1)                          ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "D" (h + o), "a" (v)          :
@@ -801,7 +801,7 @@ i386_bus_space_mem_set_multi_2(bus_space
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movw %w2, (%1)                          ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "D" (h + o), "a" (v)          :
@@ -814,7 +814,7 @@ i386_bus_space_mem_set_multi_4(bus_space
 {
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "1:     movl %2,(%1)                            ;"
        "       loop 1b"                                :
            "+c" (_cnt) : "D" (h + o), "a" (v)  :
@@ -828,7 +828,7 @@ i386_bus_space_mem_set_region_1(bus_spac
        int _port = h + o;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "       repne                                   ;"
        "       stosb"                                  :
            "+D" (_port), "+c" (_cnt) : "a" (v) :
@@ -842,7 +842,7 @@ i386_bus_space_mem_set_region_2(bus_spac
        int _port = h + o;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "       repne                                   ;"
        "       stosw"                                  :
            "+D" (_port), "+c" (_cnt) : "a" (v) :
@@ -856,7 +856,7 @@ i386_bus_space_mem_set_region_4(bus_spac
        int _port = h + o;
        int _cnt = cnt;
 
-       __asm volatile("cld                             ;"
+       __asm volatile(
        "       repne                                   ;"
        "       stosl"                                  :
            "+D" (_port), "+c" (_cnt) : "a" (v) :
Index: i386/kvm86call.S
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/kvm86call.S,v
retrieving revision 1.6
diff -u -p -r1.6 kvm86call.S
--- i386/kvm86call.S    5 Apr 2010 22:42:43 -0000       1.6
+++ i386/kvm86call.S    27 Feb 2015 09:49:21 -0000
@@ -74,7 +74,6 @@ ENTRY(kvm86_call)
        movl    vm86frame,%edi          /* target frame location */
        movl    SCRARGFRAME,%esi        /* source (set on entry) */
        movl    $FRAMESIZE/4,%ecx       /* sizeof(struct trapframe)/4 */
-       cld
        rep
        movsl                           /* copy frame to new stack */
 
@@ -163,7 +162,6 @@ ENTRY(kvm86_ret)
        movl    8(%ebp),%esi            /* source */
        movl    SCRARGFRAME,%edi        /* destination */
        movl    $FRAMESIZE/4,%ecx       /* size */
-       cld
        rep
        movsl                           /* copy frame to original frame */
 
Index: i386/locore.s
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/locore.s,v
retrieving revision 1.150
diff -u -p -r1.150 locore.s
--- i386/locore.s       11 Feb 2015 00:16:07 -0000      1.150
+++ i386/locore.s       27 Feb 2015 09:48:58 -0000
@@ -525,7 +525,6 @@ try586:     /* Use the `cpuid' instruction. 
        subl    %edi,%ecx                       # size of tables
        shrl    $2,%ecx
        xorl    %eax, %eax
-       cld
        rep
        stosl
 
@@ -731,8 +730,7 @@ ENTRY(kcopy)
        subl    %esi,%eax
        cmpl    %ecx,%eax               # overlapping?
        jb      1f
-       cld                             # nope, copy forward
-       shrl    $2,%ecx                 # copy by 32-bit words
+       shrl    $2,%ecx                 # nope, copy forward by 32-bit words
        rep
        movsl
        movl    24+FPADD(%esp),%ecx
@@ -821,7 +819,6 @@ _C_LABEL(_copyout_stac):
        SMAP_NOP
 
        /* bcopy(%esi, %edi, %eax); */
-       cld
        movl    %eax,%ecx
        shrl    $2,%ecx
        rep
@@ -876,7 +873,6 @@ _C_LABEL(_copyin_stac):
        ja      _C_LABEL(copy_fault)
 
        /* bcopy(%esi, %edi, %eax); */
-       cld
        movl    %eax,%ecx
        shrl    $2,%ecx
        rep
@@ -951,7 +947,6 @@ _C_LABEL(_copyoutstr_stac):
        movl    %eax,20+FPADD(%esp)
 
 1:     incl    %edx
-       cld
 
 1:     decl    %edx
        jz      2f
@@ -1009,7 +1004,6 @@ _C_LABEL(_copyinstr_stac):
        movl    %eax,20+FPADD(%esp)
 
 1:     incl    %edx
-       cld
 
 1:     decl    %edx
        jz      2f
@@ -1071,7 +1065,6 @@ ENTRY(copystr)
        movl    16+FPADD(%esp),%edi             # edi = to
        movl    20+FPADD(%esp),%edx             # edx = maxlen
        incl    %edx
-       cld
 
 1:     decl    %edx
        jz      4f
@@ -1519,7 +1512,6 @@ ENTRY(bzero)
        movl    8(%esp),%edi
        movl    12(%esp),%edx
 
-       cld                             /* set fill direction forward */
        xorl    %eax,%eax               /* set fill data to 0 */
 
        /*
@@ -1599,7 +1591,6 @@ ENTRY(i686_pagezero)
 
        movl    12(%esp), %edi
        movl    $1024, %ecx
-       cld
 
        ALIGN_TEXT
 1:
Index: i386/vector.s
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/i386/vector.s,v
retrieving revision 1.18
diff -u -p -r1.18 vector.s
--- i386/vector.s       28 Nov 2013 19:30:46 -0000      1.18
+++ i386/vector.s       27 Feb 2015 09:49:11 -0000
@@ -49,10 +49,6 @@
  * If the interrupt frame is made more flexible,  INTR can push %eax first and
  * decide the ipending case with less overhead, e.g., by avoiding loading the
  * segment registers.
- *
- * XXX
- * Should we do a cld on every system entry to avoid the requirement for
- * scattered cld's?
  */
 
        .globl  _C_LABEL(isa_strayintr)
Index: include/bus.h
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/include/bus.h,v
retrieving revision 1.65
diff -u -p -r1.65 bus.h
--- include/bus.h       24 Jan 2015 15:13:55 -0000      1.65
+++ include/bus.h       27 Feb 2015 09:42:56 -0000
@@ -446,23 +446,26 @@ struct i386_bus_space_ops {
                        !!! bus_space_copy_8 unimplemented !!!
 #endif
 
-#define        i386_space_copy1(a1, a2, cnt, movs, df)         \
-       __asm volatile(df "\n\trep\n\t" movs :          \
+#define        i386_space_copy1up(a1, a2, cnt, movs)           \
+       __asm volatile("rep\n\t" movs :                 \
+           "+S" (a1), "+D" (a2), "+c" (cnt)    :: "memory", "cc");
+#define        i386_space_copy1down(a1, a2, cnt, movs)         \
+       __asm volatile("std\n\trep\n\t" movs "\n\tcld": \
            "+S" (a1), "+D" (a2), "+c" (cnt)    :: "memory", "cc");
 
 #define        i386_space_copy(a1, a2, sz, cnt) do {                           \
        if ((void *)(a1) < (void *)(a2)) {                              \
                a1 += ((cnt) - 1) * (sz); a2 += ((cnt) - 1) * (sz);     \
                switch (sz) {                                           \
-               case 1: i386_space_copy1(a1,a2,cnt,"movsb","std");break;\
-               case 2: i386_space_copy1(a1,a2,cnt,"movsw","std");break;\
-               case 4: i386_space_copy1(a1,a2,cnt,"movsl","std");break;\
+               case 1: i386_space_copy1down(a1,a2,cnt,"movsb"); break; \
+               case 2: i386_space_copy1down(a1,a2,cnt,"movsw"); break; \
+               case 4: i386_space_copy1down(a1,a2,cnt,"movsl"); break; \
                }                                                       \
        } else                                                          \
                switch (sz) {                                           \
-               case 1: i386_space_copy1(a1,a2,cnt,"movsb","cld");break;\
-               case 2: i386_space_copy1(a1,a2,cnt,"movsw","cld");break;\
-               case 4: i386_space_copy1(a1,a2,cnt,"movsl","cld");break;\
+               case 1: i386_space_copy1up(a1,a2,cnt,"movsb"); break;   \
+               case 2: i386_space_copy1up(a1,a2,cnt,"movsw"); break;   \
+               case 4: i386_space_copy1up(a1,a2,cnt,"movsl"); break;   \
                }                                                       \
 } while (0)
 
Index: include/pio.h
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/i386/include/pio.h,v
retrieving revision 1.11
diff -u -p -r1.11 pio.h
--- include/pio.h       29 Mar 2014 18:09:29 -0000      1.11
+++ include/pio.h       27 Feb 2015 09:49:56 -0000
@@ -76,7 +76,7 @@ __inb(int port)
 static __inline void
 insb(int port, void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\tinsb"
+       __asm volatile("repne\n\tinsb"
            : "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
 }
 
@@ -102,7 +102,7 @@ __inw(int port)
 static __inline void
 insw(int port, void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\tinsw"
+       __asm volatile("repne\n\tinsw"
            : "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
 }
 
@@ -128,7 +128,7 @@ __inl(int port)
 static __inline void
 insl(int port, void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\tinsl"
+       __asm volatile("repne\n\tinsl"
            : "+D" (addr), "+c" (cnt) : "d" (port) : "memory", "cc");
 }
 
@@ -150,7 +150,7 @@ __outb(int port, u_int8_t data)
 static __inline void
 outsb(int port, const void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\toutsb"
+       __asm volatile("repne\n\toutsb"
            : "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
 }
 
@@ -172,7 +172,7 @@ __outw(int port, u_int16_t data)
 static __inline void
 outsw(int port, const void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\toutsw"
+       __asm volatile("repne\n\toutsw"
            : "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
 }
 
@@ -194,7 +194,7 @@ __outl(int port, u_int32_t data)
 static __inline void
 outsl(int port, const void *addr, int cnt)
 {
-       __asm volatile("cld\n\trepne\n\toutsl"
+       __asm volatile("repne\n\toutsl"
            : "+S" (addr), "+c" (cnt) : "d" (port) : "cc");
 }
 

Reply via email to