Provide these macros so that users writing assembler can be oblivious to
whether the code is PIC or non-PIC, 64-bit or 32-bit, etc.

It is important to use a real call and return when obtaining
the %pc as part of the PIC sequence.  Sequences
such as:

        call    . + 8
        mov     %o7, %PIC_REG

are to be avoided at all costs on UltraSPARC cpus.  This is because
such a sequence flushes the Return Address Stack (RAS) because the
call is not paired with a return.

Every time a call or jmpl with RD=%o7 is performed, the chip pushes
the PC+8 onto the top of the RAS.  The next "jmpl %o7 + 8" or "return
%i7 + 8" the chip sees will cause it to pop the top entry off the RAS
and begin fetching down that path.  If there is a mismatch, the entire
pipeline is flushed and the chip restarts fetching down the correct
path.

Therefore, the above discouraged sequence will cause all of the RAS
entries to mismatch and there will therefore be a full pipeline flush
on every subsequent function return.

It is also highly discouraged to use "rd %pc, %PIC_REG" because that
is extremely slow on UltraSPARC cpus.  The cost of a RDPC instruction
amounts essentially to a pipeline flush.

Signed-off-by: David S. Miller <[email protected]>
---
 crypto/sparc_arch.h |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h
index 3ece96a..bcb4829 100644
--- a/crypto/sparc_arch.h
+++ b/crypto/sparc_arch.h
@@ -25,4 +25,74 @@ extern int OPENSSL_sparcv9cap_P;
 #define SPARCV9_MONTSQR                (1<<17)
 #define SPARCV9_CRC32C         (1<<18)
 
+#ifdef __ASSEMBLER__   /* assembler-only macros; #ifdef (not #if) so C files including this header do not trip -Wundef */
+
+#ifdef __PIC__
+#define SPARC_PIC_THUNK(reg)   /* Emits the .Lpic_thunk helper that SPARC_PIC_THUNK_CALL(reg) calls; fixed label, so expand once per file and with the same reg. */ \
+       .align  32;             \
+.Lpic_thunk:                   \
+       jmp     %o7 + 8;        /* return past the call and its delay slot */ \
+        add    %o7, %##reg, %##reg; /* delay slot: reg += %o7, the address of the call */
+#else
+#define SPARC_PIC_THUNK(reg)   /* non-PIC: no thunk is needed, expands to nothing */
+#endif
+
+#define SPARC_PIC_THUNK_CALL(reg)      /* reg = _GLOBAL_OFFSET_TABLE_ offset (sethi/or) + %o7 added by .Lpic_thunk; overwrites %o7 */ \
+       sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %##reg;   /* upper bits; NOTE(review): -4 presumably because sethi sits one insn before the call - confirm */ \
+       call    .Lpic_thunk;                            /* real call, matched by the return in the thunk */ \
+        or     %##reg, %lo(_GLOBAL_OFFSET_TABLE_+4), %##reg; /* delay slot: low bits; +4 presumably because this insn follows the call - confirm */
+
+#define SPARC_SETUP_PIC_REG(reg)       /* set up reg through the thunk call; %o7 is not preserved (see the _LEAF variant) */ \
+       SPARC_PIC_THUNK_CALL(reg)
+
+#define SPARC_SETUP_PIC_REG_LEAF(reg, tmp)     /* like SPARC_SETUP_PIC_REG, but keeps %o7 intact by parking it in tmp */ \
+       mov     %o7, %##tmp;                    /* stash %o7 before the call overwrites it */ \
+       SPARC_PIC_THUNK_CALL(reg);              \
+       mov     %##tmp, %o7;                    /* put the saved %o7 back; tmp is free again afterwards */
+
+#ifdef __arch64__
+#define LDPTR          ldx     /* load mnemonic used by SPARC_LOAD_ADDRESS when __arch64__ is defined (64-bit load) */
+#else
+#define LDPTR          ld      /* load mnemonic used otherwise (32-bit load) */
 #endif
+
+#ifdef __PIC__
+
+#define SPARC_LOAD_ADDRESS(SYM, reg, tmp)      /* PIC: reg = pointer loaded from [reg + tmp]; overwrites tmp and %o7 */ \
+       SPARC_SETUP_PIC_REG(reg);               /* reg = value produced by the thunk call */ \
+       sethi   %hi(SYM), %##tmp;               \
+       or      %##tmp, %lo(SYM), %##tmp;       /* tmp = %hi/%lo value of SYM; presumably its GOT offset when assembled as PIC - confirm */ \
+       LDPTR   [%##reg + %##tmp], %##reg;      /* fetch the pointer stored at reg + tmp */
+
+#define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) /* PIC: same as SPARC_LOAD_ADDRESS, but %o7 is preserved; tmp is overwritten */ \
+       SPARC_SETUP_PIC_REG_LEAF(reg, tmp);     /* tmp holds %o7 only for the duration of this step */ \
+       sethi   %hi(SYM), %##tmp;               /* tmp is reused from here on */ \
+       or      %##tmp, %lo(SYM), %##tmp;       /* tmp = %hi/%lo value of SYM; presumably its GOT offset when assembled as PIC - confirm */ \
+       LDPTR   [%##reg + %##tmp], %##reg;      /* fetch the pointer stored at reg + tmp */
+
+#else
+
+#ifdef __arch64__
+#define SPARC_LOAD_ADDRESS(SYM, reg, tmp)      /* non-PIC, __arch64__: reg = address of SYM via setx, with tmp as its scratch register */ \
+       setx    SYM, %##tmp, %##reg;
+#else
+#define SPARC_LOAD_ADDRESS(SYM, reg, tmp)      /* non-PIC, no __arch64__: reg = address of SYM via set; tmp is accepted but unused */ \
+       set     SYM, %##reg;
+#endif
+
+#define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) /* non-PIC: the plain variant makes no call, so it is reused unchanged for leaf code */ \
+       SPARC_LOAD_ADDRESS(SYM, reg, tmp)
+
+#endif
+
+#define SPARC_LOAD_V9_CAPS(reg, tmp)                           /* reg = word read from OPENSSL_sparcv9cap_P; tmp is scratch */ \
+       SPARC_LOAD_ADDRESS(OPENSSL_sparcv9cap_P, reg, tmp);     /* reg = address of OPENSSL_sparcv9cap_P */ \
+       ld      [%##reg], %##reg;                               /* replace the address with the word stored there */
+
+#define SPARC_LOAD_V9_CAPS_LEAF(reg, tmp)                              /* as SPARC_LOAD_V9_CAPS, built on the _LEAF address load */ \
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P, reg, tmp);        /* reg = address of OPENSSL_sparcv9cap_P */ \
+       ld      [%##reg], %##reg;                                       /* replace the address with the word stored there */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __SPARC_ARCH_H__ */
-- 
1.7.10.4

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       [email protected]
Automated List Manager                           [email protected]

Reply via email to