Generate instructions to perform the endian conversion using registers,
rather than generating two memory accesses.

The "way easier and faster" comment was obviously for the author, not
the processor.

Signed-off-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp64.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1e313db..0413a89 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -599,16 +599,22 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
                                break;
                        case 64:
                                /*
-                                * Way easier and faster(?) to store the value
-                                * into stack and then use ldbrx
+                                * We'll split it up into two words, swap those
+                                * independently and then merge them back.
                                 *
-                                * ctx->seen will be reliable in pass2, but
-                                * the instructions generated will remain the
-                                * same across all passes
+                                * First up, let's swap the most-significant word.
                                 */
-                               PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
-                               PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
-                               PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
+                               PPC_RLDICL(b2p[TMP_REG_1], dst_reg, 32, 32);
+                               PPC_RLWINM(b2p[TMP_REG_2], b2p[TMP_REG_1], 8, 0, 31);
+                               PPC_RLWIMI(b2p[TMP_REG_2], b2p[TMP_REG_1], 24, 0, 7);
+                               PPC_RLWIMI(b2p[TMP_REG_2], b2p[TMP_REG_1], 24, 16, 23);
+                               /* Then, the second half */
+                               PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
+                               PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
+                               PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
+                               /* Merge back */
+                               PPC_RLDICR(dst_reg, b2p[TMP_REG_1], 32, 31);
+                               PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_2]);
                                break;
                        }
                        break;
-- 
2.10.2

Reply via email to