This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-rng.git

commit b947c91fb8e96d85c8dbf1d9a696173146ba27b0
Author: Alex Herbert <[email protected]>
AuthorDate: Thu Feb 26 14:59:10 2026 +0000

    RNG-188: Add Philox generation performance
    
    Benchmark use of a method handle to call java.lang.Math 128-bit
    multiplication functions for 64-bit arguments.
    
    Add variants of the 32-bit generator to test performance.
    
    Add variants of the 64-bit generator to call Math 128-bit multiplication
    functions.
---
 .../commons/rng/core/source64/LXMSupport.java      |   3 +-
 commons-rng-examples/examples-jmh/pom.xml          |   3 +-
 .../rng/examples/jmh/core/LXMBenchmark.java        |  93 ++-
 .../jmh/core/PhiloxGenerationPerformance.java      | 886 +++++++++++++++++++++
 .../rng/examples/jmh/core/LXMBenchmarkTest.java    |  13 +-
 5 files changed, 989 insertions(+), 9 deletions(-)

diff --git 
a/commons-rng-core/src/main/java/org/apache/commons/rng/core/source64/LXMSupport.java
 
b/commons-rng-core/src/main/java/org/apache/commons/rng/core/source64/LXMSupport.java
index fa23f247..c64e6458 100644
--- 
a/commons-rng-core/src/main/java/org/apache/commons/rng/core/source64/LXMSupport.java
+++ 
b/commons-rng-core/src/main/java/org/apache/commons/rng/core/source64/LXMSupport.java
@@ -110,8 +110,9 @@ final class LXMSupport {
      * }</pre>
      *
      * <p>Note: The method {@code Math.multiplyHigh} was added in JDK 9
-     * and should be used as above when the source code targets Java 11
+     * and could be used as above when the source code targets Java 11
      * to exploit the intrinsic method.
+     * Benchmarks are available in the Commons RNG JMH module.
      *
      * <p>Note: The method {@code Math.unsignedMultiplyHigh} was added in JDK 
18
      * and should be used when the source code target allows.
diff --git a/commons-rng-examples/examples-jmh/pom.xml 
b/commons-rng-examples/examples-jmh/pom.xml
index bf377d43..871367a2 100644
--- a/commons-rng-examples/examples-jmh/pom.xml
+++ b/commons-rng-examples/examples-jmh/pom.xml
@@ -80,6 +80,8 @@
     <pmd.skip>true</pmd.skip>
     <cpd.skip>true</cpd.skip>
     <spotbugs.skip>true</spotbugs.skip>
+    <!-- Disable to allow use of MethodHandle to higher JDK version methods. 
-->
+    <animal.sniffer.skip>true</animal.sniffer.skip>
 
     <!-- Skip tests. This avoids the lengthy test of the ziggurat samplers. 
Override using:
        mvn test -DskipTests=false
@@ -94,7 +96,6 @@
     <maven.compiler.source>11</maven.compiler.source>
     <maven.compiler.target>11</maven.compiler.target>
     <commons.compiler.release>11</commons.compiler.release>
-    <animal.sniffer.skip>true</animal.sniffer.skip>
     -->
 
   </properties>
diff --git 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
index 0935c7e3..14637e6e 100644
--- 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
+++ 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmark.java
@@ -17,8 +17,12 @@
 
 package org.apache.commons.rng.examples.jmh.core;
 
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.TimeUnit;
+import java.util.function.LongBinaryOperator;
 import java.util.function.LongSupplier;
 import java.util.stream.LongStream;
 import org.apache.commons.rng.JumpableUniformRandomProvider;
@@ -77,12 +81,61 @@ public class LXMBenchmark {
      */
     @State(Scope.Benchmark)
     public static class UnsignedMultiplyHighSource {
+        /** Method name for Math.multiplyHigh. */
+        private static final String MULTIPLY_HIGH = "multiplyHigh";
+        /** Method name for Math.unsignedMultiplyHigh. */
+        private static final String UNSIGNED_MULTIPLY_HIGH = 
"unsignedMultiplyHigh";
         /** A mask to convert an {@code int} to an unsigned integer stored as 
a {@code long}. */
         private static final long INT_TO_UNSIGNED_BYTE_MASK = 0xffff_ffffL;
         /** Precomputed upper split (32-bits) of the low half of the 128-bit 
multiplier constant. */
         private static final long X = ML >>> 32;
         /** Precomputed lower split (32-bits) of the low half of the 128-bit 
multiplier constant. */
         private static final long Y = ML & INT_TO_UNSIGNED_BYTE_MASK;
+        /** Method handle to unsigned multiply using
+         * java.lang.Math.multiplyHigh if Java 9, otherwise
+         * a default implementation. */
+        private static final LongBinaryOperator MATH_MULTIPLY_HIGH;
+        /** Method handle to unsigned multiply using
+         * java.lang.Math.unsignedMultiplyHigh if Java 18, otherwise
+         * a default implementation. */
+        private static final LongBinaryOperator MATH_UNSIGNED_MULTIPLY_HIGH;
+
+        static {
+            final MethodHandles.Lookup lookup = MethodHandles.lookup();
+            LongBinaryOperator op1;
+            // Allow catch of Throwable
+            // CHECKSTYLE: stop IllegalCatch
+            try {
+                final MethodHandle h1 = lookup.findStatic(Math.class, 
MULTIPLY_HIGH,
+                    MethodType.methodType(long.class, long.class, long.class));
+                op1 = (a, b) -> {
+                    try {
+                        return (long) h1.invokeExact(a, b) + ((a >> 63) & b) + 
((b >> 63) & a);
+                    } catch (Throwable e) {
+                        throw new RuntimeException(e);
+                    }
+                };
+            } catch (NoSuchMethodException | IllegalAccessException ignored) {
+                op1 = UnsignedMultiplyHighSource::unsignedMultiplyHigh;
+            }
+            MATH_MULTIPLY_HIGH = op1;
+            LongBinaryOperator op2;
+            try {
+                final MethodHandle h2 = lookup.findStatic(Math.class, 
UNSIGNED_MULTIPLY_HIGH,
+                    MethodType.methodType(long.class, long.class, long.class));
+                op2 = (a, b) -> {
+                    try {
+                        return (long) h2.invokeExact(a, b);
+                    } catch (Throwable e) {
+                        throw new RuntimeException(e);
+                    }
+                };
+            } catch (NoSuchMethodException | IllegalAccessException ignored) {
+                op2 = UnsignedMultiplyHighSource::unsignedMultiplyHigh;
+            }
+            // CHECKSTYLE: resume IllegalCatch
+            MATH_UNSIGNED_MULTIPLY_HIGH = op2;
+        }
 
         /**
          * The method to compute the value.
@@ -92,9 +145,10 @@ public class LXMBenchmark {
             //"mathMultiplyHigh", "mathMultiplyHighWithML",
             // Require JDK 18+
             //"mathUnsignedMultiplyHigh", "mathUnsignedMultiplyHighWithML",
-            "unsignedMultiplyHigh", "unsignedMultiplyHighWithML",
+            UNSIGNED_MULTIPLY_HIGH, "unsignedMultiplyHighWithML",
             "unsignedMultiplyHighML",
             "unsignedMultiplyHighPlusMultiplyLow", 
"unsignedMultiplyHighAndLow",
+            "mhMultiplyHigh", "mhUnsignedMultiplyHigh",
             })
         private String method;
 
@@ -155,7 +209,7 @@ public class LXMBenchmark {
             if (BASELINE1.equals(method)) {
                 gen = () -> ga.getAsLong();
             } else if (BASELINE2.equals(method)) {
-                gen = () -> ga.getAsLong() ^ gb.getAsLong();
+                gen = () -> ga.getAsLong() * gb.getAsLong();
             } else if ("mathMultiplyHigh".equals(method)) {
                 gen = () -> mathMultiplyHigh(ga.getAsLong(), gb.getAsLong());
             } else if ("mathMultiplyHighWithML".equals(method)) {
@@ -164,7 +218,7 @@ public class LXMBenchmark {
                 gen = () -> mathUnsignedMultiplyHigh(ga.getAsLong(), 
gb.getAsLong());
             } else if ("mathUnsignedMultiplyHighWithML".equals(method)) {
                 gen = () -> mathUnsignedMultiplyHigh(ga.getAsLong(), ML);
-            } else if ("unsignedMultiplyHigh".equals(method)) {
+            } else if (UNSIGNED_MULTIPLY_HIGH.equals(method)) {
                 gen = () -> unsignedMultiplyHigh(ga.getAsLong(), 
gb.getAsLong());
             } else if ("unsignedMultiplyHighWithML".equals(method)) {
                 gen = () -> unsignedMultiplyHigh(ga.getAsLong(), ML);
@@ -192,6 +246,11 @@ public class LXMBenchmark {
                     final long hi = unsignedMultiplyHigh(a, b, lo);
                     return hi ^ lo[0];
                 };
+            // Method handles
+            } else if ("mhMultiplyHigh".equals(method)) {
+                gen = () -> mhMultiplyHigh(ga.getAsLong(), gb.getAsLong());
+            } else if ("mhUnsignedMultiplyHigh".equals(method)) {
+                gen = () -> mhUnsignedMultiplyHigh(ga.getAsLong(), 
gb.getAsLong());
             } else {
                 throw new IllegalStateException("Unknown method: " + method);
             }
@@ -365,6 +424,30 @@ public class LXMBenchmark {
 
             return (bx >>> 32) + (carry >>> 32) + ax;
         }
+
+        /**
+         * Compute the unsigned multiply of two values using a method handle to
+         * Math.multiplyHigh. If not using JDK 9 this reverts to a default 
implementation.
+         *
+         * @param a First value
+         * @param b Second value
+         * @return the upper 64-bits of the 128-bit result
+         */
+        static long mhMultiplyHigh(long a, long b) {
+            return MATH_MULTIPLY_HIGH.applyAsLong(a, b);
+        }
+
+        /**
+         * Compute the unsigned multiply of two values using a method handle to
+         * Math.unsignedMultiplyHigh. If not using JDK 18 this reverts to a 
default implementation.
+         *
+         * @param a First value
+         * @param b Second value
+         * @return the upper 64-bits of the 128-bit result
+         */
+        static long mhUnsignedMultiplyHigh(long a, long b) {
+            return MATH_UNSIGNED_MULTIPLY_HIGH.applyAsLong(a, b);
+        }
     }
 
     /**
@@ -451,9 +534,9 @@ public class LXMBenchmark {
             }
 
             if (BASELINE2.equals(method)) {
-                gen = () -> ga.getAsLong() ^ gb.getAsLong();
+                gen = () -> ga.getAsLong() * gb.getAsLong();
             } else if (BASELINE4.equals(method)) {
-                gen = () -> ga.getAsLong() ^ gb.getAsLong() ^ gc.getAsLong() ^ 
gd.getAsLong();
+                gen = () -> ga.getAsLong() * gb.getAsLong() + gc.getAsLong() * 
gd.getAsLong();
             } else if ("unsignedMultiplyHighPlusProducts".equals(method)) {
                 gen = () -> {
                     final long a = ga.getAsLong();
diff --git 
a/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/PhiloxGenerationPerformance.java
 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/PhiloxGenerationPerformance.java
new file mode 100644
index 00000000..cd21a39b
--- /dev/null
+++ 
b/commons-rng-examples/examples-jmh/src/main/java/org/apache/commons/rng/examples/jmh/core/PhiloxGenerationPerformance.java
@@ -0,0 +1,886 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.rng.examples.jmh.core;
+
+import java.util.Arrays;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.core.util.NumberFactory;
+import 
org.apache.commons.rng.examples.jmh.core.LXMBenchmark.UnsignedMultiplyHighSource;
+import org.apache.commons.rng.simple.RandomSource;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+/**
+ * Executes benchmark to compare the speed of generation of random numbers 
from the
+ * various Philox providers.
+ *
+ * <p>Performance Notes
+ *
+ * <p>Some improvements to performance are observed on JDK 8 for the inline 
and zero
+ * variations of the 32-bit generator. On higher JDKs the performance of the 
original
+ * implementation is consistently the best and the modified variants are 
either the
+ * same speed but can be significantly slower. This typically occurs for the 
inline
+ * variant which may not be efficiently targeted by the JVM optimisations.
+ * The variations are left for future development.
+ *
+ * <p>The 64-bit generator can be made significantly faster if it uses the 
64-bit
+ * multiplication methods available in the {@link Math} class (see RNG-188).
+ */
+public class PhiloxGenerationPerformance extends AbstractBenchmark {
+    /** The int value. Must NOT be final to prevent JVM optimisation! */
+    private int intValue;
+    /** The long value. Must NOT be final to prevent JVM optimisation! */
+    private long longValue;
+
+    /**
+     * The benchmark state (retrieve the various "RandomSource"s).
+     */
+    @State(Scope.Benchmark)
+    public static class Sources {
+        /**
+         * RNG providers.
+         */
+        @Param({"PHILOX_4X32_ORIGINAL",
+                "PHILOX_4X32_CONST",
+                "PHILOX_4X32_ZERO",
+                "PHILOX_4X32_INLINE",
+                "PHILOX_4X32",
+                "PHILOX_4X64_ORIGINAL",
+                "PHILOX_4X64_MH",
+                "PHILOX_4X64_UMH",
+                "PHILOX_4X64"})
+        private String randomSourceName;
+
+        /** RNG. */
+        private UniformRandomProvider provider;
+
+        /**
+         * Gets the generator.
+         *
+         * @return the RNG.
+         */
+        public UniformRandomProvider getGenerator() {
+            return provider;
+        }
+
+        /** Instantiates generator. This need only be done once per set of 
iterations. */
+        @Setup(Level.Trial)
+        public void setup() {
+            if ("PHILOX_4X32_ORIGINAL".equals(randomSourceName)) {
+                provider = new Philox4x32Original(intSeed());
+            } else if ("PHILOX_4X32_CONST".equals(randomSourceName)) {
+                provider = new Philox4x32Const(intSeed());
+            } else if ("PHILOX_4X32_ZERO".equals(randomSourceName)) {
+                provider = new Philox4x32Zero(intSeed());
+            } else if ("PHILOX_4X32_INLINE".equals(randomSourceName)) {
+                provider = new Philox4x32Inline(intSeed());
+            } else if ("PHILOX_4X64_ORIGINAL".equals(randomSourceName)) {
+                provider = new Philox4x64Original(longSeed());
+            } else if ("PHILOX_4X64_MH".equals(randomSourceName)) {
+                provider = new Philox4x64MH(longSeed());
+            } else if ("PHILOX_4X64_UMH".equals(randomSourceName)) {
+                provider = new Philox4x64UMH(longSeed());
+            } else {
+                final RandomSource randomSource = 
RandomSource.valueOf(randomSourceName);
+                provider = randomSource.create();
+            }
+        }
+
+        /**
+         * @return the int[] seed
+         */
+        private static int[] intSeed() {
+            return ThreadLocalRandom.current().ints(6).toArray();
+        }
+
+        /**
+         * @return the long[] seed
+         */
+        private static long[] longSeed() {
+            return ThreadLocalRandom.current().longs(6).toArray();
+        }
+    }
+
+    /**
+     * Class adapted from the original implementation of Philox4x32.
+     */
+    static class Philox4x32Original implements UniformRandomProvider {
+        /** Philox 32-bit mixing constant for counter 0. */
+        protected static final int K_PHILOX_10_A = 0x9E3779B9;
+        /** Philox 32-bit mixing constant for counter 1. */
+        protected static final int K_PHILOX_10_B = 0xBB67AE85;
+        /** Philox 32-bit constant for key 0. */
+        protected static final int K_PHILOX_SA = 0xD2511F53;
+        /** Philox 32-bit constant for key 1. */
+        protected static final int K_PHILOX_SB = 0xCD9E8D57;
+        /** Internal buffer size. */
+        protected static final int PHILOX_BUFFER_SIZE = 4;
+
+        /** Counter 0. */
+        protected int counter0;
+        /** Counter 1. */
+        protected int counter1;
+        /** Counter 2. */
+        protected int counter2;
+        /** Counter 3. */
+        protected int counter3;
+        /** Output buffer. */
+        protected final int[] buffer = new int[PHILOX_BUFFER_SIZE];
+        /** Key low bits. */
+        protected int key0;
+        /** Key high bits. */
+        protected int key1;
+        /** Output buffer index. When at the end of the buffer the counter is
+         * incremented and the buffer regenerated. */
+        protected int bufferPosition;
+
+        /**
+         * Creates a new instance based on an array of int containing, key 
(first two ints) and
+         * the counter (next 4 ints, low bits = first int). The counter is not 
scrambled and may
+         * be used to create contiguous blocks with size a multiple of 4 ints. 
For example,
+         * setting seed[2] = 1 is equivalent to start with seed[2]=0 and 
calling {@link #next()} 4 times.
+         *
+         * @param seed Array of size 6 defining 
key0,key1,counter0,counter1,counter2,counter3.
+         *             If the size is smaller, zero values are assumed.
+         */
+        Philox4x32Original(int[] seed) {
+            final int[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : 
seed;
+            key0 = input[0];
+            key1 = input[1];
+            counter0 = input[2];
+            counter1 = input[3];
+            counter2 = input[4];
+            counter3 = input[5];
+            bufferPosition = PHILOX_BUFFER_SIZE;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int nextInt() {
+            final int p = bufferPosition;
+            if (p < PHILOX_BUFFER_SIZE) {
+                bufferPosition = p + 1;
+                return buffer[p];
+            }
+            incrementCounter();
+            rand10();
+            bufferPosition = 1;
+            return buffer[0];
+        }
+
+        /**
+         * Increment the counter by one.
+         */
+        protected void incrementCounter() {
+            if (++counter0 != 0) {
+                return;
+            }
+            if (++counter1 != 0) {
+                return;
+            }
+            if (++counter2 != 0) {
+                return;
+            }
+            ++counter3;
+        }
+
+        /**
+         * Perform 10 rounds, using counter0, counter1, counter2, counter3 as 
starting point.
+         * It updates the buffer member variable, but no others.
+         */
+        protected void rand10() {
+            buffer[0] = counter0;
+            buffer[1] = counter1;
+            buffer[2] = counter2;
+            buffer[3] = counter3;
+
+            int k0 = key0;
+            int k1 = key1;
+
+            //unrolled loop for performance
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRound(buffer, k0, k1);
+        }
+
+        /**
+         * Performs a single round of philox.
+         *
+         * @param counter Counter, which will be updated after each call.
+         * @param key0 Key low bits.
+         * @param key1 Key high bits.
+         */
+        protected static void singleRound(int[] counter, int key0, int key1) {
+            final long product0 = (K_PHILOX_SA & 0xffff_ffffL) * (counter[0] & 
0xffff_ffffL);
+            final int hi0 = (int) (product0 >>> 32);
+            final long product1 = (K_PHILOX_SB & 0xffff_ffffL) * (counter[2] & 
0xffff_ffffL);
+            final int hi1 = (int) (product1 >>> 32);
+
+            counter[0] = hi1 ^ counter[1] ^ key0;
+            counter[1] = (int) product1;
+            counter[2] = hi0 ^ counter[3] ^ key1;
+            counter[3] = (int) product0;
+        }
+
+        @Override
+        public long nextLong() {
+            return NumberFactory.makeLong(nextInt(), nextInt());
+        }
+    }
+
+    /**
+     * Updated to use pre-computed long constants.
+     */
+    static final class Philox4x32Const extends Philox4x32Original {
+        /** Philox 32-bit constant for key 0 as an unsigned integer. */
+        private static final long K_PHILOX_SAL = 
Integer.toUnsignedLong(0xD2511F53);
+        /** Philox 32-bit constant for key 1 as an unsigned integer. */
+        private static final long K_PHILOX_SBL = 
Integer.toUnsignedLong(0xCD9E8D57);
+
+        /**
+         * Creates a new instance.
+         *
+         * @param seed Seed.
+         */
+        Philox4x32Const(int[] seed) {
+            super(seed);
+        }
+
+        @Override
+        protected void rand10() {
+            buffer[0] = counter0;
+            buffer[1] = counter1;
+            buffer[2] = counter2;
+            buffer[3] = counter3;
+
+            int k0 = key0;
+            int k1 = key1;
+
+            //unrolled loop for performance
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            singleRoundL(buffer, k0, k1);
+        }
+
+        /**
+         * Performs a single round of philox.
+         *
+         * @param counter Counter, which will be updated after each call.
+         * @param key0 Key low bits.
+         * @param key1 Key high bits.
+         */
+        private static void singleRoundL(int[] counter, int key0, int key1) {
+            final long product0 = K_PHILOX_SAL * (counter[0] & 0xffff_ffffL);
+            final int hi0 = (int) (product0 >>> 32);
+            final long product1 = K_PHILOX_SBL * (counter[2] & 0xffff_ffffL);
+            final int hi1 = (int) (product1 >>> 32);
+
+            counter[0] = hi1 ^ counter[1] ^ key0;
+            counter[1] = (int) product1;
+            counter[2] = hi0 ^ counter[3] ^ key1;
+            counter[3] = (int) product0;
+        }
+    }
+
+    /**
+     * Updated to use count the buffer position down to zero.
+     *
+     * <p>Note: This implementation will output the incorrect order for each
+     * block of 4. This is for simplicity during testing.
+     */
+    static final class Philox4x32Zero extends Philox4x32Original {
+        /**
+         * Creates a new instance.
+         *
+         * @param seed Seed.
+         */
+        Philox4x32Zero(int[] seed) {
+            super(seed);
+            bufferPosition = 0;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public int nextInt() {
+            int p = bufferPosition - 1;
+            if (p < 0) {
+                incrementCounter();
+                rand10();
+                p = 3;
+            }
+            bufferPosition = p;
+            return buffer[p];
+        }
+    }
+
+    /**
+     * Updated to inline the entire buffer update with pre-computed long 
constants.
+     */
+    static final class Philox4x32Inline extends Philox4x32Original {
+        /**
+         * Creates a new instance.
+         *
+         * @param seed Seed.
+         */
+        Philox4x32Inline(int[] seed) {
+            super(seed);
+        }
+
+        /**
+         * Perform 10 rounds, using counter0, counter1, counter2, counter3 as 
starting point.
+         * It updates the buffer member variable, but no others.
+         */
+        protected void rand10() {
+            final long sa = K_PHILOX_SA & 0xffff_ffffL;
+            final long sb = K_PHILOX_SB & 0xffff_ffffL;
+
+            long c0 = counter0;
+            int c1 = counter1;
+            long c2 = counter2;
+            int c3 = counter3;
+
+            long product0;
+            long product1;
+            int hi0;
+            int hi1;
+
+            // Unrolled loop for performance
+            int k0 = key0;
+            int k1 = key1;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            k0 += K_PHILOX_10_A;
+            k1 += K_PHILOX_10_B;
+            product0 = sa * (c0 & 0xffff_ffffL);
+            hi0 = (int) (product0 >>> 32);
+            product1 = sb * (c2 & 0xffff_ffffL);
+            hi1 = (int) (product1 >>> 32);
+
+            c0 = hi1 ^ c1 ^ k0;
+            c1 = (int) product1;
+            c2 = hi0 ^ c3 ^ k1;
+            c3 = (int) product0;
+
+            buffer[0] = (int) c0;
+            buffer[1] = c1;
+            buffer[2] = (int) c2;
+            buffer[3] = c3;
+        }
+    }
+
+    /**
+     * Class adapted from the original implementation of Philox4x64.
+     */
+    static class Philox4x64Original implements UniformRandomProvider {
+        /** Philox 64-bit mixing constant for counter 0. */
+        protected static final long PHILOX_M0 = 0xD2E7470EE14C6C93L;
+        /** Philox 64-bit mixing constant for counter 1. */
+        protected static final long PHILOX_M1 = 0xCA5A826395121157L;
+        /** Philox 64-bit constant for key 0. */
+        protected static final long PHILOX_W0 = 0x9E3779B97F4A7C15L;
+        /** Philox 64-bit constant for key 1. */
+        protected static final long PHILOX_W1 = 0xBB67AE8584CAA73BL;
+        /** Internal buffer size. */
+        protected static final int PHILOX_BUFFER_SIZE = 4;
+
+        /** Counter 0. */
+        protected long counter0;
+        /** Counter 1. */
+        protected long counter1;
+        /** Counter 2. */
+        protected long counter2;
+        /** Counter 3. */
+        protected long counter3;
+        /** Output buffer. */
+        protected final long[] buffer = new long[PHILOX_BUFFER_SIZE];
+        /** Key low bits. */
+        protected long key0;
+        /** Key high bits. */
+        protected long key1;
+        /** Output buffer index. When at the end of the buffer the counter is
+         * incremented and the buffer regenerated. */
+        protected int bufferPosition;
+
+        /**
+         * Creates a new instance given 6 long numbers containing, key (first 
two longs) and
+         * the counter (next 4 longs, low bits = first long). The counter is 
not scrambled and may
+         * be used to create contiguous blocks with size a multiple of 4 
longs. For example,
+         * setting seed[2] = 1 is equivalent to start with seed[2]=0 and 
calling {@link #next()} 4 times.
+         *
+         * @param seed Array of size 6 defining 
key0,key1,counter0,counter1,counter2,counter3.
+         *             If the size is smaller, zero values are assumed.
+         */
+        Philox4x64Original(long[] seed) {
+            final long[] input = seed.length < 6 ? Arrays.copyOf(seed, 6) : 
seed;
+            key0 = input[0];
+            key1 = input[1];
+            counter0 = input[2];
+            counter1 = input[3];
+            counter2 = input[4];
+            counter3 = input[5];
+            bufferPosition = PHILOX_BUFFER_SIZE;
+        }
+
+        /** {@inheritDoc} */
+        @Override
+        public long nextLong() {
+            final int p = bufferPosition;
+            if (p < PHILOX_BUFFER_SIZE) {
+                bufferPosition = p + 1;
+                return buffer[p];
+            }
+            incrementCounter();
+            rand10();
+            bufferPosition = 1;
+            return buffer[0];
+        }
+
+        /**
+         * Increment the counter by one.
+         */
+        protected void incrementCounter() {
+            if (++counter0 != 0) {
+                return;
+            }
+            if (++counter1 != 0) {
+                return;
+            }
+            if (++counter2 != 0) {
+                return;
+            }
+            ++counter3;
+        }
+
+        /**
+         * Perform 10 rounds, using counter0, counter1, counter2, counter3 as 
starting point.
+         * It updates the buffer member variable, but no others.
+         */
+        protected void rand10() {
+            buffer[0] = counter0;
+            buffer[1] = counter1;
+            buffer[2] = counter2;
+            buffer[3] = counter3;
+
+            long k0 = key0;
+            long k1 = key1;
+
+            //unrolled loop for performance
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+        }
+
+
+        /**
+         * Performs a single round of philox.
+         *
+         * @param counter Counter, which will be updated after each call.
+         * @param key0 Key low bits.
+         * @param key1 Key high bits.
+         */
+        private static void singleRound(long[] counter, long key0, long key1) {
+            final long lo0 = PHILOX_M0 * counter[0];
+            final long hi0 = 
UnsignedMultiplyHighSource.unsignedMultiplyHigh(PHILOX_M0, counter[0]);
+            final long lo1 = PHILOX_M1 * counter[2];
+            final long hi1 = 
UnsignedMultiplyHighSource.unsignedMultiplyHigh(PHILOX_M1, counter[2]);
+
+            counter[0] = hi1 ^ counter[1] ^ key0;
+            counter[1] = lo1;
+            counter[2] = hi0 ^ counter[3] ^ key1;
+            counter[3] = lo0;
+        }
+    }
+
+    /**
+     * Updated to use a MethodHandle to Math.multiplyHigh (UMH) (Java 9+).
+     * If not available then revert to a default implementation.
+     */
+    static final class Philox4x64MH extends Philox4x64Original {
+        /**
+         * Creates a new instance.
+         *
+         * @param seed Seed.
+         */
+        Philox4x64MH(long[] seed) {
+            super(seed);
+        }
+
+        @Override
+        protected void rand10() {
+            buffer[0] = counter0;
+            buffer[1] = counter1;
+            buffer[2] = counter2;
+            buffer[3] = counter3;
+
+            long k0 = key0;
+            long k1 = key1;
+
+            //unrolled loop for performance
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+        }
+
+
+        /**
+         * Performs a single round of philox.
+         *
+         * @param counter Counter, which will be updated after each call.
+         * @param key0 Key low bits.
+         * @param key1 Key high bits.
+         */
+        private static void singleRound(long[] counter, long key0, long key1) {
+            final long lo0 = PHILOX_M0 * counter[0];
+            final long hi0 = 
UnsignedMultiplyHighSource.mhMultiplyHigh(PHILOX_M0, counter[0]);
+            final long lo1 = PHILOX_M1 * counter[2];
+            final long hi1 = 
UnsignedMultiplyHighSource.mhMultiplyHigh(PHILOX_M1, counter[2]);
+
+            counter[0] = hi1 ^ counter[1] ^ key0;
+            counter[1] = lo1;
+            counter[2] = hi0 ^ counter[3] ^ key1;
+            counter[3] = lo0;
+        }
+    }
+
+    /**
+     * Updated to use a MethodHandle to Math.unsignedMultiplyHigh (UMH) (Java 
18+).
+     * If not available then revert to a default implementation.
+     */
+    static final class Philox4x64UMH extends Philox4x64Original {
+        /**
+         * Creates a new instance.
+         *
+         * @param seed Seed.
+         */
+        Philox4x64UMH(long[] seed) {
+            super(seed);
+        }
+
+        @Override
+        protected void rand10() {
+            buffer[0] = counter0;
+            buffer[1] = counter1;
+            buffer[2] = counter2;
+            buffer[3] = counter3;
+
+            long k0 = key0;
+            long k1 = key1;
+
+            //unrolled loop for performance
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+            k0 += PHILOX_W0;
+            k1 += PHILOX_W1;
+            singleRound(buffer, k0, k1);
+        }
+
+
+        /**
+         * Performs a single round of philox.
+         *
+         * @param counter Counter, which will be updated after each call.
+         * @param key0 Key low bits.
+         * @param key1 Key high bits.
+         */
+        private static void singleRound(long[] counter, long key0, long key1) {
+            final long lo0 = PHILOX_M0 * counter[0];
+            final long hi0 = 
UnsignedMultiplyHighSource.mhUnsignedMultiplyHigh(PHILOX_M0, counter[0]);
+            final long lo1 = PHILOX_M1 * counter[2];
+            final long hi1 = 
UnsignedMultiplyHighSource.mhUnsignedMultiplyHigh(PHILOX_M1, counter[2]);
+
+            counter[0] = hi1 ^ counter[1] ^ key0;
+            counter[1] = lo1;
+            counter[2] = hi0 ^ counter[3] ^ key1;
+            counter[3] = lo0;
+        }
+    }
+
+    /**
+     * Baseline for a JMH method call returning an {@code int}.
+     *
+     * @return the value
+     */
+    @Benchmark
+    public int baselineInt() {
+        return intValue;
+    }
+
+    /**
+     * Baseline for a JMH method call returning a {@code long}.
+     *
+     * @return the value
+     */
+    @Benchmark
+    public long baselineLong() {
+        return longValue;
+    }
+
+    /**
+     * Exercise the {@link UniformRandomProvider#nextInt()} method.
+     *
+     * @param sources Source of randomness.
+     * @return the int
+     */
+    @Benchmark
+    public int nextInt(Sources sources) {
+        return sources.getGenerator().nextInt();
+    }
+
+    /**
+     * Exercise the {@link UniformRandomProvider#nextLong()} method.
+     *
+     * @param sources Source of randomness.
+     * @return the long
+     */
+    @Benchmark
+    public long nextLong(Sources sources) {
+        return sources.getGenerator().nextLong();
+    }
+}
diff --git 
a/commons-rng-examples/examples-jmh/src/test/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmarkTest.java
 
b/commons-rng-examples/examples-jmh/src/test/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmarkTest.java
index 5bc314b4..e8dbd41b 100644
--- 
a/commons-rng-examples/examples-jmh/src/test/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmarkTest.java
+++ 
b/commons-rng-examples/examples-jmh/src/test/java/org/apache/commons/rng/examples/jmh/core/LXMBenchmarkTest.java
@@ -80,8 +80,7 @@ class LXMBenchmarkTest {
     }
 
     @Test
-    // Note: JAVA_18 is not in the enum
-    @EnabledForJreRange(min = JRE.OTHER)
+    @EnabledForJreRange(min = JRE.JAVA_18)
     void testMathUnsignedMultiplyHigh() {
         try {
             
assertUnsignedMultiply(UnsignedMultiplyHighSource::mathUnsignedMultiplyHigh, 
null);
@@ -101,6 +100,16 @@ class LXMBenchmarkTest {
         assertUnsignedMultiply((a, b) -> 
UnsignedMultiplyHighSource.unsignedMultiplyHigh(a, b, lo), lo);
     }
 
+    @Test
+    void testMhMultiplyHigh() {
+        assertUnsignedMultiply(UnsignedMultiplyHighSource::mhMultiplyHigh, 
null);
+    }
+
+    @Test
+    void testMhUnsignedMultiplyHigh() {
+        
assertUnsignedMultiply(UnsignedMultiplyHighSource::mhUnsignedMultiplyHigh, 
null);
+    }
+
     private static void assertUnsignedMultiply(LongLongFunction fun, long[] 
lo) {
         final UniformRandomProvider rng = RandomSource.JSF_64.create();
         for (int i = 0; i < 100; i++) {


Reply via email to