This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-lang.git
The following commit(s) were added to refs/heads/master by this push:
new c2260f094 [LANG-1772] Restrict size of cache to prevent overflow
errors (#1379)
c2260f094 is described below
commit c2260f094d78c236bb8a2e3e153d422c7d7ad6bf
Author: James Winters <[email protected]>
AuthorDate: Sun May 25 07:53:37 2025 +1000
[LANG-1772] Restrict size of cache to prevent overflow errors (#1379)
* LANG-1772 restrict size of cache to prevent overflow errors
* LANG-1772 Adding a largeheap maven profile. Skipping the testHugeStrings
method in all other profiles.
* LANG-1772 Adding javadoc as per request. Also moving max cache size
inside the CachedRandomBits constructor - also checking if the padding produces
overflow. No longer using an arbitrary value but being more precise.
* LANG-1772 Suggestions from PR implemented
* LANG-1772 Using a long for the intermediate calculation and restricting
the result to MAX_INT/5, also restricting max cache length to MAX_INT/3, there
are now no opportunities for overflow. The test checks at the boundary
condition
* Close HTML tag
* LANG-1772 Introduced many constants, and attempted to add comprehensive
documentation around the nextBits method and the size allocation for the cache
* Update src/main/java/org/apache/commons/lang3/RandomStringUtils.java
You're right, should be outside the min
Co-authored-by: Piotr P. Karwasz <[email protected]>
---------
Co-authored-by: James Winters <[email protected]>
Co-authored-by: Gary Gregory <[email protected]>
Co-authored-by: Piotr P. Karwasz <[email protected]>
---
pom.xml | 14 ++++-
.../org/apache/commons/lang3/CachedRandomBits.java | 60 +++++++++++++++++-----
.../apache/commons/lang3/RandomStringUtils.java | 15 +++++-
.../commons/lang3/RandomStringUtilsTest.java | 13 +++++
4 files changed, 87 insertions(+), 15 deletions(-)
diff --git a/pom.xml b/pom.xml
index f7cae3ab1..5d320b70a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -110,7 +110,10 @@
</distributionManagement>
<properties>
- <argLine>-Xmx512m</argLine>
+ <heapSize>-Xmx512m</heapSize>
+ <extraArgs/>
+ <systemProperties/>
+ <argLine>${heapSize} ${extraArgs} ${systemProperties}</argLine>
<project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<!-- project.build.outputTimestamp is managed by Maven plugins, see
https://maven.apache.org/guides/mini/guide-reproducible-builds.html -->
@@ -460,7 +463,7 @@
<properties>
<!-- LANG-1265: allow tests to access private fields/methods of
java.base classes via reflection -->
<!-- LANG-1667: allow tests to access private fields/methods of
java.base/java.util such as ArrayList via reflection -->
- <argLine>-Xmx512m --add-opens java.base/java.lang.reflect=ALL-UNNAMED
--add-opens java.base/java.lang=ALL-UNNAMED --add-opens
java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED
--add-opens java.base/java.time.chrono=ALL-UNNAMED</argLine>
+ <extraArgs>--add-opens java.base/java.lang.reflect=ALL-UNNAMED
--add-opens java.base/java.lang=ALL-UNNAMED --add-opens
java.base/java.util=ALL-UNNAMED --add-opens java.base/java.time=ALL-UNNAMED
--add-opens java.base/java.time.chrono=ALL-UNNAMED</extraArgs>
</properties>
</profile>
<profile>
@@ -522,6 +525,13 @@
</plugins>
</build>
</profile>
+ <profile>
+ <id>largeheap</id>
+ <properties>
+ <heapSize>-Xmx1024m</heapSize>
+ <systemProperties>-Dtest.large.heap=true</systemProperties>
+ </properties>
+ </profile>
</profiles>
<developers>
<developer>
diff --git a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java
b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java
index 92b561f38..5b90a0ecb 100644
--- a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java
+++ b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java
@@ -52,6 +52,20 @@ final class CachedRandomBits {
*/
private int bitIndex;
+ /**
+ * The maximum size of the cache.
+ *
+ * <p>
+ * This is to prevent the possibility of overflow in the {@code if
(bitIndex >> 3 >= cache.length)} in the {@link #nextBits(int)} method.
+ * </p>
+ */
+ private static final int MAX_CACHE_SIZE = Integer.MAX_VALUE >> 3;
+ /** Maximum number of bits that can be generated (size of an int) */
+ private static final int MAX_BITS = 32;
+ /** Mask to extract the bit offset within a byte (0-7) */
+ private static final int BIT_INDEX_MASK = 0x7;
+ /** Number of bits in a byte */
+ private static final int BITS_PER_BYTE = 8;
/**
* Creates a new instance.
*
@@ -62,7 +76,7 @@ final class CachedRandomBits {
if (cacheSize <= 0) {
throw new IllegalArgumentException("cacheSize must be positive");
}
- this.cache = new byte[cacheSize];
+ this.cache = cacheSize <= MAX_CACHE_SIZE ? new byte[cacheSize] : new
byte[MAX_CACHE_SIZE];
this.random = Objects.requireNonNull(random, "random");
this.random.nextBytes(this.cache);
this.bitIndex = 0;
@@ -71,28 +85,50 @@ final class CachedRandomBits {
/**
* Generates a random integer with the specified number of bits.
*
- * @param bits number of bits to generate, MUST be between 1 and 32
- * @return random integer with {@code bits} bits
+ * <p>This method efficiently generates random bits by using a byte cache
and bit manipulation:
+ * <ul>
+ * <li>Uses a byte array cache to avoid frequent calls to the underlying
random number generator</li>
+ * <li>Extracts bits from each byte using bit shifting and masking</li>
+ * <li>Handles partial bytes to avoid wasting random bits</li>
+ * <li>Accumulates bits until the requested number is reached</li>
+ * </ul>
+ * </p>
+ *
+ * @param bits number of bits to generate, MUST be between 1 and 32
(inclusive)
+ * @return random integer containing exactly the requested number of
random bits
+ * @throws IllegalArgumentException if bits is not between 1 and 32
*/
public int nextBits(final int bits) {
- if (bits > 32 || bits <= 0) {
- throw new IllegalArgumentException("number of bits must be between
1 and 32");
+ if (bits > MAX_BITS || bits <= 0) {
+ throw new IllegalArgumentException("number of bits must be between
1 and " + MAX_BITS);
}
int result = 0;
int generatedBits = 0; // number of generated bits up to now
while (generatedBits < bits) {
+ // Check if we need to refill the cache
+ // Convert bitIndex to byte index by dividing by 8 (right shift by
3)
if (bitIndex >> 3 >= cache.length) {
- // we exhausted the number of bits in the cache
- // this should only happen if the bitIndex is exactly matching
the cache length
- assert bitIndex == cache.length * 8;
+ // We exhausted the number of bits in the cache
+ // This should only happen if the bitIndex is exactly matching
the cache length
+ assert bitIndex == cache.length * BITS_PER_BYTE;
random.nextBytes(cache);
bitIndex = 0;
}
- // generatedBitsInIteration is the number of bits that we will
generate
- // in this iteration of the while loop
- final int generatedBitsInIteration = Math.min(8 - (bitIndex &
0x7), bits - generatedBits);
+ // Calculate how many bits we can extract from the current byte
+ // 1. Get current position within byte (0-7) using bitIndex & 0x7
+ // 2. Calculate remaining bits in byte: 8 - (position within byte)
+ // 3. Take minimum of remaining bits in byte and bits still needed
+ final int generatedBitsInIteration = Math.min(
+ BITS_PER_BYTE - (bitIndex & BIT_INDEX_MASK),
+ bits - generatedBits);
+ // Shift existing result left to make room for new bits
result = result << generatedBitsInIteration;
- result |= cache[bitIndex >> 3] >> (bitIndex & 0x7) & (1 <<
generatedBitsInIteration) - 1;
+ // Extract and append new bits:
+ // 1. Get byte from cache (bitIndex >> 3 converts bit index to
byte index)
+ // 2. Shift right by bit position within byte (bitIndex & 0x7)
+ // 3. Mask to keep only the bits we want ((1 <<
generatedBitsInIteration) - 1)
+ result |= cache[bitIndex >> 3] >> (bitIndex & BIT_INDEX_MASK) &
((1 << generatedBitsInIteration) - 1);
+ // Update counters
generatedBits += generatedBitsInIteration;
bitIndex += generatedBitsInIteration;
}
diff --git a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java
b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java
index a75c7e85d..6f415f679 100644
--- a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java
@@ -98,6 +98,10 @@ public class RandomStringUtils {
private static final int ASCII_A = 'A';
private static final int ASCII_z = 'z';
+ private static final int CACHE_PADDING_BITS = 3;
+ private static final int BITS_TO_BYTES_DIVISOR = 5;
+ private static final int BASE_CACHE_SIZE_PADDING = 10;
+
/**
* Gets the singleton instance based on {@link
ThreadLocalRandom#current()}; <b>which is not cryptographically
* secure</b>; use {@link #secure()} to use an algorithms/providers
specified in the
@@ -329,7 +333,16 @@ public static String random(int count, int start, int end,
final boolean letters
// Ideally the cache size depends on multiple factor, including the
cost of generating x bytes
// of randomness as well as the probability of rejection. It is
however not easy to know
// those values programmatically for the general case.
- final CachedRandomBits arb = new CachedRandomBits((count * gapBits +
3) / 5 + 10, random);
+ // Calculate cache size:
+ // 1. Multiply count by bits needed per character (gapBits)
+ // 2. Add padding bits (3) to handle partial bytes
+ // 3. Divide by 5 to convert to bytes (normally this would be by 8,
dividing by 5 allows for about 60% extra space)
+ // 4. Add base padding (10) to handle small counts efficiently
+ // 5. Ensure we don't exceed Integer.MAX_VALUE / 5 + 10 to provide a
good balance between overflow prevention and
+ // making the cache extremely large
+ final long desiredCacheSize = ((long) count * gapBits +
CACHE_PADDING_BITS) / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING;
+ final int cacheSize = (int) Math.min(desiredCacheSize,
Integer.MAX_VALUE / BITS_TO_BYTES_DIVISOR + BASE_CACHE_SIZE_PADDING);
+ final CachedRandomBits arb = new CachedRandomBits(cacheSize, random);
while (count-- != 0) {
// Generate a random value between start (included) and end
(excluded)
diff --git a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java
b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java
index 186358e09..0281ceece 100644
--- a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java
@@ -29,8 +29,10 @@
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
/**
* Tests {@link RandomStringUtils}.
@@ -38,6 +40,9 @@
public class RandomStringUtilsTest extends AbstractLangTest {
private static final int LOOP_COUNT = 1_000;
+ /** Maximum safe value for count to avoid overflow: (21x + 3) / 5 + 10 <
0x0FFF_FFFF */
+ private static final int MAX_SAFE_COUNT = 63_913_201;
+
static Stream<RandomStringUtils> randomProvider() {
return Stream.of(RandomStringUtils.secure(),
RandomStringUtils.secureStrong(), RandomStringUtils.insecure());
@@ -802,4 +807,12 @@ public void testRandomWithChars(final RandomStringUtils
rsu) {
assertNotEquals(r1, r3);
assertNotEquals(r2, r3);
}
+
+ @ParameterizedTest
+ @ValueSource(ints = {MAX_SAFE_COUNT, MAX_SAFE_COUNT + 1})
+ @EnabledIfSystemProperty(named = "test.large.heap", matches = "true")
+ public void testHugeStrings(final int expectedLength) {
+ final String hugeString = RandomStringUtils.random(expectedLength);
+ assertEquals(expectedLength, hugeString.length(), "hugeString.length()
== expectedLength");
+ }
}