@@ -24,6 +24,111 @@ inline int fastrand() {
2424#define def_fastrand
2525#endif
2626
/**
 * @brief Per-thread state words of the xoshiro128++ generator.
 *
 * The four 32-bit words together form the generator's 128-bit state. Because
 * the array is thread_local, every thread owns a private copy, so the
 * generator can read and update it without any locking.
 *
 * The array starts out zero-initialized (static storage duration) and is
 * filled in by init_seed() before xoshiro128_plus_plus() first uses it on a
 * given thread. Having separate state per thread does not by itself make the
 * sequences different: that depends on the seeds chosen by init_seed().
 */
static thread_local uint32_t s[4 ];
34+
/**
 * @brief Per-thread flag recording whether the generator state has been seeded.
 *
 * Starts at 0 on every thread. init_seed() sets it to 1 after filling the
 * state, and xoshiro128_plus_plus() checks it on each call so that seeding
 * happens lazily, once, on the first use in each thread.
 */
static thread_local uint8_t seeded = 0 ;
41+
42+ /* *
43+ * @brief Initialize the thread-local PRNG state.
44+ *
45+ * Seeds the 128-bit xoshiro state using a mix of the monotonic clock and the
46+ * calling thread identifier. A splitmix-like mixing function is applied to
47+ * produce well-dispersed bits. Ensures the state is not all zeros.
48+ *
49+ * Important:
50+ * - Uses CLOCK_MONOTONIC to reduce susceptibility to wall-clock changes.
51+ * - Not cryptographically secure. Do not use for security-sensitive code.
52+ */
53+ static void init_seed (void ) {
54+ struct timespec ts;
55+ clock_gettime (CLOCK_MONOTONIC, &ts);
56+
57+ uint64_t t = ((uint64_t )ts.tv_nsec ) ^ ((uint64_t )ts.tv_sec << 32 );
58+ uint64_t tid = (uintptr_t )pthread_self ();
59+
60+ // Simple mixing: XOR, shifts, multiplies
61+ uint64_t x = t ^ tid;
62+ x ^= x >> 33 ;
63+ x *= 0xff51afd7ed558ccdULL ;
64+ x ^= x >> 33 ;
65+ x *= 0xc4ceb9fe1a85ec53ULL ;
66+ x ^= x >> 33 ;
67+
68+ // Split into four 32-bit words
69+ s[0 ] = (uint32_t )x;
70+ s[1 ] = (uint32_t )(x >> 32 );
71+ s[2 ] = ~s[0 ]; // invert for extra diversity
72+ s[3 ] = ~s[1 ];
73+
74+ // avoid all-zero state
75+ if (!s[0 ] && !s[1 ] && !s[2 ] && !s[3 ])
76+ s[0 ] = 1 ;
77+
78+ seeded = 1 ;
79+ }
80+
/**
 * @brief Rotate a 32-bit value left.
 *
 * Both shift counts are masked to the range 0..31, so a rotation amount of 0
 * (or any multiple of 32) returns x unchanged instead of evaluating a shift
 * by 32, which is undefined for a 32-bit operand.
 *
 * @param x Value to rotate.
 * @param k Rotation amount in bits; only the low five bits are used, i.e. the
 *          amount is taken modulo 32.
 * @return x rotated left by (k mod 32) bits.
 */
static inline uint32_t rotl(uint32_t x, int k) {
    const unsigned n = (unsigned)k & 31u;
    /* (32 - n) & 31 maps n == 0 to a right shift of 0 rather than 32. */
    return (x << n) | (x >> ((32u - n) & 31u));
}
91+
92+ /* *
93+ * @brief xoshiro128++ PRNG round function.
94+ *
95+ * This is the "++" variant: result = rotl(s0 + s3, 7) + s0.
96+ * It updates the internal state using xorshift operations and a rotation.
97+ * The algorithm is designed for speed and statistical quality.
98+ *
99+ * Thread safety:
100+ * - Uses thread-local state; no locks required.
101+ *
102+ * @return A 32-bit pseudo-random number.
103+ */
104+ static uint32_t xoshiro128_plus_plus (void ) {
105+ if (!seeded) init_seed ();
106+
107+ const uint32_t result = rotl (s[0 ] + s[3 ], 7 ) + s[0 ];
108+ const uint32_t t = s[1 ] << 9 ;
109+
110+ s[2 ] ^= s[0 ];
111+ s[3 ] ^= s[1 ];
112+ s[1 ] ^= s[2 ];
113+ s[0 ] ^= s[3 ];
114+ s[2 ] ^= t;
115+ s[3 ] = rotl (s[3 ], 11 );
116+
117+ return result;
118+ }
119+
120+ /* *
121+ * @brief Fast, non-cryptographic random number generator.
122+ *
123+ * Convenience wrapper over xoshiro128_plus_plus(). Returns a 32-bit
124+ * pseudo-random value. On first call per thread, the generator is seeded.
125+ *
126+ * @return A 32-bit pseudo-random number.
127+ */
128+ static inline uint32_t rand_fast () {
129+ return xoshiro128_plus_plus ();
130+ }
131+
27132class PtrArray {
28133 private:
29134 void expand (unsigned int more) {
0 commit comments