Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
// MODULES //

var bench = require( '@stdlib/bench' );
var minstd = require( '@stdlib/random/base/minstd' );
var discreteUniform = require( '@stdlib/random/array/discrete-uniform' );
var isnan = require( '@stdlib/math/base/assert/is-nan' );
var format = require( '@stdlib/string/format' );
var pkg = require( './../package.json' ).name;
Expand All @@ -33,12 +33,17 @@ var umuldw = require( './../lib' );
bench( pkg, function benchmark( b ) {
var x;
var y;
var z;
var i;

x = discreteUniform( 100, 0x10000, 0x10000000, {
'dtype': 'uint32'
});

b.tic();
for ( i = 0; i < b.iterations; i++ ) {
x = minstd();
y = umuldw( x, x );
z = x[ i%x.length ];
y = umuldw( z, z );
if ( isnan( y[0] ) ) {
b.fail( 'should not return NaN' );
}
Expand All @@ -55,14 +60,19 @@ bench( format( '%s:assign', pkg ), function benchmark( b ) {
var out;
var x;
var y;
var z;
var i;

out = [ 0.0, 0.0];
x = discreteUniform( 100, 0x10000, 0x10000000, {
'dtype': 'uint32'
});

out = [ 0.0, 0.0 ];

b.tic();
for ( i = 0; i < b.iterations; i++ ) {
x = minstd();
y = umuldw.assign( x, x, out, 1, 0 );
z = x[ i%x.length ];
y = umuldw.assign( z, z, out, 1, 0 );
if ( isnan( y[0] ) ) {
b.fail( 'should not return NaN' );
}
Expand Down
11 changes: 2 additions & 9 deletions lib/node_modules/@stdlib/number/uint32/base/muldw/lib/assign.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

// MODULES //

var isnan = require( '@stdlib/math/base/assert/is-nan' );
var umul = require( '@stdlib/number/uint32/base/mul' );


// VARIABLES //
Expand Down Expand Up @@ -49,19 +49,13 @@ var LOW_WORD_MASK = 0x0000ffff>>>0; // asm type annotation
function umuldw(a, b, out, stride, offset ) {
var w1;
var w2;
var w3;
var ha;
var hb;
var la;
var lb;
var t;
var k;

if ( isnan( a ) || isnan( b ) ) {
out[ offset ] = NaN;
out[ offset + stride ] = NaN;
return out;
}
a >>>= 0; // asm type annotation
b >>>= 0; // asm type annotation

Expand All @@ -72,7 +66,6 @@ function umuldw(a, b, out, stride, offset ) {
lb = ( b & LOW_WORD_MASK ) >>> 0;

t = ( la*lb ) >>> 0;
w3 = ( t & LOW_WORD_MASK ) >>> 0;
k = ( t >>> 16 ) >>> 0;

t = ( ( ha*lb ) + k ) >>> 0;
Expand All @@ -83,7 +76,7 @@ function umuldw(a, b, out, stride, offset ) {
k = ( t >>> 16 ) >>> 0;

out[ offset ] = ( ( ha*hb ) + w1 + k ) >>> 0; // compute the higher 32 bits and cast to an unsigned 32-bit integer
out[ offset + stride ] = ( ( t << 16 ) + w3) >>> 0; // compute the lower 32 bits and cast to an unsigned 32-bit integer
out[ offset + stride ] = umul( a, b ) >>> 0; // compute the lower 32 bits and cast to an unsigned 32-bit integer
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@impawstarlight I am a bit dense, but how does this manage to produce the same result? Previously, the logic for computing the lower 32 bits doesn't exceed the max uint32, but, here, a*b could, resulting in wraparound, which is a bit counterintuitive to me that it achieves the same result.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately, this boils down to a call to imul, but not obvious to me why imul is faster than a bit shift plus addition.

Copy link
Copy Markdown
Contributor Author

@impawstarlight impawstarlight Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, the logic for computing the lower 32 bits doesn't exceed the max uint32, but, here, a*b could, resulting in wraparound, which is a bit counterintuitive to me that it achieves the same result.

Actually, the way it avoids overflow is a clever engineering trick for extracting those bits that would normally overflow outside the lower 32 bits. This is done because these overflow bits contribute to the higher 32 bits and are hence necessary for that calculation.

But for the lower 32 bits, we could very well make do with allowing overflow if we didn't have to calculate the higher 32 bits, like here in our imul polyfill.

Ultimately, it is fully equivalent to imul because of what its purpose is - calculating the low 32 bits of a 32x32-bit multiplication (i.e., the product modulo 2^32) - which is basically the definition of imul.

So the wraparound behavior of imul is also happening in the shift-add approach; it is just not very obvious because it is handled through the 16-bit splitting logic, which eliminates any intermediate overflow.

Copy link
Copy Markdown
Contributor Author

@impawstarlight impawstarlight Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately, this boils down to a call to imul, but not obvious to me why imul is faster than a bit shift plus addition.

imul is probably faster here because previously we were doing 3 operations:

w3 = ( t & LOW_WORD_MASK ) >>> 0;
...
out[ offset + stride ] = ( ( t << 16 ) + w3) >>> 0;

So we're comparing AND + SHIFT + ADD vs IMUL. Although individual add and bitwise instructions are very fast, the combination is probably slower than a single IMUL instruction because of various other factors, such as moving intermediate values around between registers. Just my guess, but the benchmark agrees.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, sounds good.


return out;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
// MODULES //

var tape = require( 'tape' );
var isnan = require( '@stdlib/math/base/assert/is-nan' );
var Float64Array = require( '@stdlib/array/float64' );
var umuldw = require( './../lib/assign.js' );

Expand All @@ -39,31 +38,6 @@ tape( 'main export is a function', function test( t ) {
t.end();
});

tape( 'the function returns `NaN` if provided `NaN`', function test( t ) {
var out;
var v;

out = [ 0, 0 ];
v = umuldw( NaN, 1, out, 1, 0 );
t.strictEqual( v, out, 'returns output array' );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

out = [ 0, 0 ];
v = umuldw( 1, NaN, out, 1, 0 );
t.strictEqual( v, out, 'returns output array' );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

out = [ 0, 0 ];
v = umuldw( NaN, NaN, out, 1, 0 );
t.strictEqual( v, out, 'returns output array' );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

t.end();
});

tape( 'the function computes the double word product of two (unsigned) words', function test( t ) {
var expected;
var actual;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
// MODULES //

var tape = require( 'tape' );
var isnan = require( '@stdlib/math/base/assert/is-nan' );
var umuldw = require( './../lib/main.js' );


Expand All @@ -38,24 +37,6 @@ tape( 'main export is a function', function test( t ) {
t.end();
});

tape( 'the function returns `NaN` if provided `NaN`', function test( t ) {
var v;

v = umuldw( NaN, 1 );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

v = umuldw( 1, NaN );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

v = umuldw( NaN, NaN );
t.strictEqual( isnan( v[0] ), true, 'returns expected value' );
t.strictEqual( isnan( v[1] ), true, 'returns expected value' );

t.end();
});

tape( 'the function computes the double word product of two (unsigned) words', function test( t ) {
var expected;
var actual;
Expand Down