Skip to content

Commit ffd97e9

Browse files
More vectorization of Shishua, .NET 8, use standard intrinsics API (#1)
1 parent 1be51d3 commit ffd97e9

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+298
-7825
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Setup .NET Core
1919
uses: actions/setup-dotnet@v1
2020
with:
21-
dotnet-version: '7.0.x'
21+
dotnet-version: '8.0.100-preview.5.23303.2'
2222

2323
- name: Install dependencies
2424
run: dotnet restore

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"dotnet.defaultSolution": "Fast.PRNGs.sln"
3+
}

Fast.PRNGs.sln

Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,58 +7,30 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{B56AF188-D99
77
EndProject
88
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{82A9760F-251B-4220-9263-153755FA2EC3}"
99
EndProject
10-
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "External", "External", "{12A7C294-6EF5-4FDF-A2BA-A01E320B9C36}"
11-
ProjectSection(SolutionItems) = preProject
12-
src\External\Directory.Build.props = src\External\Directory.Build.props
13-
EndProjectSection
14-
EndProject
15-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsics", "src\External\RawIntrinsics\RawIntrinsics.csproj", "{BA5145CD-6180-4BA3-817F-197158280327}"
16-
EndProject
17-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RawIntrinsicsGenerator", "src\External\RawIntrinsicsGenerator\RawIntrinsicsGenerator.csproj", "{A161A378-55BF-48D2-84FF-DA3F09EA5258}"
18-
EndProject
1910
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "_files", "_files", "{3D9E2A5B-D3F0-49AB-BEC3-647C5063537C}"
2011
ProjectSection(SolutionItems) = preProject
21-
Directory.Build.props = Directory.Build.props
22-
global.json = global.json
23-
Fast.PRNGs.sln = Fast.PRNGs.sln
2412
.editorconfig = .editorconfig
25-
.gitignore = .gitignore
2613
.gitattributes = .gitattributes
14+
.gitignore = .gitignore
15+
Directory.Build.props = Directory.Build.props
16+
Fast.PRNGs.sln = Fast.PRNGs.sln
17+
global.json = global.json
2718
EndProjectSection
2819
EndProject
29-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Fast.PRNGs", "src\Fast.PRNGs\Fast.PRNGs.csproj", "{AE271FFA-B5D2-40D8-92E4-71D970142F6D}"
20+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Fast.PRNGs", "src\Fast.PRNGs\Fast.PRNGs.csproj", "{AE271FFA-B5D2-40D8-92E4-71D970142F6D}"
3021
EndProject
31-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Fast.PRNGs.Tests", "test\Fast.PRNGs.Tests\Fast.PRNGs.Tests.csproj", "{732E59B8-C209-495B-8608-77E746A68F22}"
22+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Fast.PRNGs.Tests", "test\Fast.PRNGs.Tests\Fast.PRNGs.Tests.csproj", "{732E59B8-C209-495B-8608-77E746A68F22}"
3223
EndProject
3324
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "benchmark", "benchmark", "{089CE6DA-C860-48D3-95D2-353C7A71C9CD}"
3425
EndProject
35-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Fast.PRNGs.Benchmarks", "benchmark\Fast.PRNGs.Benchmarks\Fast.PRNGs.Benchmarks.csproj", "{2A875B02-B84C-43A3-BF16-593F5E6276BC}"
26+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Fast.PRNGs.Benchmarks", "benchmark\Fast.PRNGs.Benchmarks\Fast.PRNGs.Benchmarks.csproj", "{2A875B02-B84C-43A3-BF16-593F5E6276BC}"
3627
EndProject
3728
Global
3829
GlobalSection(SolutionConfigurationPlatforms) = preSolution
3930
Debug|Any CPU = Debug|Any CPU
4031
Release|Any CPU = Release|Any CPU
4132
EndGlobalSection
42-
GlobalSection(SolutionProperties) = preSolution
43-
HideSolutionNode = FALSE
44-
EndGlobalSection
45-
GlobalSection(NestedProjects) = preSolution
46-
{12A7C294-6EF5-4FDF-A2BA-A01E320B9C36} = {B56AF188-D999-4444-AE68-4971A573FAA4}
47-
{BA5145CD-6180-4BA3-817F-197158280327} = {12A7C294-6EF5-4FDF-A2BA-A01E320B9C36}
48-
{A161A378-55BF-48D2-84FF-DA3F09EA5258} = {12A7C294-6EF5-4FDF-A2BA-A01E320B9C36}
49-
{AE271FFA-B5D2-40D8-92E4-71D970142F6D} = {B56AF188-D999-4444-AE68-4971A573FAA4}
50-
{732E59B8-C209-495B-8608-77E746A68F22} = {82A9760F-251B-4220-9263-153755FA2EC3}
51-
{2A875B02-B84C-43A3-BF16-593F5E6276BC} = {089CE6DA-C860-48D3-95D2-353C7A71C9CD}
52-
EndGlobalSection
5333
GlobalSection(ProjectConfigurationPlatforms) = postSolution
54-
{BA5145CD-6180-4BA3-817F-197158280327}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
55-
{BA5145CD-6180-4BA3-817F-197158280327}.Debug|Any CPU.Build.0 = Debug|Any CPU
56-
{BA5145CD-6180-4BA3-817F-197158280327}.Release|Any CPU.ActiveCfg = Release|Any CPU
57-
{BA5145CD-6180-4BA3-817F-197158280327}.Release|Any CPU.Build.0 = Release|Any CPU
58-
{A161A378-55BF-48D2-84FF-DA3F09EA5258}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
59-
{A161A378-55BF-48D2-84FF-DA3F09EA5258}.Debug|Any CPU.Build.0 = Debug|Any CPU
60-
{A161A378-55BF-48D2-84FF-DA3F09EA5258}.Release|Any CPU.ActiveCfg = Release|Any CPU
61-
{A161A378-55BF-48D2-84FF-DA3F09EA5258}.Release|Any CPU.Build.0 = Release|Any CPU
6234
{AE271FFA-B5D2-40D8-92E4-71D970142F6D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6335
{AE271FFA-B5D2-40D8-92E4-71D970142F6D}.Debug|Any CPU.Build.0 = Debug|Any CPU
6436
{AE271FFA-B5D2-40D8-92E4-71D970142F6D}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -72,4 +44,12 @@ Global
7244
{2A875B02-B84C-43A3-BF16-593F5E6276BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
7345
{2A875B02-B84C-43A3-BF16-593F5E6276BC}.Release|Any CPU.Build.0 = Release|Any CPU
7446
EndGlobalSection
47+
GlobalSection(SolutionProperties) = preSolution
48+
HideSolutionNode = FALSE
49+
EndGlobalSection
50+
GlobalSection(NestedProjects) = preSolution
51+
{AE271FFA-B5D2-40D8-92E4-71D970142F6D} = {B56AF188-D999-4444-AE68-4971A573FAA4}
52+
{732E59B8-C209-495B-8608-77E746A68F22} = {82A9760F-251B-4220-9263-153755FA2EC3}
53+
{2A875B02-B84C-43A3-BF16-593F5E6276BC} = {089CE6DA-C860-48D3-95D2-353C7A71C9CD}
54+
EndGlobalSection
7555
EndGlobal

README.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,5 @@ NOTE - MWC256 is likely poorly implemented (it is supposed to be faster). As see
3838
This is clear from the generated assembly at the moment, but I'm not sure why those branching instructions are generated. `UInt128` support is pretty new
3939
so maybe there are some inefficiencies there.
4040

41-
![Scaling iterations](/img/perf-scaling.png "Scaling iterations")
42-
43-
#### With hardware counters
44-
45-
Instrumented with more diagnostics, including hardware counters
46-
47-
![With hardware counters](/img/perf-hardwarecounters.png "With hardware counters")
41+
![Scaling iterations](/img/perf-scaling-2.png "Scaling iterations")
4842

benchmark/Fast.PRNGs.Benchmarks/Fast.PRNGs.Benchmarks.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<PropertyGroup>
33
<OutputType>Exe</OutputType>
44
<LangVersion>latest</LangVersion>
5-
<TargetFramework>net7.0</TargetFramework>
5+
<TargetFramework>net8.0</TargetFramework>
66
<Nullable>disable</Nullable>
77
<DebugType>pdbonly</DebugType>
88
<DebugSymbols>true</DebugSymbols>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
namespace Fast.PRNGs.Benchmarks.Internals;
2+
3+
[ConfigSource]
4+
public class ToDoublesBenchmark
5+
{
6+
internal const ulong DoubleMask = (1L << 53) - 1;
7+
internal const double Norm53 = 1.0d / (1L << 53);
8+
9+
[Params(31512512431231UL)]
10+
public ulong Value { get; set; }
11+
12+
[Benchmark]
13+
public double Original()
14+
{
15+
return (Value & DoubleMask) * Norm53;
16+
}
17+
18+
[Benchmark]
19+
public double New()
20+
{
21+
return (Value >> 11) * (1.0 / (1ul << 53));
22+
}
23+
24+
private class ConfigSourceAttribute : Attribute, IConfigSource
25+
{
26+
public IConfig Config { get; }
27+
28+
public ConfigSourceAttribute() => Config = new SimpleBenchConfig(8);
29+
}
30+
}

benchmark/Fast.PRNGs.Benchmarks/PRNGsScaling.cs

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
1+
using BenchmarkDotNet.Environments;
2+
using System.Runtime.Intrinsics;
3+
14
namespace Fast.PRNGs.Benchmarks;
25

3-
[Config(typeof(Config))]
6+
[ConfigSource]
47
public class PRNGsScaling
58
{
9+
private const int _iterations = 1 << 17;
10+
611
private Random _random;
7-
private Shishua _shishua;
12+
private Shishua _shishuaSeq;
13+
private Shishua _shishuaVec256;
14+
private Shishua _shishuaVec512;
815
private Xoroshiro128Plus _xoroshiro128plus;
916
private Xoshiro256Plus _xoshiro256plus;
1017
private MWC256 _mwc256;
1118

12-
[Params(100_000, 1_000_000)]
19+
[Params(_iterations)]
1320
public int Iterations { get; set; }
1421

1522
[GlobalSetup]
1623
public void Setup()
1724
{
1825
_random = new Random();
19-
_shishua = Shishua.Create();
26+
_shishuaSeq = Shishua.Create();
27+
_shishuaVec256 = Shishua.Create();
28+
_shishuaVec512 = Shishua.Create();
2029
_xoroshiro128plus = Xoroshiro128Plus.Create();
2130
_xoshiro256plus = Xoshiro256Plus.Create();
2231
_mwc256 = MWC256.Create();
@@ -25,7 +34,9 @@ public void Setup()
2534
[GlobalCleanup]
2635
public void Cleanup()
2736
{
28-
_shishua.Dispose();
37+
_shishuaSeq.Dispose();
38+
_shishuaVec256.Dispose();
39+
_shishuaVec512.Dispose();
2940
}
3041

3142
[Benchmark(Baseline = true)]
@@ -38,10 +49,30 @@ public double SystemRandomGen()
3849
}
3950

4051
[Benchmark]
41-
public double ShishuaGen()
52+
public double ShishuaSeqGen()
4253
{
4354
for (int i = 0; i < Iterations; i++)
44-
_ = _shishua.NextDouble();
55+
_ = _shishuaSeq.NextDouble();
56+
57+
return default;
58+
}
59+
60+
[Benchmark]
61+
public double ShishuaVec256Gen()
62+
{
63+
Vector256<double> result = default;
64+
for (int i = 0; i < Iterations; i += 4)
65+
_shishuaVec256.NextDoubles256(ref result);
66+
67+
return default;
68+
}
69+
70+
[Benchmark]
71+
public double ShishuaVec512Gen()
72+
{
73+
Vector512<double> result = default;
74+
for (int i = 0; i < Iterations; i += 8)
75+
_shishuaVec512.NextDoubles512(ref result);
4576

4677
return default;
4778
}
@@ -73,13 +104,10 @@ public double MWC256Gen()
73104
return default;
74105
}
75106

76-
private sealed class Config : ManualConfig
107+
private class ConfigSourceAttribute : Attribute, IConfigSource
77108
{
78-
public Config()
79-
{
80-
this.SummaryStyle = SummaryStyle.Default.WithRatioStyle(RatioStyle.Trend);
81-
this.AddColumn(RankColumn.Arabic);
82-
this.Orderer = new DefaultOrderer(SummaryOrderPolicy.SlowestToFastest, MethodOrderPolicy.Declared);
83-
}
109+
public IConfig Config { get; }
110+
111+
public ConfigSourceAttribute() => Config = new SimpleBenchConfig(_iterations * sizeof(double));
84112
}
85113
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
namespace Fast.PRNGs.Benchmarks;
2+
3+
internal sealed class SimpleBenchConfig : ManualConfig
4+
{
5+
public SimpleBenchConfig(ulong? byteSizePerIteration = null)
6+
{
7+
this.SummaryStyle = SummaryStyle.Default.WithRatioStyle(RatioStyle.Trend);
8+
this.AddColumn(RankColumn.Arabic);
9+
this.Orderer = new DefaultOrderer(SummaryOrderPolicy.SlowestToFastest, MethodOrderPolicy.Declared);
10+
if (byteSizePerIteration != null)
11+
this.AddColumn(new ThroughputColumn(byteSizePerIteration.Value));
12+
}
13+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
using BenchmarkDotNet.Running;
2+
3+
namespace Fast.PRNGs.Benchmarks;
4+
5+
public class ThroughputColumn : IColumn
6+
{
7+
public string Id { get; }
8+
9+
public string ColumnName { get; }
10+
11+
private readonly ulong _byteSizePerIteration;
12+
13+
public ThroughputColumn(ulong byteSizePerIteration)
14+
{
15+
ColumnName = "Throughput";
16+
Id = nameof(TagColumn) + "." + ColumnName;
17+
18+
_byteSizePerIteration = byteSizePerIteration;
19+
}
20+
21+
public bool IsDefault(Summary summary, BenchmarkCase benchmarkCase) => false;
22+
public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
23+
{
24+
var stats = summary[benchmarkCase].ResultStatistics;
25+
if (stats is null || stats.Mean == default || double.IsNaN(stats.Mean))
26+
return "?";
27+
28+
var gbs = (_byteSizePerIteration / 1e9d) / (stats.Mean / 1e9d);
29+
return $"{gbs:0.00} GB/s";
30+
}
31+
32+
public bool IsAvailable(Summary summary) => true;
33+
public bool AlwaysShow => true;
34+
public ColumnCategory Category => ColumnCategory.Metric;
35+
public int PriorityInCategory => 0;
36+
public bool IsNumeric => true;
37+
public UnitType UnitType => UnitType.Size;
38+
public string Legend => $"Throughput in GB/s";
39+
public string GetValue(Summary summary, BenchmarkCase benchmarkCase, SummaryStyle style) => GetValue(summary, benchmarkCase);
40+
public override string ToString() => ColumnName;
41+
}

global.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"sdk": {
3-
"version": "7.0.201",
4-
"rollForward": "latestFeature"
3+
"version": "8.0.100-preview.5.23303.2",
4+
"rollForward": "latestFeature",
5+
"allowPrerelease": true
56
}
6-
}
7+
}

0 commit comments

Comments
 (0)