@@ -72,7 +72,7 @@ static void BM_LSTM(benchmark::State &state) {
7272 // FLOPS for LSTM: ignore activations, assume static S and B.
7373 // Eight matrix-matrix multiplications are combined into two
7474 // matrix-matrix multiplications: [B,I]x[I,4*H] and [B,H]x[H,4*H].
75- PERF_RECORD_FLOPS (
75+ perf_recordFlops (state,
7676 D * S * (4.0 * B * H * (2.0 * I - 1.0 ) + 4.0 * B * H * (2.0 * H - 1.0 )));
7777}
7878BENCHMARK (BM_LSTM)->Apply(CommonArgs)->Unit(benchmark::kMillisecond );
@@ -94,7 +94,7 @@ static void BM_GRU_LINEAR_BEFORE_RESET(benchmark::State &state) {
9494 // FLOPS for GRU: ignore activations, assume static S and B.
9595 // Six matrix-matrix multiplications are combined into two
9696 // matrix-matrix multiplications: [B,I]x[I,3*H] and [B,H]x[H,3*H].
97- PERF_RECORD_FLOPS (
97+ perf_recordFlops (state,
9898 D * S * (3.0 * B * H * (2.0 * I - 1.0 ) + 3.0 * B * H * (2.0 * H - 1.0 )));
9999}
100100BENCHMARK (BM_GRU_LINEAR_BEFORE_RESET)
@@ -118,7 +118,7 @@ static void BM_GRU_LINEAR_AFTER_RESET(benchmark::State &state) {
118118 // FLOPS for GRU: ignore activations, assume static S and B.
119119 // Six matrix-matrix multiplications are combined into two
120120 // matrix-matrix multiplications: [B,I]x[I,3*H] and [B,H]x[H,3*H].
121- PERF_RECORD_FLOPS (
121+ perf_recordFlops (state,
122122 D * S * (3.0 * B * H * (2.0 * I - 1.0 ) + 3.0 * B * H * (2.0 * H - 1.0 )));
123123}
124124BENCHMARK (BM_GRU_LINEAR_AFTER_RESET)
@@ -141,8 +141,8 @@ static void BM_RNN(benchmark::State &state) {
141141 rnn.run ();
142142 // FLOPS for RNN: ignore activations, assume static S and B.
143143 // Two matrix-matrix multiplications: [B,I]x[I,H] and [B,H]x[H,H].
144- PERF_RECORD_FLOPS (
145- D * S * (B * H * (2.0 * I - 1.0 ) + B * H * (2.0 * H - 1.0 )));
144+ perf_recordFlops (
145+ state, D * S * (B * H * (2.0 * I - 1.0 ) + B * H * (2.0 * H - 1.0 )));
146146}
147147BENCHMARK (BM_RNN)->Apply(CommonArgs)->Unit(benchmark::kMillisecond );
148148
0 commit comments