From 7467d5c2eb599887aadbc25756147f793878ee92 Mon Sep 17 00:00:00 2001 From: brunoortega Date: Tue, 26 May 2026 18:07:18 -0300 Subject: [PATCH 1/2] Adding Uniform Checkpointing class and updating cuSZp compressor to V3.0.0 --- docs/CppCudaExamples.md | 2 +- src/Compressor/include/Compressor.hpp | 2 + src/Compressor/include/CompressorCuSZp.hpp | 82 +++++++-- .../include/checkpointing/Checkpointing.hpp | 1 + .../uniform/UniformCheckpointing.cpp | 156 ++++++++++++++++++ .../include/common/GPUZIPBuilders.cpp | 15 +- src/Prefetch/include/common/GPUZIPConfig.h | 10 +- 7 files changed, 246 insertions(+), 22 deletions(-) create mode 100644 src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp diff --git a/docs/CppCudaExamples.md b/docs/CppCudaExamples.md index 66271e3..a830eae 100644 --- a/docs/CppCudaExamples.md +++ b/docs/CppCudaExamples.md @@ -94,7 +94,7 @@ Or use our Docker image `docker pull maltempi/awave-dev:ompc` FetchContent_Declare( cuszp GIT_REPOSITORY https://github.com/szcompressor/cuSZp.git - GIT_TAG cuSZp-V1.1 + GIT_TAG cuSZp-V3.0.0 ) FetchContent_MakeAvailable(cuszp) target_link_libraries(awave3d-decom diff --git a/src/Compressor/include/Compressor.hpp b/src/Compressor/include/Compressor.hpp index 0f1a48a..b96deb1 100644 --- a/src/Compressor/include/Compressor.hpp +++ b/src/Compressor/include/Compressor.hpp @@ -28,6 +28,8 @@ class Compressor * Initializes the compressor with profiling disabled by default. */ Compressor() : profile_{false} {} + + virtual ~Compressor() = default; /** * @brief Compress data. diff --git a/src/Compressor/include/CompressorCuSZp.hpp b/src/Compressor/include/CompressorCuSZp.hpp index 99999fd..3a8fa47 100644 --- a/src/Compressor/include/CompressorCuSZp.hpp +++ b/src/Compressor/include/CompressorCuSZp.hpp @@ -1,9 +1,7 @@ #pragma once #include "Compressor.hpp" #include "cuda_utils.hpp" -#include -#include -#include +#include "cuSZp.h" #include #include #include @@ -39,11 +37,20 @@ class CompressorCuSZp final * @param n1 First dimension of the input data. * @param n2 Second dimension of the input data. * @param n3 Third dimension of the input data. + * @param float_kind The kind of floating-point data ('float' or 'double'). * @param error_bound The error bound for lossy compression. */ - explicit CompressorCuSZp(const std::size_t n1, const std::size_t n2, - const std::size_t n3, const double error_bound) - : n1_(n1), n2_(n2), n3_(n3), n_(n1 * n2 * n3), error_bound_(error_bound) { + explicit CompressorCuSZp(const std::size_t n1, + const std::size_t n2, + const std::size_t n3, + const double error_bound, + const std::string &float_kind = "float") + : n1_(n1), n2_(n2), n3_(n3), n_(n1 * n2 * n3), error_bound_(error_bound), float_kind_(float_kind) + { + if (float_kind_ != "float" && float_kind_ != "double") + { + throw std::invalid_argument("float_kind must be 'float' or 'double'"); + } } protected: @@ -57,13 +64,28 @@ class CompressorCuSZp final */ size_t compress(decompressType *buf_in, compressedType *buf_out) override { size_t compressed_size; - double rel_errbound = - error_bound_ * (maxFloat(reinterpret_cast(buf_in), n_) - - minFloat(reinterpret_cast(buf_in), n_)); - SZp_compress_deviceptr_f32(reinterpret_cast(buf_in), - reinterpret_cast(buf_out), n_, - &compressed_size, rel_errbound, - cudaStreamDefault); + if (float_kind_ == "float") + { + float rel_errbound = error_bound_ * + (maxFloat(reinterpret_cast(buf_in), n_) - + minFloat(reinterpret_cast(buf_in), n_)); + + cuSZp_compress_1D_fixed_f32(reinterpret_cast(buf_in), + reinterpret_cast(buf_out), n_, + &compressed_size, rel_errbound, + cudaStreamDefault); + } + else if (float_kind_ == "double") + { + double rel_errbound = error_bound_ * + (maxDouble(reinterpret_cast(buf_in), n_) - + minDouble(reinterpret_cast(buf_in), n_)); + + cuSZp_compress_1D_fixed_f64(reinterpret_cast(buf_in), + reinterpret_cast(buf_out), n_, + &compressed_size, rel_errbound, + cudaStreamDefault); + } return compressed_size; } @@ -78,10 +100,24 @@ class CompressorCuSZp final */ void decompress(compressedType *buf_in, decompressType *buf_out, size_t compressed_size = -1) override { - SZp_decompress_deviceptr_f32(reinterpret_cast(buf_out), - reinterpret_cast(buf_in), n_, - compressed_size, error_bound_, - cudaStreamDefault); + if (float_kind_ == "float") + { + cuSZp_decompress_1D_fixed_f32(reinterpret_cast(buf_out), + reinterpret_cast(buf_in), n_, + compressed_size, error_bound_, + cudaStreamDefault); + } + else if (float_kind_ == "double") + { + cuSZp_decompress_1D_fixed_f64(reinterpret_cast(buf_out), + reinterpret_cast(buf_in), n_, + compressed_size, error_bound_, + cudaStreamDefault); + } + else + { + throw std::invalid_argument("invalid argument for float_kind"); + } } /** @@ -118,10 +154,20 @@ class CompressorCuSZp final */ std::size_t compressedMaxSize() override { // https://github.com/szcompressor/cuSZp/blob/f47064f4edbc00aceb36692232ac7eef3fefaf2b/examples/cuSZp_gpu_f32_api.cpp#L64 - return ((n_ + 262144 - 1) / 262144 * 262144) * sizeof(float); + size_t elem_size; + + if (float_kind_ == "float") + elem_size = sizeof(float); + else if (float_kind_ == "double") + elem_size = sizeof(double); + else + throw std::invalid_argument("invalid argument for float_kind"); + + return ((n_ + 262144 - 1) / 262144 * 262144) * elem_size; } private: + std::string float_kind_; const double error_bound_; ///< Absolute error bound for compression. const size_t n1_; ///< First dimension of input data. const size_t n2_; ///< Second dimension of input data. diff --git a/src/Prefetch/include/checkpointing/Checkpointing.hpp b/src/Prefetch/include/checkpointing/Checkpointing.hpp index ab5b028..74fd98e 100644 --- a/src/Prefetch/include/checkpointing/Checkpointing.hpp +++ b/src/Prefetch/include/checkpointing/Checkpointing.hpp @@ -97,6 +97,7 @@ class Checkpointing { * @param _steps The total number of computational steps for checkpointing. */ Checkpointing(int _steps) { steps = _steps; } + Checkpointing(int _steps, int _snaps) {steps = _steps; snaps = _snaps;} /** * @brief Retrieves the total number of checkpoints. diff --git a/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp b/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp new file mode 100644 index 0000000..c161c86 --- /dev/null +++ b/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp @@ -0,0 +1,156 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "../Checkpointing.hpp" + +#include "../../common/GPUZIPLogger.cpp" + +/** + * @class UniformCheckpointing + * @brief Implements a checkpointing mechanism using uniform checkpoint spacing. + * @author Bruno Ortega + * @date May 26th, 2026 + * + * This class extends the base `Checkpointing` class to provide specific + * checkpointing actions (save, restore, forward, backward, terminate) using + * a fixed-spacing checkpoint distribution strategy. + * + * The algorithm stores checkpoints at fixed timestep intervals and + * recomputes forward states between restored checkpoints during the + * adjoint phase. + */ +class UniformCheckpointing : public Checkpointing { + +private: + bool adjoint = false; //< Indicates whether execution is currently in + // the forward or in the adjoint phase. + bool save = false; //< Indicates whether the forward computation for the current + // timestep has already been issued before saving the checkpoint. + bool restore = false; //< Control variable to allow backward and restore + // actions for the same timestep. + int current_ts = 1; //< Current timestep. + int last_checkpoint = 0; //< Timestep corresponding to the last restored checkpoint. + int adj_fwd_ts = 0; //< Current timestep during forward recomputation + // in the adjoint phase. + +protected: + + /** + * @brief Resets the internal state of the checkpointing process. + * + * Sets `adjoint`, `save`, `restore`, `current_ts`, `last_checkpoint` and + * `adj_fwd_ts` to their initial values. + * This is typically called to reinitialize the checkpointing algorithm. + */ + void reset() override { + adjoint = false; + save = false; + restore = false; + current_ts = 1; + last_checkpoint = 0; + adj_fwd_ts = 0; + } + + /** + * @brief Determines the next action to perform in the checkpointing process. + * + * @return An `Action` object describing the next step, including its type and + * relevant parameters. + */ + Action getAction() override { + int spacing = std::max(1, steps / snaps); + + // Forward from first to last timestep + if(!adjoint) { + // At last timestep, forward finishes and adjoint begins + if(current_ts == steps) { + adjoint = true; + return Action(current_ts, ACTION_FORWARD); + } + // Apply forward and save for the current timestep + if(current_ts == 1 || current_ts % spacing == 0) { + if(!save) { + save = true; + return Action(current_ts, ACTION_FORWARD); + } + save = false; + last_checkpoint = current_ts; + current_ts++; + return Action(current_ts-1, ACTION_SAVE); + } + // Apply forward for the current timestep + current_ts++; + return Action(current_ts-1, ACTION_FORWARD); + } + // Adjoint from last to first timestep + // Adjoint finishes + if(current_ts == 0) { + return Action(current_ts, ACTION_TERMINATE); + } + // Apply backward and recover last saved snapshot + if(current_ts % spacing == 0 || current_ts == steps) { + if(!restore) { + restore = true; + return Action(current_ts, ACTION_BACKWARD); + } + // Get last saved checkpoint timestep + if(current_ts % spacing == 0) + last_checkpoint = std::max(1, current_ts - spacing); + adj_fwd_ts = last_checkpoint; + restore = false; + current_ts--; + return Action(last_checkpoint, ACTION_RESTORE); + } + // Forward from restored checkpoint to current timestep + if(adj_fwd_ts <= current_ts) { + adj_fwd_ts++; + return Action(adj_fwd_ts-1, ACTION_FORWARD); + } else { // Backward at current timestep + current_ts--; + adj_fwd_ts = last_checkpoint; + return Action(current_ts+1, ACTION_BACKWARD); + } + + return Action(current_ts, ACTION_ERROR); + } + + + /** + * @brief Returns the configured number of checkpoints. + * + * Uniform checkpointing requires the number of checkpoints (`snaps`) + * to be explicitly defined during construction. + * + * @return The configured number of checkpoints. + */ + int getNumberOfCheckpoints() override { + if (snaps == 0) { + GPUZIPLogger::Error("There must be set a value for snapshots.\n"); + } + return snaps; + } + +public: + + /** + * @brief Constructor for the UniformCheckpointing class. + * + * @param steps The number of computational steps for which checkpointing is + * required. + * @param snaps Total number of checkpoints used by the algorithm. + * + * Initializes the base `Checkpointing` class and sets up the internal state. + */ + UniformCheckpointing(int steps, int snaps) + : Checkpointing(steps, snaps) {} +}; \ No newline at end of file diff --git a/src/Prefetch/include/common/GPUZIPBuilders.cpp b/src/Prefetch/include/common/GPUZIPBuilders.cpp index 416b3b5..91e0b14 100644 --- a/src/Prefetch/include/common/GPUZIPBuilders.cpp +++ b/src/Prefetch/include/common/GPUZIPBuilders.cpp @@ -3,6 +3,7 @@ #include "GPUZIPLogger.cpp" #include "checkpointing/revolve/RevolveCheckpointing.cpp" +#include "checkpointing/uniform/UniformCheckpointing.cpp" #include "checkpointing/trace/TraceCheckpointing.cpp" #include "prefetch/CheckpointOnly.cuh" #include "prefetch/Prefetch.cuh" @@ -114,9 +115,17 @@ class GPUZIPBuilders { GPUZIPLogger::Info("Using Trace Checkpointing (%s).\n", gpuzip_config->trace_file_path); return new TraceCheckpointing(steps, gpuzip_config->trace_file_path); - } else { + } else if(gpuzip_config->checkpointing_algorithm == 1) { GPUZIPLogger::Info("Using Revolve Checkpointing .\n"); return new RevolveCheckpointing(steps, gpuzip_config->revolve_log_level); + } else{ + GPUZIPLogger::Info("Using Uniform Checkpointing. \n"); + if(gpuzip_config->checkpointing_snaps <= 0) { + GPUZIPLogger::Error("UniformCheckpointing requires " + "gpuzip_config->checkpointing_snaps > 0.\n"); + exit(-1); + } + return new UniformCheckpointing(steps, gpuzip_config->checkpointing_snaps); } } @@ -158,6 +167,10 @@ class GPUZIPBuilders { #endif } else if (gpuzip_config->compressor == 3) { #ifdef CUSZP + if(gpuzip_config->float_kind) { + return std::make_unique>( + n1, n2, n3, gpuzip_config->cuszp_err_bound, gpuzip_config->float_kind); + } return std::make_unique>( n1, n2, n3, gpuzip_config->cuszp_err_bound); #else diff --git a/src/Prefetch/include/common/GPUZIPConfig.h b/src/Prefetch/include/common/GPUZIPConfig.h index 5545f02..9c65ef4 100644 --- a/src/Prefetch/include/common/GPUZIPConfig.h +++ b/src/Prefetch/include/common/GPUZIPConfig.h @@ -1,13 +1,16 @@ #pragma once typedef struct { - // TraceCheckpointing=0 or RevolveCheckpointing=1 + // TraceCheckpointing=0 or RevolveCheckpointing=1 or UniformCheckpointing=2 int checkpointing_algorithm; + // Uniform's checkpointing number of snapshots + int checkpointing_snaps; + // 0 no prefetch. For prefetching, the capacity needs at minimum 2. int cache_capacity; - // Empty=0 (for no compression), cuZFP=1 or Bitcomp=2. Integer. + // Empty=0 (for no compression), cuZFP=1, Bitcomp=2, cuSZp=3. Integer. int compressor; // 0=DEBUG, 1=INFO, 2=WARN, 3=ERROR @@ -37,6 +40,9 @@ typedef struct { // cuSZp's error bound parameter. Double. double cuszp_err_bound; + // cuSZp's floating-point type ("float" or "double") Default: float. + const char *float_kind; + // The way Bitcomp's delta parameter will be set. Integer. // 0 = Max fraction strategy -- Used max value in data scaled by `MaxFraction` // 1 = Statistical range -- Used mean +- `NumSigma` * stddev for scaling From 9bf2ecc790562fe1004df25dbfffe2f2bfba5291 Mon Sep 17 00:00:00 2001 From: brunoortega Date: Wed, 3 Jun 2026 15:36:22 -0300 Subject: [PATCH 2/2] Corrected PR --- .../include/checkpointing/Checkpointing.hpp | 3 +- .../uniform/UniformCheckpointing.cpp | 110 +++++++++++------- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/src/Prefetch/include/checkpointing/Checkpointing.hpp b/src/Prefetch/include/checkpointing/Checkpointing.hpp index 74fd98e..97c969f 100644 --- a/src/Prefetch/include/checkpointing/Checkpointing.hpp +++ b/src/Prefetch/include/checkpointing/Checkpointing.hpp @@ -29,8 +29,9 @@ enum ActionType { ACTION_FORWARD, ACTION_SAVE, ACTION_RESTORE, ACTION_BACKWARD, * * - TRACE: Follows a predefined trace for checkpointing actions. * - REVOLVE: Uses the Revolve algorithm for checkpointing. + * - UNIFORM: Uses the Uniform algorithm for checkpointing. */ -enum CheckpointingImplementation { TRACE, REVOLVE }; +enum CheckpointingImplementation { TRACE, REVOLVE, UNIFORM }; /** * @struct Action diff --git a/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp b/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp index c161c86..f04b6d3 100644 --- a/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp +++ b/src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include "../Checkpointing.hpp" @@ -19,48 +20,68 @@ * @class UniformCheckpointing * @brief Implements a checkpointing mechanism using uniform checkpoint spacing. * @author Bruno Ortega - * @date May 26th, 2026 + * @date Jun 3rd, 2026 * * This class extends the base `Checkpointing` class to provide specific * checkpointing actions (save, restore, forward, backward, terminate) using * a fixed-spacing checkpoint distribution strategy. * - * The algorithm stores checkpoints at fixed timestep intervals and - * recomputes forward states between restored checkpoints during the - * adjoint phase. + * The algorithm stores checkpoints at approximately uniform timestep + * intervals and, during the adjoint phase, restores the most recent + * checkpoint and recomputes forward states as needed before executing + * backward operations. */ class UniformCheckpointing : public Checkpointing { private: - bool adjoint = false; //< Indicates whether execution is currently in - // the forward or in the adjoint phase. - bool save = false; //< Indicates whether the forward computation for the current - // timestep has already been issued before saving the checkpoint. - bool restore = false; //< Control variable to allow backward and restore - // actions for the same timestep. - int current_ts = 1; //< Current timestep. - int last_checkpoint = 0; //< Timestep corresponding to the last restored checkpoint. - int adj_fwd_ts = 0; //< Current timestep during forward recomputation - // in the adjoint phase. + std::vector checkpoints; //< Vector that stores the timestep value of each checkpoint. + int checkpoint_idx = 0; //< Checkpoint index to access its timestep value. + bool adjoint = false; //< Indicates whether execution is currently in + // the forward or in the adjoint phase. + bool save = false; //< Controls the two-step checkpoint creation process: + // first issue FORWARD, then SAVE for the same timestep. + bool restore = false; //< Controls the two-step restore sequence: + // first execute BACKWARD at a checkpoint boundary, + // then issue RESTORE on the next scheduler call. + int current_ts = 1; //< Current timestep. + int adj_fwd_ts = 0; //< Current timestep during forward recomputation + // in the adjoint phase. protected: /** * @brief Resets the internal state of the checkpointing process. * - * Sets `adjoint`, `save`, `restore`, `current_ts`, `last_checkpoint` and - * `adj_fwd_ts` to their initial values. + * Sets `checkpoints`, `checkpoint_idx`, `adjoint`, `save`, `restore`, + * `current_ts` and `adj_fwd_ts` to their initial values. * This is typically called to reinitialize the checkpointing algorithm. */ void reset() override { + checkpoints.clear(); + checkpoint_idx = 0; adjoint = false; save = false; restore = false; current_ts = 1; - last_checkpoint = 0; adj_fwd_ts = 0; } + /** + * @brief Sets the checkpoints vector with its timesteps. + * + * Computes approximately uniformly spaced checkpoint locations + * and stores their timestep indices in the internal checkpoint list. + */ + void setCheckpoints() { + + checkpoints.push_back(1); + + for (int i = 1; i < snaps; i++) { + int cp = std::round(i * static_cast(steps) / snaps); + checkpoints.push_back(cp); + } + } + /** * @brief Determines the next action to perform in the checkpointing process. * @@ -68,59 +89,65 @@ class UniformCheckpointing : public Checkpointing { * relevant parameters. */ Action getAction() override { - int spacing = std::max(1, steps / snaps); - // Forward from first to last timestep - if(!adjoint) { + if(!adjoint){ // At last timestep, forward finishes and adjoint begins if(current_ts == steps) { adjoint = true; + checkpoint_idx--; return Action(current_ts, ACTION_FORWARD); - } + } + // Apply forward and save for the current timestep - if(current_ts == 1 || current_ts % spacing == 0) { + if(current_ts == checkpoints[checkpoint_idx]) { if(!save) { save = true; return Action(current_ts, ACTION_FORWARD); } save = false; - last_checkpoint = current_ts; current_ts++; + checkpoint_idx++; return Action(current_ts-1, ACTION_SAVE); } + // Apply forward for the current timestep current_ts++; return Action(current_ts-1, ACTION_FORWARD); } + // Adjoint from last to first timestep - // Adjoint finishes - if(current_ts == 0) { - return Action(current_ts, ACTION_TERMINATE); - } - // Apply backward and recover last saved snapshot - if(current_ts % spacing == 0 || current_ts == steps) { + // Beginning of a recomputation interval. + if(current_ts == checkpoints[checkpoint_idx+1] || current_ts == steps){ + + // First visit: execute backward at the interval boundary. if(!restore) { restore = true; return Action(current_ts, ACTION_BACKWARD); } - // Get last saved checkpoint timestep - if(current_ts % spacing == 0) - last_checkpoint = std::max(1, current_ts - spacing); - adj_fwd_ts = last_checkpoint; + + // No remaining checkpoints to restore: adjoint phase finished. + if(checkpoint_idx < 0) + return Action(current_ts, ACTION_TERMINATE); + + // Second visit: restore the previous checkpoint. + adj_fwd_ts = checkpoints[checkpoint_idx]; restore = false; current_ts--; - return Action(last_checkpoint, ACTION_RESTORE); + checkpoint_idx--; + return Action(checkpoints[checkpoint_idx+1], ACTION_RESTORE); } - // Forward from restored checkpoint to current timestep + + // Recompute forward states from the restored checkpoint + // until reaching the current adjoint timestep. if(adj_fwd_ts <= current_ts) { adj_fwd_ts++; return Action(adj_fwd_ts-1, ACTION_FORWARD); - } else { // Backward at current timestep + } else { // Recomputed state available: execute backward. current_ts--; - adj_fwd_ts = last_checkpoint; + adj_fwd_ts = checkpoints[checkpoint_idx+1]; return Action(current_ts+1, ACTION_BACKWARD); } - + return Action(current_ts, ACTION_ERROR); } @@ -149,8 +176,11 @@ class UniformCheckpointing : public Checkpointing { * required. * @param snaps Total number of checkpoints used by the algorithm. * - * Initializes the base `Checkpointing` class and sets up the internal state. + * Initializes the base `Checkpointing` class and computes the uniformly + * distributed checkpoint locations. */ UniformCheckpointing(int steps, int snaps) - : Checkpointing(steps, snaps) {} + : Checkpointing(steps, snaps) { + setCheckpoints(); + } }; \ No newline at end of file