Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/CppCudaExamples.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Or use our Docker image `docker pull maltempi/awave-dev:ompc`
FetchContent_Declare(
cuszp
GIT_REPOSITORY https://github.com/szcompressor/cuSZp.git
GIT_TAG cuSZp-V1.1
GIT_TAG cuSZp-V3.0.0
)
FetchContent_MakeAvailable(cuszp)
target_link_libraries(awave3d-decom
Expand Down
2 changes: 2 additions & 0 deletions src/Compressor/include/Compressor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class Compressor
* Initializes the compressor with profiling disabled by default.
*/
Compressor() : profile_{false} {}

// Virtual destructor so that a derived compressor is destroyed correctly when
// it is deleted through a pointer to this base class.
virtual ~Compressor() = default;

/**
* @brief Compress data.
Expand Down
82 changes: 64 additions & 18 deletions src/Compressor/include/CompressorCuSZp.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
#pragma once
#include "Compressor.hpp"
#include "cuda_utils.hpp"
#include <cuSZp_entry_f32.h>
#include <cuSZp_timer.h>
#include <cuSZp_utility.h>
#include "cuSZp.h"
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -39,11 +37,20 @@ class CompressorCuSZp final
* @param n1 First dimension of the input data.
* @param n2 Second dimension of the input data.
* @param n3 Third dimension of the input data.
* @param error_bound The error bound for lossy compression.
* @param float_kind The kind of floating-point data ('float' or 'double'); optional, defaults to 'float'.
*/
// NOTE(review): the next three lines look like the superseded four-argument
// signature as rendered by the diff view (removed side); the five-argument
// definition below appears to be the one that replaces it -- confirm.
explicit CompressorCuSZp(const std::size_t n1, const std::size_t n2,
const std::size_t n3, const double error_bound)
: n1_(n1), n2_(n2), n3_(n3), n_(n1 * n2 * n3), error_bound_(error_bound) {
// Constructor taking the three data dimensions, the error bound and an
// optional element kind; callers that omit float_kind get "float".
explicit CompressorCuSZp(const std::size_t n1,
const std::size_t n2,
const std::size_t n3,
const double error_bound,
const std::string &float_kind = "float")
// n_ is the total element count, n1 * n2 * n3.
: n1_(n1), n2_(n2), n3_(n3), n_(n1 * n2 * n3), error_bound_(error_bound), float_kind_(float_kind)
{
// Only the exact spellings "float" and "double" are accepted; anything else
// is rejected here with std::invalid_argument.
if (float_kind_ != "float" && float_kind_ != "double")
{
throw std::invalid_argument("float_kind must be 'float' or 'double'");
}
}

protected:
Expand All @@ -57,13 +64,28 @@ class CompressorCuSZp final
*/
// Compresses n_ elements from buf_in into buf_out with cuSZp and returns the
// compressed size reported by the library through its out-parameter.
size_t compress(decompressType *buf_in, compressedType *buf_out) override {
// NOTE(review): compressed_size has no initializer; it is only written by the
// cuSZp calls below.
size_t compressed_size;
// NOTE(review): the next seven lines look like the superseded float-only call
// as rendered by the diff view (removed side) -- confirm.
double rel_errbound =
error_bound_ * (maxFloat(reinterpret_cast<float *>(buf_in), n_) -
minFloat(reinterpret_cast<float *>(buf_in), n_));
SZp_compress_deviceptr_f32(reinterpret_cast<float *>(buf_in),
reinterpret_cast<unsigned char *>(buf_out), n_,
&compressed_size, rel_errbound,
cudaStreamDefault);
// Dispatch on the element kind chosen at construction.
if (float_kind_ == "float")
{
// The bound handed to cuSZp is error_bound_ scaled by (max - min) of the
// input; maxFloat/minFloat presumably scan the n_ input elements -- verify.
// NOTE(review): the double product is narrowed to float here.
float rel_errbound = error_bound_ *
(maxFloat(reinterpret_cast<float *>(buf_in), n_) -
minFloat(reinterpret_cast<float *>(buf_in), n_));

// Issued on cudaStreamDefault.
cuSZp_compress_1D_fixed_f32(reinterpret_cast<float *>(buf_in),
reinterpret_cast<unsigned char *>(buf_out), n_,
&compressed_size, rel_errbound,
cudaStreamDefault);
}
else if (float_kind_ == "double")
{
// Same scaling as the float branch, using the double-precision helpers.
double rel_errbound = error_bound_ *
(maxDouble(reinterpret_cast<double *>(buf_in), n_) -
minDouble(reinterpret_cast<double *>(buf_in), n_));

cuSZp_compress_1D_fixed_f64(reinterpret_cast<double *>(buf_in),
reinterpret_cast<unsigned char *>(buf_out), n_,
&compressed_size, rel_errbound,
cudaStreamDefault);
}
// NOTE(review): unlike decompress() and compressedMaxSize(), there is no
// final else that throws; if neither branch ran, the value returned below
// would be uninitialized. The constructor's float_kind check is what
// prevents that.
return compressed_size;
}

Expand All @@ -78,10 +100,24 @@ class CompressorCuSZp final
*/
// Decompresses compressed_size bytes from buf_in into n_ elements at buf_out.
// The default argument -1 converts to the largest size_t value.
void decompress(compressedType *buf_in, decompressType *buf_out,
size_t compressed_size = -1) override {
// NOTE(review): the next four lines look like the superseded float-only call
// as rendered by the diff view (removed side) -- confirm.
SZp_decompress_deviceptr_f32(reinterpret_cast<float *>(buf_out),
reinterpret_cast<unsigned char *>(buf_in), n_,
compressed_size, error_bound_,
cudaStreamDefault);
// Dispatch on the element kind chosen at construction.
// NOTE(review): both branches pass error_bound_ unscaled, whereas compress()
// passes error_bound_ * (max - min) of the input. Confirm against the cuSZp
// API whether decompression must receive the same bound used to compress.
if (float_kind_ == "float")
{
cuSZp_decompress_1D_fixed_f32(reinterpret_cast<float *>(buf_out),
reinterpret_cast<unsigned char *>(buf_in), n_,
compressed_size, error_bound_,
cudaStreamDefault);
}
else if (float_kind_ == "double")
{
cuSZp_decompress_1D_fixed_f64(reinterpret_cast<double *>(buf_out),
reinterpret_cast<unsigned char *>(buf_in), n_,
compressed_size, error_bound_,
cudaStreamDefault);
}
else
{
// Any other element kind is reported as std::invalid_argument.
throw std::invalid_argument("invalid argument for float_kind");
}
}

/**
Expand Down Expand Up @@ -118,10 +154,20 @@ class CompressorCuSZp final
*/
// Returns the buffer size, in bytes, to reserve for compressed output:
// n_ rounded up to a multiple of 262144 elements, times the element size.
std::size_t compressedMaxSize() override {
// https://github.com/szcompressor/cuSZp/blob/f47064f4edbc00aceb36692232ac7eef3fefaf2b/examples/cuSZp_gpu_f32_api.cpp#L64
// NOTE(review): the next line looks like the superseded float-only return as
// rendered by the diff view (removed side) -- confirm.
return ((n_ + 262144 - 1) / 262144 * 262144) * sizeof(float);
// Bytes per element for the configured element kind.
size_t elem_size;

if (float_kind_ == "float")
elem_size = sizeof(float);
else if (float_kind_ == "double")
elem_size = sizeof(double);
else
throw std::invalid_argument("invalid argument for float_kind");

// (n_ + 262144 - 1) / 262144 * 262144 is the integer round-up of n_ to the
// next multiple of 262144.
return ((n_ + 262144 - 1) / 262144 * 262144) * elem_size;
}

private:
std::string float_kind_;
const double error_bound_; ///< Absolute error bound for compression.
const size_t n1_; ///< First dimension of input data.
const size_t n2_; ///< Second dimension of input data.
Expand Down
4 changes: 3 additions & 1 deletion src/Prefetch/include/checkpointing/Checkpointing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ enum ActionType { ACTION_FORWARD, ACTION_SAVE, ACTION_RESTORE, ACTION_BACKWARD,
*
* - TRACE: Follows a predefined trace for checkpointing actions.
* - REVOLVE: Uses the Revolve algorithm for checkpointing.
* - UNIFORM: Uses the Uniform algorithm for checkpointing.
*/
// NOTE(review): the first declaration below looks like the superseded
// two-value enum as rendered by the diff view (removed side) -- confirm.
// Enumerators carry no explicit values, so they number from zero in
// declaration order: TRACE = 0, REVOLVE = 1, UNIFORM = 2.
enum CheckpointingImplementation { TRACE, REVOLVE };
enum CheckpointingImplementation { TRACE, REVOLVE, UNIFORM };

/**
* @struct Action
Expand Down Expand Up @@ -97,6 +98,7 @@ class Checkpointing {
* @param _steps The total number of computational steps for checkpointing.
*/
// Stores the step count only; this overload does not assign snaps.
Checkpointing(int _steps) { steps = _steps; }
// Overload that also records the number of snapshots (checkpoints) in snaps,
// for strategies that need it fixed up front.
Checkpointing(int _steps, int _snaps) {steps = _steps; snaps = _snaps;}

/**
* @brief Retrieves the total number of checkpoints.
Expand Down
186 changes: 186 additions & 0 deletions src/Prefetch/include/checkpointing/uniform/UniformCheckpointing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#pragma once

#include <stdio.h>
#include <stdlib.h>

#include <map>
#include <string>
#include <vector>

#include <fstream>
#include <iostream>
#include <sstream>
#include <cmath>

#include "../Checkpointing.hpp"

#include "../../common/GPUZIPLogger.cpp"

/**
* @class UniformCheckpointing
* @brief Implements a checkpointing mechanism using uniform checkpoint spacing.
* @author Bruno Ortega <brunoteixeira@estudante.ufscar.br>
* @date Jun 3rd, 2026
*
* This class extends the base `Checkpointing` class to provide specific
* checkpointing actions (save, restore, forward, backward, terminate) using
* a fixed-spacing checkpoint distribution strategy.
*
* The algorithm stores checkpoints at approximately uniform timestep
* intervals and, during the adjoint phase, restores the most recent
* checkpoint and recomputes forward states as needed before executing
* backward operations.
*/
class UniformCheckpointing : public Checkpointing {

private:
    std::vector<int> checkpoints; //< Timestep of each checkpoint, strictly increasing.
    int checkpoint_idx = 0;       //< Cursor into `checkpoints`: next checkpoint to save
                                  //  (forward phase) or to restore (adjoint phase).
    bool adjoint = false;         //< False during the forward sweep, true once the
                                  //  adjoint (backward) sweep has started.
    bool save = false;            //< Controls the two-step checkpoint creation process:
                                  //  first issue FORWARD, then SAVE for the same timestep.
    bool restore = false;         //< Controls the two-step restore sequence:
                                  //  first execute BACKWARD at a checkpoint boundary,
                                  //  then issue RESTORE on the next scheduler call.
    int current_ts = 1;           //< Current timestep.
    int adj_fwd_ts = 0;           //< Current timestep during forward recomputation
                                  //  in the adjoint phase.

    /**
     * @brief Bounds-checked test "is checkpoint number `idx` located at `ts`?".
     *
     * The cursor legitimately moves one position outside the vector (to
     * `size()` after the last checkpoint has been saved, and to -1 after the
     * first checkpoint has been restored), so every comparison against a
     * checkpoint timestep goes through this helper instead of indexing the
     * vector directly.
     *
     * @param idx Position in `checkpoints`; may be out of range.
     * @param ts  Timestep to compare against.
     * @return true only when `idx` is a valid position and its timestep is `ts`.
     */
    bool isCheckpointAt(int idx, int ts) const {
        return idx >= 0 &&
               idx < static_cast<int>(checkpoints.size()) &&
               checkpoints[idx] == ts;
    }

protected:

    /**
     * @brief Resets the internal state of the checkpointing process.
     *
     * Restores `checkpoint_idx`, `adjoint`, `save`, `restore`, `current_ts`
     * and `adj_fwd_ts` to their initial values and rebuilds the checkpoint
     * list, so the schedule can be replayed from the first timestep.
     */
    void reset() override {
        checkpoint_idx = 0;
        adjoint = false;
        save = false;
        restore = false;
        current_ts = 1;
        adj_fwd_ts = 0;
        // Rebuild rather than just clear: getAction() indexes `checkpoints`,
        // so leaving it empty would make the next call read out of bounds.
        setCheckpoints();
    }

    /**
     * @brief Sets the checkpoints vector with its timesteps.
     *
     * Timestep 1 is always a checkpoint. The remaining `snaps - 1` candidates
     * are placed at round(i * steps / snaps). A candidate is kept only if it
     * lies strictly after the previous checkpoint and strictly before the last
     * timestep; this keeps the list strictly increasing (duplicates would
     * stall the forward-phase cursor) and drops positions that the forward
     * phase never saves because the last timestep switches to the adjoint
     * phase first. The list may therefore hold fewer than `snaps` entries when
     * `snaps` is large relative to `steps`.
     *
     * Safe to call repeatedly: the list is cleared first.
     */
    void setCheckpoints() {
        checkpoints.clear();
        checkpoints.push_back(1);

        for (int i = 1; i < snaps; i++) {
            const int cp = static_cast<int>(
                std::lround(i * static_cast<double>(steps) / snaps));
            if (cp > checkpoints.back() && cp < steps) {
                checkpoints.push_back(cp);
            }
        }
    }

    /**
     * @brief Determines the next action to perform in the checkpointing process.
     *
     * Forward phase: emits FORWARD for every timestep, followed by SAVE for
     * timesteps that are checkpoints; the FORWARD of the last timestep
     * switches to the adjoint phase.
     *
     * Adjoint phase: at the last timestep and at every checkpoint boundary it
     * emits BACKWARD and then RESTORE of the previous checkpoint (or TERMINATE
     * when none is left); in between it emits the FORWARD recomputations from
     * the restored checkpoint followed by one BACKWARD per timestep.
     *
     * @return An `Action` object describing the next step, including its type and
     *         relevant parameters.
     */
    Action getAction() override {
        // Forward from first to last timestep
        if (!adjoint) {
            // At last timestep, forward finishes and adjoint begins.
            // The cursor steps back onto the most recently saved checkpoint
            // (or to -1 when nothing was saved).
            if (current_ts == steps) {
                adjoint = true;
                checkpoint_idx--;
                return Action(current_ts, ACTION_FORWARD);
            }

            // Apply forward and save for the current timestep.
            // Bounds-checked: after the last checkpoint has been saved the
            // cursor equals checkpoints.size().
            if (isCheckpointAt(checkpoint_idx, current_ts)) {
                if (!save) {
                    save = true;
                    return Action(current_ts, ACTION_FORWARD);
                }
                save = false;
                current_ts++;
                checkpoint_idx++;
                return Action(current_ts - 1, ACTION_SAVE);
            }

            // Apply forward for the current timestep
            current_ts++;
            return Action(current_ts - 1, ACTION_FORWARD);
        }

        // Adjoint from last to first timestep
        // Beginning of a recomputation interval. The `steps` test comes first
        // and the checkpoint test is bounds-checked: on the first adjoint call
        // checkpoint_idx + 1 is one past the end of the vector.
        if (current_ts == steps || isCheckpointAt(checkpoint_idx + 1, current_ts)) {

            // First visit: execute backward at the interval boundary.
            if (!restore) {
                restore = true;
                return Action(current_ts, ACTION_BACKWARD);
            }

            // No remaining checkpoints to restore: adjoint phase finished.
            if (checkpoint_idx < 0)
                return Action(current_ts, ACTION_TERMINATE);

            // Second visit: restore the previous checkpoint.
            const int restored_ts = checkpoints[checkpoint_idx];
            adj_fwd_ts = restored_ts;
            restore = false;
            current_ts--;
            checkpoint_idx--;
            return Action(restored_ts, ACTION_RESTORE);
        }

        // Recompute forward states from the restored checkpoint
        // until reaching the current adjoint timestep.
        if (adj_fwd_ts <= current_ts) {
            adj_fwd_ts++;
            return Action(adj_fwd_ts - 1, ACTION_FORWARD);
        }

        // Recomputed state available: execute backward, then rewind the
        // recomputation cursor to the checkpoint restored last
        // (checkpoint_idx + 1 is a valid position once a RESTORE was issued).
        current_ts--;
        adj_fwd_ts = checkpoints[checkpoint_idx + 1];
        return Action(current_ts + 1, ACTION_BACKWARD);
    }


    /**
     * @brief Returns the configured number of checkpoints.
     *
     * Uniform checkpointing requires the number of checkpoints (`snaps`)
     * to be explicitly defined during construction. A non-positive value is
     * reported through the logger; the value is returned unchanged either way.
     *
     * @return The configured number of checkpoints.
     */
    int getNumberOfCheckpoints() override {
        if (snaps <= 0) {
            GPUZIPLogger::Error("There must be set a value for snapshots.\n");
        }
        return snaps;
    }

public:

    /**
     * @brief Constructor for the UniformCheckpointing class.
     *
     * @param steps The number of computational steps for which checkpointing is
     * required.
     * @param snaps Total number of checkpoints used by the algorithm.
     *
     * Initializes the base `Checkpointing` class and computes the uniformly
     * distributed checkpoint locations.
     */
    UniformCheckpointing(int steps, int snaps)
        : Checkpointing(steps, snaps) {
        setCheckpoints();
    }
};
15 changes: 14 additions & 1 deletion src/Prefetch/include/common/GPUZIPBuilders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "GPUZIPLogger.cpp"
#include "checkpointing/revolve/RevolveCheckpointing.cpp"
#include "checkpointing/uniform/UniformCheckpointing.cpp"
#include "checkpointing/trace/TraceCheckpointing.cpp"
#include "prefetch/CheckpointOnly.cuh"
#include "prefetch/Prefetch.cuh"
Expand Down Expand Up @@ -114,9 +115,17 @@ class GPUZIPBuilders {
GPUZIPLogger::Info("Using Trace Checkpointing (%s).\n",
gpuzip_config->trace_file_path);
return new TraceCheckpointing(steps, gpuzip_config->trace_file_path);
} else {
} else if(gpuzip_config->checkpointing_algorithm == 1) {
GPUZIPLogger::Info("Using Revolve Checkpointing .\n");
return new RevolveCheckpointing(steps, gpuzip_config->revolve_log_level);
} else{
GPUZIPLogger::Info("Using Uniform Checkpointing. \n");
if(gpuzip_config->checkpointing_snaps <= 0) {
GPUZIPLogger::Error("UniformCheckpointing requires "
"gpuzip_config->checkpointing_snaps > 0.\n");
exit(-1);
}
return new UniformCheckpointing(steps, gpuzip_config->checkpointing_snaps);
}
}

Expand Down Expand Up @@ -158,6 +167,10 @@ class GPUZIPBuilders {
#endif
} else if (gpuzip_config->compressor == 3) {
#ifdef CUSZP
if(gpuzip_config->float_kind) {
return std::make_unique<CompressorCuSZp<void, void>>(
n1, n2, n3, gpuzip_config->cuszp_err_bound, gpuzip_config->float_kind);
}
return std::make_unique<CompressorCuSZp<void, void>>(
n1, n2, n3, gpuzip_config->cuszp_err_bound);
#else
Expand Down
Loading