7 changes: 5 additions & 2 deletions paddle/fluid/pybind/pybind.cc
@@ -3612,7 +3612,6 @@ All parameter, weight, gradient are variables in Paddle.
return platform::GetDeviceProperties(id);
},
py::return_value_policy::copy);

py::class_<gpuDeviceProp>(m, "_gpuDeviceProperties", py::module_local())
.def_property_readonly(
"name", [](const gpuDeviceProp &prop) { return prop.name; })
@@ -3653,7 +3652,11 @@ All parameter, weight, gradient are variables in Paddle.
m.def("nvprof_disable_record_event", platform::NvprofDisableRecordEvent);
#endif
#endif

#if defined(PADDLE_WITH_CUDA)
m.def("vmm_max_free_size", [] {
memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()), 1);
});
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
m.def(
"get_device_properties",
9 changes: 8 additions & 1 deletion paddle/phi/core/memory/CMakeLists.txt
@@ -1,3 +1,10 @@
add_subdirectory(allocation)

collect_srcs(core_srcs SRCS malloc.cc memcpy.cc stats.cc mem_utils.cc)
collect_srcs(
core_srcs
SRCS
malloc.cc
memcpy.cc
stats.cc
mem_utils.cc
mem_visitor.cc)
3 changes: 3 additions & 0 deletions paddle/phi/core/memory/allocation/allocator.h
@@ -25,6 +25,7 @@
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/memory/allocation/inlined_vector.h"
#include "paddle/phi/core/memory/mem_visitor.h"
#include "paddle/phi/core/platform/device/gpu/gpu_types.h"

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -202,6 +203,8 @@ class PADDLE_API Allocator : public phi::Allocator {
uint64_t Release(const phi::Place& place) { return ReleaseImpl(place); }
size_t Compact(const phi::Place& place) { return CompactImpl(place); }

virtual void Accept(AllocatorVisitor* visitor) { visitor->Visit(this); }

protected:
virtual phi::Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(phi::Allocation* allocation);
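The new virtual `Accept` is one half of a double-dispatch visitor: the base `Allocator` hands the visitor its own type via `visitor->Visit(this)`, and each wrapper subclass overrides `Accept` so the visitor sees the concrete type and can recurse into the allocator it decorates. A minimal standalone sketch of that mechanism, with illustrative names (`ChainVisitor`, `WrappingAlloc`, `LeafAlloc`) rather than Paddle's:

```cpp
#include <iostream>
#include <memory>

struct LeafAlloc;
struct WrappingAlloc;

struct ChainVisitor {
  virtual ~ChainVisitor() = default;
  virtual void Visit(LeafAlloc* a);      // terminal case
  virtual void Visit(WrappingAlloc* a);  // recursive case
};

struct LeafAlloc {
  virtual ~LeafAlloc() = default;
  virtual void Accept(ChainVisitor* v) { v->Visit(this); }
};

struct WrappingAlloc : LeafAlloc {
  std::shared_ptr<LeafAlloc> underlying;
  explicit WrappingAlloc(std::shared_ptr<LeafAlloc> u)
      : underlying(std::move(u)) {}
  // Overriding Accept is what lets the visitor see the concrete type.
  void Accept(ChainVisitor* v) override { v->Visit(this); }
};

void ChainVisitor::Visit(LeafAlloc*) { std::cout << "reached leaf\n"; }
void ChainVisitor::Visit(WrappingAlloc* a) { a->underlying->Accept(this); }

int main() {
  // Two wrappers around one leaf, analogous to a StatAllocator wrapping a
  // RetryAllocator wrapping a concrete allocator.
  auto chain = std::make_shared<WrappingAlloc>(
      std::make_shared<WrappingAlloc>(std::make_shared<LeafAlloc>()));
  ChainVisitor v;
  chain->Accept(&v);  // unwraps both layers, prints "reached leaf" once
}
```

The same shape recurs below: `RetryAllocator`, `StatAllocator`, and the CUDA allocators each override `Accept`, while `mem_visitor.cc` supplies default `Visit` overloads that keep unwrapping.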
7 changes: 7 additions & 0 deletions paddle/phi/core/memory/allocation/allocator_facade.cc
@@ -1663,6 +1663,13 @@ uint64_t AllocatorFacade::Release(const phi::Place& place) {
->Release(place);
}

void AllocatorFacade::Accept(const phi::Place& place,
AllocatorVisitor* visitor) {
GetPrivate()
->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
->Accept(visitor);
}

size_t AllocatorFacade::Compact(const phi::Place& place) {
return GetPrivate()
->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
16 changes: 16 additions & 0 deletions paddle/phi/core/memory/allocation/allocator_facade.h
@@ -32,6 +32,7 @@

namespace paddle {
namespace memory {
class AllocatorVisitor;
namespace allocation {

// Allocator Facade is the interface exposed to other modules.
@@ -73,6 +74,21 @@ class AllocatorFacade {
// Compact memory of free blocks held by the VmmAllocator.
size_t Compact(const phi::Place& place);

/**
* @brief Accepts an AllocatorVisitor and iterates over all nested Allocator
* instances associated with a specific memory location (Place), executing the
* visitor's corresponding Visit method for each one.
*
* This method facilitates the traversal of the Allocator hierarchy for the
* given memory Place, allowing the visitor to collect statistics or perform
* operations on all constituent allocators.
*
* @param place The memory location whose allocator hierarchy is traversed.
* @param visitor A pointer to the AllocatorVisitor whose Visit methods will
* be executed against the nested allocators found at the specified Place.
*/
void Accept(const phi::Place& place, AllocatorVisitor* visitor);

std::shared_ptr<Allocation> AllocShared(const phi::Place& place,
size_t size,
const phi::Stream& stream);
5 changes: 5 additions & 0 deletions paddle/phi/core/memory/allocation/retry_allocator.h
@@ -23,6 +23,7 @@

#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/memory/allocation/allocator.h"
#include "paddle/phi/core/memory/mem_visitor.h"

namespace paddle {
namespace memory {
@@ -49,6 +50,10 @@ class PADDLE_API RetryAllocator : public Allocator {
common::errors::PreconditionNotMet(
"Underlying allocator of RetryAllocator is not thread-safe"));
}
std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
return underlying_allocator_;
}
void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }

bool IsAllocThreadSafe() const override { return true; }

5 changes: 5 additions & 0 deletions paddle/phi/core/memory/allocation/stat_allocator.h
@@ -15,6 +15,7 @@
#pragma once

#include "paddle/phi/core/memory/allocation/allocator.h"
#include "paddle/phi/core/memory/mem_visitor.h"
#include "paddle/phi/core/memory/stats.h"
#include "paddle/phi/core/platform/profiler/mem_tracing.h"

@@ -28,6 +29,10 @@ class StatAllocator : public Allocator {
: underlying_allocator_(std::move(underlying_allocator)) {}

bool IsAllocThreadSafe() const override { return true; }
void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }
std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
return underlying_allocator_;
}

protected:
void FreeImpl(phi::Allocation* allocation) override {
paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h
@@ -21,6 +21,7 @@
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/memory/allocation/allocator.h"
#include "paddle/phi/core/memory/allocation/spin_lock.h"
#include "paddle/phi/core/memory/mem_visitor.h"

#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
@@ -76,9 +77,16 @@ class StreamSafeCUDAAllocator
bool in_cuda_graph_capturing = false);
~StreamSafeCUDAAllocator();

std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
return underlying_allocator_;
}
std::vector<StreamSafeCUDAAllocator *> &GetAllocatorByPlace() {
return allocator_map_[place_];
}
bool IsAllocThreadSafe() const override;
gpuStream_t GetDefaultStream() const;
void SetDefaultStream(gpuStream_t stream);
void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }

protected:
phi::Allocation *AllocateImpl(size_t size) override;
paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
@@ -264,6 +264,23 @@ phi::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
return nullptr;
}

std::pair<size_t, size_t>
VirtualMemoryAutoGrowthBestFitAllocator::SumLargestFreeBlockSizes(
int32_t n) const {
if (n <= 0 || free_blocks_.empty()) return std::make_pair(0, 0);

size_t large_size = free_blocks_.rbegin()->first.first;
size_t total_size = 0;
int32_t count = 0;

for (auto it = free_blocks_.rbegin(); it != free_blocks_.rend() && count < n;
++it, ++count) {
total_size += it->first.first;
}

return std::make_pair(large_size, total_size);
}

void VirtualMemoryAutoGrowthBestFitAllocator::DumpInfo(
std::string phase) const {
size_t total = 0, free = 0, used = 0;
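`SumLargestFreeBlockSizes` reads the largest key once, then sums the top `n` sizes in a single reverse walk: with free blocks of 512, 256, 128, and 64 bytes and `n = 2` it returns `{512, 768}`. A self-contained sketch of that arithmetic, assuming `free_blocks_` is an ascending-ordered `std::map` keyed by `{block_size, base_ptr}` (which is what `rbegin()->first.first` implies; the real mapped type differs):

```cpp
#include <cstdio>
#include <map>
#include <utility>

int main() {
  // Stand-in for free_blocks_: key = {block size, base pointer}; the map's
  // ordering puts the largest block at rbegin().
  std::map<std::pair<size_t, void*>, int> free_blocks = {
      {{64, nullptr}, 0}, {{128, nullptr}, 0},
      {{256, nullptr}, 0}, {{512, nullptr}, 0}};

  const int n = 2;
  size_t large_size = free_blocks.rbegin()->first.first;  // 512
  size_t total_size = 0;
  int count = 0;
  // Reverse iteration visits blocks from largest to smallest.
  for (auto it = free_blocks.rbegin();
       it != free_blocks.rend() && count < n; ++it, ++count) {
    total_size += it->first.first;
  }
  std::printf("largest=%zu sum_of_%d_largest=%zu\n",
              large_size, n, total_size);  // largest=512 sum_of_2_largest=768
}
```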
paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
@@ -22,6 +22,7 @@
#include "paddle/phi/core/memory/allocation/allocator.h"
#include "paddle/phi/core/memory/allocation/spin_lock.h"
#include "paddle/phi/core/memory/mem_utils.h"
#include "paddle/phi/core/memory/mem_visitor.h"

namespace paddle {
namespace memory {
@@ -43,6 +44,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
size_t alignment,
const phi::GPUPlace &place);

std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
return underlying_allocator_;
}
std::pair<size_t, size_t> SumLargestFreeBlockSizes(int32_t n) const;
void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }

bool IsAllocThreadSafe() const override { return true; }

protected:
12 changes: 12 additions & 0 deletions paddle/phi/core/memory/malloc.cc
@@ -16,6 +16,7 @@ limitations under the License. */

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/memory/allocation/allocator_facade.h"
#include "paddle/phi/core/memory/mem_visitor.h"
#include "paddle/phi/core/stream.h"

namespace paddle::memory {
@@ -80,6 +81,17 @@ gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation) {

#endif

#if defined(PADDLE_WITH_CUDA)
std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
int32_t n) {
FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
allocation::AllocatorFacade::Instance().Accept(place,
&free_memory_metrics_visitor);
return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
free_memory_metrics_visitor.GetSumSize());
}
#endif

#ifdef PADDLE_WITH_XPU
bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream) {
return allocation::AllocatorFacade::Instance().RecordStream(allocation,
6 changes: 6 additions & 0 deletions paddle/phi/core/memory/malloc.h
@@ -69,6 +69,12 @@ void EraseStream(std::shared_ptr<Allocation> allocation, gpuStream_t stream);
PADDLE_API gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation);
#endif

#if defined(PADDLE_WITH_CUDA)
// Returns a pair of <largest_free_block_size, sum_of_n_largest_free_block_sizes>.
PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
const phi::GPUPlace& place, int32_t n);
#endif

#ifdef PADDLE_WITH_XPU
bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream);
#endif
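A hedged usage sketch of the new query from C++; `LogVmmFreeBlocks` and the choice `n = 3` are invented for illustration (the pybind11 binding above hardcodes `n = 1`):

```cpp
#include <iostream>

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/memory/malloc.h"

void LogVmmFreeBlocks(int device_id) {
#if defined(PADDLE_WITH_CUDA)
  // first: size of the largest free block held by the
  // VirtualMemoryAutoGrowthBestFitAllocator; second: sum of the 3 largest.
  auto [largest, sum3] =
      paddle::memory::VmmMaxFreeSize(phi::GPUPlace(device_id), /*n=*/3);
  std::cout << "largest free block: " << largest
            << " bytes, top-3 total: " << sum3 << " bytes\n";
#endif
}
```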
62 changes: 62 additions & 0 deletions paddle/phi/core/memory/mem_visitor.cc
@@ -0,0 +1,62 @@
// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/core/memory/mem_visitor.h"
#include "paddle/phi/core/memory/allocation/allocator.h"
#include "paddle/phi/core/memory/allocation/retry_allocator.h"
#include "paddle/phi/core/memory/allocation/spin_lock.h"
#include "paddle/phi/core/memory/allocation/stat_allocator.h"

#ifdef PADDLE_WITH_CUDA
#include "paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h"
#include "paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h"
#endif

namespace paddle {
namespace memory {

void AllocatorVisitor::Visit(RetryAllocator* allocator) {
allocator->GetUnderLyingAllocator()->Accept(this);
}

void AllocatorVisitor::Visit(StatAllocator* allocator) {
allocator->GetUnderLyingAllocator()->Accept(this);
}

#ifdef PADDLE_WITH_CUDA
void AllocatorVisitor::Visit(StreamSafeCUDAAllocator* allocator) {
const std::vector<StreamSafeCUDAAllocator*>& allocators =
allocator->GetAllocatorByPlace();
// Fan out over every per-stream allocator registered at this place; use a
// distinct name so the loop variable does not shadow the parameter.
for (StreamSafeCUDAAllocator* peer : allocators) {
peer->GetUnderLyingAllocator()->Accept(this);
}
}

void AllocatorVisitor::Visit(
VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
allocator->GetUnderLyingAllocator()->Accept(this);
}

void FreeMemoryMetricsVisitor::Visit(
VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
auto [large_size, sum_size] =
allocator->SumLargestFreeBlockSizes(nums_blocks_);
large_size_ = std::max(large_size_, large_size);
sum_size_ = std::max(sum_size_, sum_size);
VLOG(1) << "Visit VirtualMemoryAutoGrowthBestFitAllocator large_free_size:"
<< large_size_ << " sum_free_size:" << sum_size_;
}
#endif
} // namespace memory
} // namespace paddle
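`mem_visitor.h` itself is not among the hunks shown. From the definitions above and the call sites in `malloc.cc` (`GetLargeSize`, `GetSumSize`, the `nums_blocks_` constructor argument), the header plausibly declares something like the following; this is a reconstruction, and the real file's namespace nesting and member names may differ:

```cpp
// Inferred sketch of paddle/phi/core/memory/mem_visitor.h -- reconstructed
// from mem_visitor.cc and malloc.cc, not copied from the actual file.
#pragma once

#include <cstddef>
#include <cstdint>

namespace paddle {
namespace memory {

// Forward declarations; in the real tree these allocators live under
// paddle::memory::allocation.
class Allocator;
class RetryAllocator;
class StatAllocator;
class StreamSafeCUDAAllocator;
class VirtualMemoryAutoGrowthBestFitAllocator;

class AllocatorVisitor {
 public:
  virtual ~AllocatorVisitor() = default;
  // Terminal case reached via the default Allocator::Accept.
  virtual void Visit(Allocator* allocator) {}
  // Wrapper allocators: mem_visitor.cc defaults these to recursing into the
  // allocator they decorate.
  virtual void Visit(RetryAllocator* allocator);
  virtual void Visit(StatAllocator* allocator);
#ifdef PADDLE_WITH_CUDA
  virtual void Visit(StreamSafeCUDAAllocator* allocator);
  virtual void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator);
#endif
};

// Collects {largest, sum-of-n-largest} free-block sizes during traversal.
class FreeMemoryMetricsVisitor : public AllocatorVisitor {
 public:
  explicit FreeMemoryMetricsVisitor(int32_t nums_blocks)
      : nums_blocks_(nums_blocks) {}
#ifdef PADDLE_WITH_CUDA
  void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) override;
#endif
  size_t GetLargeSize() const { return large_size_; }
  size_t GetSumSize() const { return sum_size_; }

 private:
  int32_t nums_blocks_;
  size_t large_size_ = 0;
  size_t sum_size_ = 0;
};

}  // namespace memory
}  // namespace paddle
```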