Skip to content

Commit 25fb6bb

Browse files
liuruyan authored and LittleHeroZZZX committed
【Allocator】Add allocator visitor (PaddlePaddle#76349)
* add allocator visitor
* fix ci
* fix dcu
* fix cuda
* fix cuda
* fix cuda
* fix ci
* fix ci
* fix ci
* add ut
* fix ci
* fix ci
* fix ci
* fix ci
* fix dcu
* fix dcu
* fix conflict
* fix conflict
1 parent fbef36b commit 25fb6bb

17 files changed

+416
-3
lines changed

paddle/fluid/pybind/pybind.cc

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3629,7 +3629,6 @@ All parameter, weight, gradient are variables in Paddle.
36293629
return platform::GetDeviceProperties(id);
36303630
},
36313631
py::return_value_policy::copy);
3632-
36333632
py::class_<gpuDeviceProp>(m, "_gpuDeviceProperties", py::module_local())
36343633
.def_property_readonly(
36353634
"name", [](const gpuDeviceProp &prop) { return prop.name; })
@@ -3670,7 +3669,11 @@ All parameter, weight, gradient are variables in Paddle.
36703669
m.def("nvprof_disable_record_event", platform::NvprofDisableRecordEvent);
36713670
#endif
36723671
#endif
3673-
3672+
#if defined(PADDLE_WITH_CUDA)
3673+
m.def("vmm_max_free_size", [] {
3674+
memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()), 1);
3675+
});
3676+
#endif
36743677
#ifdef PADDLE_WITH_CUSTOM_DEVICE
36753678
m.def(
36763679
"get_device_properties",
Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,10 @@
11
add_subdirectory(allocation)
22

3-
collect_srcs(core_srcs SRCS malloc.cc memcpy.cc stats.cc mem_utils.cc)
3+
collect_srcs(
4+
core_srcs
5+
SRCS
6+
malloc.cc
7+
memcpy.cc
8+
stats.cc
9+
mem_utils.cc
10+
mem_visitor.cc)

paddle/phi/core/memory/allocation/allocator.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include "paddle/phi/core/allocator.h"
2626
#include "paddle/phi/core/enforce.h"
2727
#include "paddle/phi/core/memory/allocation/inlined_vector.h"
28+
#include "paddle/phi/core/memory/mem_visitor.h"
2829
#include "paddle/phi/core/platform/device/gpu/gpu_types.h"
2930

3031
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -202,6 +203,8 @@ class PADDLE_API Allocator : public phi::Allocator {
202203
uint64_t Release(const phi::Place& place) { return ReleaseImpl(place); }
203204
size_t Compact(const phi::Place& place) { return CompactImpl(place); }
204205

206+
virtual void Accept(AllocatorVisitor* visitor) { visitor->Visit(this); }
207+
205208
protected:
206209
virtual phi::Allocation* AllocateImpl(size_t size) = 0;
207210
virtual void FreeImpl(phi::Allocation* allocation);

paddle/phi/core/memory/allocation/allocator_facade.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1682,6 +1682,13 @@ uint64_t AllocatorFacade::Release(const phi::Place& place) {
16821682
->Release(place);
16831683
}
16841684

1685+
void AllocatorFacade::Accept(const phi::Place& place,
1686+
AllocatorVisitor* visitor) {
1687+
GetPrivate()
1688+
->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
1689+
->Accept(visitor);
1690+
}
1691+
16851692
size_t AllocatorFacade::Compact(const phi::Place& place) {
16861693
return GetPrivate()
16871694
->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)

paddle/phi/core/memory/allocation/allocator_facade.h

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232

3333
namespace paddle {
3434
namespace memory {
35+
class AllocatorVisitor;
3536
namespace allocation {
3637

3738
// Allocator Facade is the interface exposed to other modules.
@@ -73,6 +74,21 @@ class AllocatorFacade {
7374
// Compact memory of free blocks held by the VmmAllocator.
7475
size_t Compact(const phi::Place& place);
7576

77+
/**
78+
* @brief Accepts an AllocatorVisitor and iterates over all nested Allocator
79+
* instances associated with a specific memory location (Place), executing the
80+
* visitor's corresponding Visit method for each one.
81+
*
82+
* This method facilitates the traversal of the Allocator hierarchy for the
83+
* given memory Place, allowing the visitor to collect statistics or perform
84+
* operations on all constituent allocators.
85+
*
86+
* @param place The memory location
87+
* @param visitor A pointer to the AllocatorVisitor whose Visit methods will
88+
* be executed against the nested allocators found at the specified Place.
89+
*/
90+
void Accept(const phi::Place& place, AllocatorVisitor* visitor);
91+
7692
std::shared_ptr<Allocation> AllocShared(const phi::Place& place,
7793
size_t size,
7894
const phi::Stream& stream);

paddle/phi/core/memory/allocation/retry_allocator.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323

2424
#include "paddle/phi/core/enforce.h"
2525
#include "paddle/phi/core/memory/allocation/allocator.h"
26+
#include "paddle/phi/core/memory/mem_visitor.h"
2627

2728
namespace paddle {
2829
namespace memory {
@@ -49,6 +50,10 @@ class PADDLE_API RetryAllocator : public Allocator {
4950
common::errors::PreconditionNotMet(
5051
"Underlying allocator of RetryAllocator is not thread-safe"));
5152
}
53+
std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
54+
return underlying_allocator_;
55+
}
56+
void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }
5257

5358
bool IsAllocThreadSafe() const override { return true; }
5459

paddle/phi/core/memory/allocation/stat_allocator.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#pragma once
1616

1717
#include "paddle/phi/core/memory/allocation/allocator.h"
18+
#include "paddle/phi/core/memory/mem_visitor.h"
1819
#include "paddle/phi/core/memory/stats.h"
1920
#include "paddle/phi/core/platform/profiler/mem_tracing.h"
2021

@@ -28,6 +29,10 @@ class StatAllocator : public Allocator {
2829
: underlying_allocator_(std::move(underlying_allocator)) {}
2930

3031
bool IsAllocThreadSafe() const override { return true; }
32+
void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }
33+
std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
34+
return underlying_allocator_;
35+
}
3136

3237
protected:
3338
void FreeImpl(phi::Allocation* allocation) override {

paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#include "paddle/phi/common/place.h"
2222
#include "paddle/phi/core/memory/allocation/allocator.h"
2323
#include "paddle/phi/core/memory/allocation/spin_lock.h"
24+
#include "paddle/phi/core/memory/mem_visitor.h"
2425

2526
#ifdef PADDLE_WITH_CUDA
2627
#include <cuda_runtime.h>
@@ -76,9 +77,16 @@ class StreamSafeCUDAAllocator
7677
bool in_cuda_graph_capturing = false);
7778
~StreamSafeCUDAAllocator();
7879

80+
std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
81+
return underlying_allocator_;
82+
}
83+
std::vector<StreamSafeCUDAAllocator *> &GetAllocatorByPlace() {
84+
return allocator_map_[place_];
85+
}
7986
bool IsAllocThreadSafe() const override;
8087
gpuStream_t GetDefaultStream() const;
8188
void SetDefaultStream(gpuStream_t stream);
89+
void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
8290

8391
protected:
8492
phi::Allocation *AllocateImpl(size_t size) override;

paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -322,6 +322,23 @@ phi::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
322322
return nullptr;
323323
}
324324

325+
std::pair<size_t, size_t>
326+
VirtualMemoryAutoGrowthBestFitAllocator::SumLargestFreeBlockSizes(
327+
int32_t n) const {
328+
if (n <= 0 || free_blocks_.empty()) return std::make_pair(0, 0);
329+
330+
size_t large_size = free_blocks_.rbegin()->first.first;
331+
size_t total_size = 0;
332+
int32_t count = 0;
333+
334+
for (auto it = free_blocks_.rbegin(); it != free_blocks_.rend() && count < n;
335+
++it, ++count) {
336+
total_size += it->first.first;
337+
}
338+
339+
return std::make_pair(large_size, total_size);
340+
}
341+
325342
void VirtualMemoryAutoGrowthBestFitAllocator::DumpInfo(
326343
std::string phase) const {
327344
size_t total = 0, free = 0, used = 0;

paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#include "paddle/phi/core/memory/allocation/allocator.h"
2323
#include "paddle/phi/core/memory/allocation/spin_lock.h"
2424
#include "paddle/phi/core/memory/mem_utils.h"
25+
#include "paddle/phi/core/memory/mem_visitor.h"
2526

2627
namespace paddle {
2728
namespace memory {
@@ -43,6 +44,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
4344
size_t alignment,
4445
const phi::GPUPlace &place);
4546

47+
std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
48+
return underlying_allocator_;
49+
}
50+
std::pair<size_t, size_t> SumLargestFreeBlockSizes(int32_t n) const;
51+
void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
52+
4653
bool IsAllocThreadSafe() const override { return true; }
4754
void PreAlloc() override;
4855
void PreAllocate(size_t size);

0 commit comments

Comments
 (0)