diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index cfeaf9a8117ebc..c8c6677025f1c0 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -3612,7 +3612,6 @@ All parameter, weight, gradient are variables in Paddle.
         return platform::GetDeviceProperties(id);
       },
       py::return_value_policy::copy);
-
   py::class_<gpuDeviceProp>(m, "_gpuDeviceProperties", py::module_local())
       .def_property_readonly(
           "name", [](const gpuDeviceProp &prop) { return prop.name; })
@@ -3653,7 +3652,11 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("nvprof_disable_record_event", platform::NvprofDisableRecordEvent);
 #endif
 #endif
-
+#if defined(PADDLE_WITH_CUDA)
+  m.def("vmm_max_free_size", [] {
+    return memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()), 1);
+  });
+#endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   m.def(
       "get_device_properties",
diff --git a/paddle/phi/core/memory/CMakeLists.txt b/paddle/phi/core/memory/CMakeLists.txt
index 188ca77b7668c4..5e904c771a9c41 100644
--- a/paddle/phi/core/memory/CMakeLists.txt
+++ b/paddle/phi/core/memory/CMakeLists.txt
@@ -1,3 +1,10 @@
 add_subdirectory(allocation)
 
-collect_srcs(core_srcs SRCS malloc.cc memcpy.cc stats.cc mem_utils.cc)
+collect_srcs(
+  core_srcs
+  SRCS
+  malloc.cc
+  memcpy.cc
+  stats.cc
+  mem_utils.cc
+  mem_visitor.cc)
diff --git a/paddle/phi/core/memory/allocation/allocator.h b/paddle/phi/core/memory/allocation/allocator.h
index bb328ac9dd95dc..1d63ad93a02b50 100644
--- a/paddle/phi/core/memory/allocation/allocator.h
+++ b/paddle/phi/core/memory/allocation/allocator.h
@@ -25,6 +25,7 @@
 #include "paddle/phi/core/allocator.h"
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/memory/allocation/inlined_vector.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 #include "paddle/phi/core/platform/device/gpu/gpu_types.h"
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
@@ -202,6 +203,8 @@ class PADDLE_API Allocator : public phi::Allocator {
   uint64_t Release(const phi::Place& place) { return ReleaseImpl(place); }
   size_t Compact(const phi::Place& place) { return CompactImpl(place); }
 
+  virtual void Accept(AllocatorVisitor* visitor) { visitor->Visit(this); }
+
  protected:
   virtual phi::Allocation* AllocateImpl(size_t size) = 0;
   virtual void FreeImpl(phi::Allocation* allocation);
diff --git a/paddle/phi/core/memory/allocation/allocator_facade.cc b/paddle/phi/core/memory/allocation/allocator_facade.cc
index b1a2cfd81b2450..107a92df361d22 100644
--- a/paddle/phi/core/memory/allocation/allocator_facade.cc
+++ b/paddle/phi/core/memory/allocation/allocator_facade.cc
@@ -1663,6 +1663,13 @@ uint64_t AllocatorFacade::Release(const phi::Place& place) {
       ->Release(place);
 }
 
+void AllocatorFacade::Accept(const phi::Place& place,
+                             AllocatorVisitor* visitor) {
+  GetPrivate()
+      ->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
+      ->Accept(visitor);
+}
+
 size_t AllocatorFacade::Compact(const phi::Place& place) {
   return GetPrivate()
       ->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1)
diff --git a/paddle/phi/core/memory/allocation/allocator_facade.h b/paddle/phi/core/memory/allocation/allocator_facade.h
index cb3f9bdb93fffc..855fc3563aabcb 100644
--- a/paddle/phi/core/memory/allocation/allocator_facade.h
+++ b/paddle/phi/core/memory/allocation/allocator_facade.h
@@ -32,6 +32,7 @@
 namespace paddle {
 namespace memory {
+class AllocatorVisitor;
 namespace allocation {
 
 // Allocator Facade is the interface exposed to other modules.
@@ -73,6 +74,21 @@ class AllocatorFacade {
   // Compact memory of free blocks held by the VmmAllocator.
   size_t Compact(const phi::Place& place);
 
+  /**
+   * @brief Accepts an AllocatorVisitor and iterates over all nested Allocator
+   * instances associated with a specific memory location (Place), executing the
+   * visitor's corresponding Visit method for each one.
+   *
+   * This method facilitates the traversal of the Allocator hierarchy for the
+   * given memory Place, allowing the visitor to collect statistics or perform
+   * operations on all constituent allocators.
+   *
+   * @param place The memory location
+   * @param visitor A pointer to the AllocatorVisitor whose Visit methods will
+   * be executed against the nested allocators found at the specified Place.
+   */
+  void Accept(const phi::Place& place, AllocatorVisitor* visitor);
+
   std::shared_ptr<Allocation> AllocShared(const phi::Place& place,
                                           size_t size,
                                           const phi::Stream& stream);
diff --git a/paddle/phi/core/memory/allocation/retry_allocator.h b/paddle/phi/core/memory/allocation/retry_allocator.h
index 887a84a09ae9ac..ea0700e078c27b 100644
--- a/paddle/phi/core/memory/allocation/retry_allocator.h
+++ b/paddle/phi/core/memory/allocation/retry_allocator.h
@@ -23,6 +23,7 @@
 
 #include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/memory/allocation/allocator.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 
 namespace paddle {
 namespace memory {
@@ -49,6 +50,10 @@ class PADDLE_API RetryAllocator : public Allocator {
         common::errors::PreconditionNotMet(
             "Underlying allocator of RetryAllocator is not thread-safe"));
   }
+  std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
+    return underlying_allocator_;
+  }
+  void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }
 
   bool IsAllocThreadSafe() const override { return true; }
diff --git a/paddle/phi/core/memory/allocation/stat_allocator.h b/paddle/phi/core/memory/allocation/stat_allocator.h
index ad5f2ab1e77c61..32b068093eaa57 100644
--- a/paddle/phi/core/memory/allocation/stat_allocator.h
+++ b/paddle/phi/core/memory/allocation/stat_allocator.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/memory/allocation/allocator.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 #include "paddle/phi/core/memory/stats.h"
 #include "paddle/phi/core/platform/profiler/mem_tracing.h"
 
@@ -28,6 +29,10 @@ class StatAllocator : public Allocator {
       : underlying_allocator_(std::move(underlying_allocator)) {}
 
   bool IsAllocThreadSafe() const override { return true; }
+  void Accept(AllocatorVisitor* visitor) override { visitor->Visit(this); }
+  std::shared_ptr<Allocator>& GetUnderLyingAllocator() {
+    return underlying_allocator_;
+  }
 
  protected:
   void FreeImpl(phi::Allocation* allocation) override {
diff --git a/paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h b/paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h
index 4e8912a3e958cd..888f5418e27e59 100644
--- a/paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h
+++ b/paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h
@@ -21,6 +21,7 @@
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/memory/allocation/allocator.h"
 #include "paddle/phi/core/memory/allocation/spin_lock.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda_runtime.h>
@@ -76,9 +77,16 @@ class StreamSafeCUDAAllocator
       bool in_cuda_graph_capturing = false);
   ~StreamSafeCUDAAllocator();
 
+  std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
+    return underlying_allocator_;
+  }
+  std::vector<StreamSafeCUDAAllocator *> &GetAllocatorByPlace() {
+    return allocator_map_[place_];
+  }
   bool IsAllocThreadSafe() const override;
   gpuStream_t GetDefaultStream() const;
   void SetDefaultStream(gpuStream_t stream);
+  void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
 
  protected:
   phi::Allocation *AllocateImpl(size_t size) override;
diff --git a/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc b/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
index 5ee4d70a1d8555..14ff9199f04e2e 100644
--- a/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
+++ b/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc
@@ -264,6 +264,23 @@ phi::Allocation *VirtualMemoryAutoGrowthBestFitAllocator::AllocFromFreeBlocks(
   return nullptr;
 }
 
+std::pair<size_t, size_t>
+VirtualMemoryAutoGrowthBestFitAllocator::SumLargestFreeBlockSizes(
+    int32_t n) const {
+  if (n <= 0 || free_blocks_.empty()) return std::make_pair(0, 0);
+
+  size_t large_size = free_blocks_.rbegin()->first.first;
+  size_t total_size = 0;
+  int32_t count = 0;
+
+  for (auto it = free_blocks_.rbegin(); it != free_blocks_.rend() && count < n;
+       ++it, ++count) {
+    total_size += it->first.first;
+  }
+
+  return std::make_pair(large_size, total_size);
+}
+
 void VirtualMemoryAutoGrowthBestFitAllocator::DumpInfo(
     std::string phase) const {
   size_t total = 0, free = 0, used = 0;
diff --git a/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h b/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
index 1c71dc159cfa73..16f0058742b4d5 100644
--- a/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
+++ b/paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
@@ -22,6 +22,7 @@
 #include "paddle/phi/core/memory/allocation/allocator.h"
 #include "paddle/phi/core/memory/allocation/spin_lock.h"
 #include "paddle/phi/core/memory/mem_utils.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 
 namespace paddle {
 namespace memory {
@@ -43,6 +44,12 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
       size_t alignment,
       const phi::GPUPlace &place);
 
+  std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
+    return underlying_allocator_;
+  }
+  std::pair<size_t, size_t> SumLargestFreeBlockSizes(int32_t n) const;
+  void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
+
   bool IsAllocThreadSafe() const override { return true; }
 
  protected:
diff --git a/paddle/phi/core/memory/malloc.cc b/paddle/phi/core/memory/malloc.cc
index 8f2d12885e2387..a67b747443cb9d 100644
--- a/paddle/phi/core/memory/malloc.cc
+++ b/paddle/phi/core/memory/malloc.cc
@@ -16,6 +16,7 @@ limitations under the License.
 */
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/memory/allocation/allocator_facade.h"
+#include "paddle/phi/core/memory/mem_visitor.h"
 #include "paddle/phi/core/stream.h"
 
 namespace paddle::memory {
@@ -80,6 +81,17 @@ gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation) {
 #endif
 
+#if defined(PADDLE_WITH_CUDA)
+std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
+                                         int32_t n) {
+  FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
+  allocation::AllocatorFacade::Instance().Accept(place,
+                                                 &free_memory_metrics_visitor);
+  return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
+                        free_memory_metrics_visitor.GetSumSize());
+}
+#endif
+
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream) {
   return allocation::AllocatorFacade::Instance().RecordStream(allocation,
diff --git a/paddle/phi/core/memory/malloc.h b/paddle/phi/core/memory/malloc.h
index d1862731f8e474..fa30ef9f05dd27 100644
--- a/paddle/phi/core/memory/malloc.h
+++ b/paddle/phi/core/memory/malloc.h
@@ -69,6 +69,12 @@
 void EraseStream(std::shared_ptr<Allocation> allocation, gpuStream_t stream);
 PADDLE_API gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation);
 #endif
+#if defined(PADDLE_WITH_CUDA)
+// Returns {largest free block size, sum of the n largest free block sizes}.
+PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
+    const phi::GPUPlace& place, int32_t n);
+#endif
+
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream);
 #endif
diff --git a/paddle/phi/core/memory/mem_visitor.cc b/paddle/phi/core/memory/mem_visitor.cc
new file mode 100644
index 00000000000000..23be65d5aa14e9
--- /dev/null
+++ b/paddle/phi/core/memory/mem_visitor.cc
@@ -0,0 +1,62 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/core/memory/mem_visitor.h"
+#include "paddle/phi/core/memory/allocation/allocator.h"
+#include "paddle/phi/core/memory/allocation/retry_allocator.h"
+#include "paddle/phi/core/memory/allocation/spin_lock.h"
+#include "paddle/phi/core/memory/allocation/stat_allocator.h"
+
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/phi/core/memory/allocation/stream_safe_cuda_allocator.h"
+#include "paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h"
+#endif
+
+namespace paddle {
+namespace memory {
+
+void AllocatorVisitor::Visit(RetryAllocator* allocator) {
+  allocator->GetUnderLyingAllocator()->Accept(this);
+}
+
+void AllocatorVisitor::Visit(StatAllocator* allocator) {
+  allocator->GetUnderLyingAllocator()->Accept(this);
+}
+
+#ifdef PADDLE_WITH_CUDA
+void AllocatorVisitor::Visit(StreamSafeCUDAAllocator* allocator) {
+  const std::vector<StreamSafeCUDAAllocator*>& allocators =
+      allocator->GetAllocatorByPlace();
+  for (StreamSafeCUDAAllocator* nested_allocator : allocators) {
+    nested_allocator->GetUnderLyingAllocator()->Accept(this);
+  }
+}
+
+void AllocatorVisitor::Visit(
+    VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
+  allocator->GetUnderLyingAllocator()->Accept(this);
+}
+
+void FreeMemoryMetricsVisitor::Visit(
+    VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
+  auto [large_size, sum_size] =
+      allocator->SumLargestFreeBlockSizes(nums_blocks_);
+  large_size_ = std::max(large_size_, large_size);
+  sum_size_ = std::max(sum_size_, sum_size);
+  VLOG(1) << "Visit VirtualMemoryAutoGrowthBestFitAllocator large_free_size:"
+          << large_size_ << " sum_free_size:" << sum_size_;
+}
+#endif
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/phi/core/memory/mem_visitor.h b/paddle/phi/core/memory/mem_visitor.h
new file mode 100644
index 00000000000000..29752135eae8bb
--- /dev/null
+++ b/paddle/phi/core/memory/mem_visitor.h
@@ -0,0 +1,130 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include "paddle/phi/core/enforce.h"
+
+namespace paddle {
+namespace memory {
+
+namespace allocation {
+class Allocator;
+class RetryAllocator;
+class StatAllocator;
+class StreamSafeCUDAAllocator;
+class VirtualMemoryAutoGrowthBestFitAllocator;
+}  // namespace allocation
+
+using allocation::Allocator;
+using allocation::RetryAllocator;
+using allocation::StatAllocator;
+using allocation::StreamSafeCUDAAllocator;
+using allocation::VirtualMemoryAutoGrowthBestFitAllocator;
+
+/**
+ * @brief AllocatorVisitorReqImpl serves as the Abstract Visitor interface in
+ * the Visitor design pattern.
+ *
+ * It defines the pure virtual function signatures for all required Visit
+ * methods necessary to interact with different concrete allocator types.
+ * Derived classes must implement these Visit methods to perform specific
+ * operations on each allocator type.
+ */
+class AllocatorVisitorReqImpl {
+ public:
+  virtual ~AllocatorVisitorReqImpl() = default;
+  virtual void Visit(RetryAllocator* allocator) = 0;
+  virtual void Visit(StatAllocator* allocator) = 0;
+  virtual void Visit(Allocator* allocator) {}
+#ifdef PADDLE_WITH_CUDA
+  virtual void Visit(StreamSafeCUDAAllocator* allocator) = 0;
+  virtual void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) = 0;
+#endif
+};
+
+/**
+ * @brief AllocatorVisitor is an abstract base class that implements the
+ * AllocatorVisitorReqImpl interface.
+ *
+ * It inherits all the Visit interfaces and provides default (usually
+ * recursive) implementations for them. It serves as a convenient base class
+ * for concrete visitors (like FreeMemoryMetricsVisitor), simplifying the
+ * implementation by handling cases that do not require specialized logic.
+ */
+class AllocatorVisitor : public AllocatorVisitorReqImpl {
+ public:
+  ~AllocatorVisitor() override = default;
+  void Visit(RetryAllocator* allocator) override;
+  void Visit(StatAllocator* allocator) override;
+  void Visit(Allocator* allocator) override {}
+#ifdef PADDLE_WITH_CUDA
+  void Visit(StreamSafeCUDAAllocator* allocator) override;
+  void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) override;
+#endif
+};
+
+#ifdef PADDLE_WITH_CUDA
+/**
+ * @brief FreeMemoryMetricsVisitor is a Concrete Visitor class designed to
+ * inspect allocators for free memory information.
+ *
+ * Its primary goal is to gather statistics, specifically focusing on the
+ * largest contiguous free block size within the visited allocators. Currently,
+ * it provides specialized logic for the
+ * VirtualMemoryAutoGrowthBestFitAllocator.
+ */
+class FreeMemoryMetricsVisitor : public AllocatorVisitor {
+ public:
+  /**
+   * @brief Constructor for FreeMemoryMetricsVisitor.
+   * @param nums_blocks The number of largest free blocks to include in the
+   * sum (defaults to 1).
+   */
+  explicit FreeMemoryMetricsVisitor(int32_t nums_blocks = 1)
+      : nums_blocks_(nums_blocks) {}
+
+  /**
+   * @brief Implements the visit operation for
+   * VirtualMemoryAutoGrowthBestFitAllocator. This is where the logic to query
+   * and record the largest and total free sizes resides.
+   * @param allocator The VirtualMemoryAutoGrowthBestFitAllocator instance to
+   * visit.
+   */
+  void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) override;
+
+  /**
+   * @brief Retrieves the size of the largest free block found during the
+   * visitation process.
+   * @return The size of the largest free block in bytes.
+   */
+  size_t GetLargeSize() const { return large_size_; }
+
+  /**
+   * @brief Retrieves the sum of the `nums_blocks` largest free block sizes
+   * found during the visitation process.
+   * @return The sum of the `nums_blocks` largest free block sizes in bytes.
+   */
+  size_t GetSumSize() const { return sum_size_; }
+
+ private:
+  int32_t nums_blocks_ = 1;
+  size_t large_size_ = 0;
+  size_t sum_size_ = 0;
+};
+#endif
+
+}  // namespace memory
+}  // namespace paddle
diff --git a/test/cpp/fluid/memory/CMakeLists.txt b/test/cpp/fluid/memory/CMakeLists.txt
index 7216b71e171a22..d287ce62b8ba25 100644
--- a/test/cpp/fluid/memory/CMakeLists.txt
+++ b/test/cpp/fluid/memory/CMakeLists.txt
@@ -58,6 +58,17 @@ cc_test(
   SRCS test_aligned_allocator.cc
   DEPS phi common)
 
+if(WITH_GPU)
+  if(WIN32)
+    message(STATUS "Skip allocator_visitor_test on Windows")
+  else()
+    nv_test(
+      allocator_visitor_test
+      SRCS allocator_visitor_test.cc
+      DEPS phi common)
+  endif()
+endif()
+
 if(WIN32)
   cc_test(
     retry_allocator_test
diff --git a/test/cpp/fluid/memory/allocator_visitor_test.cc b/test/cpp/fluid/memory/allocator_visitor_test.cc
new file mode 100644
index 00000000000000..1df632e3b1eac4
--- /dev/null
+++ b/test/cpp/fluid/memory/allocator_visitor_test.cc
@@ -0,0 +1,42 @@
+// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/phi/core/memory/allocation/allocator.h"
+#include "paddle/phi/core/memory/allocation/cuda_virtual_mem_allocator.h"
+#include "paddle/phi/core/memory/allocation/retry_allocator.h"
+#include "paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h"
+#include "paddle/phi/core/platform/device/gpu/gpu_info.h"
+#ifdef PADDLE_WITH_CUDA
+#include <cuda.h>
+#include <cuda_runtime.h>
+#endif
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+TEST(VirtualMemoryAutoGrowthBestFitAllocator, TestAllocatorVisitor) {
+  FLAGS_v = 1;
+  auto vmm_cuda_allocator =
+      std::make_shared<CUDAVirtualMemAllocator>(phi::GPUPlace());
+  auto vma_allocator =
+      std::make_shared<VirtualMemoryAutoGrowthBestFitAllocator>(
+          vmm_cuda_allocator, platform::GpuMinChunkSize(), phi::GPUPlace());
+  memory::AllocatorVisitor visitor;
+  vma_allocator->Accept(&visitor);
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
diff --git a/test/legacy_test/test_allocator_visitor.py b/test/legacy_test/test_allocator_visitor.py
new file mode 100644
index 00000000000000..36d90cf2fbcff7
--- /dev/null
+++ b/test/legacy_test/test_allocator_visitor.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle
+
+
+@unittest.skipIf(
+    (not paddle.is_compiled_with_cuda()) or paddle.is_compiled_with_rocm(),
+    'should compile with cuda.',
+)
+class TestAllocatorVisitor(unittest.TestCase):
+    def setUp(self):
+        self.GB = 1000**3
+        self.MB = 1000**2
+        self.cmds = [
+            ["Alloc", 1 * self.GB, "0x100000000"],
+            ["Alloc", 2 * self.GB, "0x100000001"],
+            ["Alloc", 1 * self.GB, "0x100000002"],
+            ["Alloc", 2 * self.GB, "0x100000003"],
+            ["Free", 1 * self.GB, "0x100000000"],
+            ["Free", 2 * self.GB, "0x100000003"],
+        ]
+        paddle.set_flags({'FLAGS_use_virtual_memory_auto_growth': True})
+
+    def allocate_cmds(self, cmds):
+        params = {}
+        for op, size, ptr in cmds:
+            paddle.device.synchronize()
+            paddle_reserved1 = paddle.device.cuda.memory_reserved() // self.MB
+
+            if op == "Alloc":
+                params[ptr] = paddle.randn(
+                    [size // 4], dtype='float32'
+                )
+            if op == "Free" and ptr in params:
+                del params[ptr]
+
+        paddle.device.synchronize()
+        paddle_reserved2 = paddle.device.cuda.memory_reserved() // self.MB
+        paddle_allocated2 = paddle.device.cuda.memory_allocated() // self.MB
+        paddle_max_reserved = (
+            paddle.device.cuda.max_memory_reserved() // self.MB
+        )
+        paddle_max_allocated = (
+            paddle.device.cuda.max_memory_allocated() // self.MB
+        )
+
+        print(
+            f"reserved = {paddle_reserved2} allocated = {paddle_allocated2} auto growth = {paddle_reserved2 - paddle_reserved1} max_allocated = {paddle_max_allocated} max_reserved = {paddle_max_reserved}"
+        )
+        return params
+
+    def test_multi_scale_alloc_free(self):
+        params = self.allocate_cmds(self.cmds)
+        largest, total = paddle.core.vmm_max_free_size()
+        self.assertLessEqual(largest, total)
+
+
+if __name__ == '__main__':
+    unittest.main()
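Usage sketch (reviewer note, not part of the patch): a minimal example of how the visitor hook introduced above might be driven from C++. Only AllocatorFacade::Accept, FreeMemoryMetricsVisitor, GetLargeSize, and GetSumSize come from this diff; the helper name, device index, and block count below are illustrative assumptions, and the collected metrics are only meaningful when FLAGS_use_virtual_memory_auto_growth is enabled, as the Python test above does.

// Sketch only: assumes a PADDLE_WITH_CUDA build of Paddle.
#include <iostream>

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/memory/allocation/allocator_facade.h"
#include "paddle/phi/core/memory/mem_visitor.h"

// Hypothetical helper: walks the allocator chain for GPU 0 and prints the
// largest free block plus the sum of the four largest free blocks tracked by
// the virtual-memory auto-growth allocator.
void ReportVmmFreeBlocks() {
  paddle::memory::FreeMemoryMetricsVisitor visitor(/*nums_blocks=*/4);
  paddle::memory::allocation::AllocatorFacade::Instance().Accept(
      phi::GPUPlace(0), &visitor);
  std::cout << "largest free block: " << visitor.GetLargeSize() << " bytes, "
            << "sum of 4 largest: " << visitor.GetSumSize() << " bytes\n";
}

The same numbers surface in Python through paddle.core.vmm_max_free_size(), which test_allocator_visitor.py exercises after a mixed alloc/free workload.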