-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathlazy_allocator.cpp
More file actions
75 lines (63 loc) · 2.39 KB
/
lazy_allocator.cpp
File metadata and controls
75 lines (63 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright (c) 2025, IST Austria, developed by Erik Schultheis
// SPDX-License-Identifier: Apache-2.0
//
#include "lazy_allocator.h"
#include "utilities/tensor.h"
#include "utilities/allocator.h"
#include "utilities/stack.h"
#include "utilities/tensor_container.h"
// Registers `target` for deferred allocation: its metadata (dtype, rank, sizes)
// is filled in now, while the actual memory is assigned later by commit().
void LazyAllocator::allocate(Tensor* target, ETensorDType dtype, const std::vector<long>& shape) {
    target->Data = nullptr;
    target->DType = dtype;
    target->Rank = shape.size();
    // NOTE(review): assumes shape.size() fits into target->Sizes — confirm its capacity.
    std::copy(shape.cbegin(), shape.cend(), target->Sizes.begin());
    mTargets.push_back(target);
}
// Registers an already-described tensor for deferred allocation. Only the
// data pointer is reset; dtype/rank/sizes are left exactly as the caller set them.
void LazyAllocator::allocate(Tensor *target) {
    target->Data = nullptr;
    mTargets.emplace_back(target);
}
// Registers every tensor held by `target` for deferred allocation,
// delegating to the single-tensor overload.
void LazyAllocator::allocate(SimpleTensorContainer& target) {
    const std::size_t n = target.num_tensors();
    for (std::size_t idx = 0; idx != n; ++idx) {
        allocate(&target.get_tensor(idx));
    }
}
// Materializes all pending tensors inside one backing allocation obtained from
// `storage`. Every tensor is placed at a page-aligned offset within the backing
// buffer; the pending list is cleared afterwards.
// Returns the backing tensor that owns the memory of all committed tensors.
Tensor LazyAllocator::commit(TensorAllocator& storage, EAllocationType type, const char* name) {
    constexpr std::size_t kPageSize = 4096;
    // Sum up the page-rounded footprint of every registered tensor.
    std::size_t required = 0;
    for (Tensor* tgt : mTargets) {
        required += div_ceil(tgt->bytes(), kPageSize) * kPageSize;
    }
    Tensor backing = storage.allocate(ETensorDType::BYTE, name, type, {(long)required});
    // Hand out page-aligned sub-ranges of the backing buffer.
    auto* cursor = backing.get<std::byte>();
    for (Tensor* tgt : mTargets) {
        tgt->Data = cursor;
        tgt->Device = backing.Device;
        cursor += div_ceil(tgt->bytes(), kPageSize) * kPageSize;
    }
    mTargets.clear();
    return backing;
}
// Materializes all pending tensors inside one backing allocation drawn from the
// device memory stack, placing each at a page-aligned offset. The pending list
// is cleared whether or not memory was actually provided.
// Returns the backing tensor (may be null in tracing mode, see below).
Tensor LazyAllocator::commit(DeviceMemoryStack& storage, const char* name) {
    constexpr std::size_t kPageSize = 4096;
    // Sum up the page-rounded footprint of every registered tensor.
    std::size_t required = 0;
    for (Tensor* tgt : mTargets) {
        required += div_ceil(tgt->bytes(), kPageSize) * kPageSize;
    }
    Tensor backing = storage.allocate(ETensorDType::BYTE, {(long)required}, name);
    // The allocator may run in "tracing" mode, in which `storage` provides no
    // real memory. `backing` is then null and the registered tensors must keep
    // their empty Data pointers.
    if (backing) {
        auto* cursor = backing.get<std::byte>();
        for (Tensor* tgt : mTargets) {
            tgt->Data = cursor;
            tgt->Device = backing.Device;
            cursor += div_ceil(tgt->bytes(), kPageSize) * kPageSize;
        }
    }
    mTargets.clear();
    return backing;
}