Commit 0ac5213

Update staging buffer allocation flags by transfer direction
Differential Revision: D89086669
Pull Request resolved: #16268

1 parent: b7dc758

9 files changed: +81 / -25 lines

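The change threads a vkapi::CopyDirection argument through every staging-buffer constructor so the allocator can request the matching VMA host-access flag for the allocation. A minimal sketch of the resulting call pattern (the `ctx` and `numel` variables are placeholders for illustration, not taken from the diff):

```cpp
// Sketch only: `ctx` (a Context*) and `numel` are assumed to be in scope.
// Upload path: the host writes the buffer, the GPU reads it.
api::StagingBuffer upload(
    ctx, vkapi::kFloat, numel, vkapi::CopyDirection::HOST_TO_DEVICE);

// Readback path: the GPU writes the buffer, the host reads it back.
api::StagingBuffer readback(
    ctx, vkapi::kFloat, numel, vkapi::CopyDirection::DEVICE_TO_HOST);
```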

backends/vulkan/runtime/api/containers/StagingBuffer.h

Lines changed: 4 additions & 2 deletions
```diff
@@ -31,11 +31,13 @@ class StagingBuffer final {
   StagingBuffer(
       Context* context_p,
       const vkapi::ScalarType dtype,
-      const size_t numel)
+      const size_t numel,
+      const vkapi::CopyDirection direction)
       : context_p_(context_p),
         dtype_(dtype),
         vulkan_buffer_(context_p_->adapter_ptr()->vma().create_staging_buffer(
-            element_size(dtype_) * numel)),
+            element_size(dtype_) * numel,
+            direction)),
         mapped_data_(nullptr) {}
 
   StagingBuffer(const StagingBuffer&) = delete;
```

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 7 additions & 4 deletions
```diff
@@ -543,10 +543,11 @@ ValueRef ComputeGraph::add_tensorref(
 
 ValueRef ComputeGraph::add_staging(
     const vkapi::ScalarType dtype,
-    const size_t numel) {
+    const size_t numel,
+    const vkapi::CopyDirection direction) {
   ValueRef idx(static_cast<int>(values_.size()));
   check_no_active_value_ptrs();
-  values_.emplace_back(api::StagingBuffer(context(), dtype, numel));
+  values_.emplace_back(api::StagingBuffer(context(), dtype, numel, direction));
   return idx;
 }
 
@@ -593,7 +594,8 @@ ValueRef ComputeGraph::set_input_tensor(
   // For texture storage, the buffer size needs to account for the zero
   // padding applied by unused texel elements.
   size_t buf_numel = get_tensor(idx)->staging_buffer_numel();
-  ValueRef staging_idx = add_staging(staging_dtype, buf_numel);
+  ValueRef staging_idx = add_staging(
+      staging_dtype, buf_numel, vkapi::CopyDirection::HOST_TO_DEVICE);
   add_staging_to_tensor_node(*this, staging_idx, idx);
   inputs_.push_back({idx, staging_idx});
   return staging_idx;
@@ -617,7 +619,8 @@ ValueRef ComputeGraph::set_output_tensor(
   // For texture storage, the buffer size needs to account for the zero
   // padding applied by unused texel elements.
   size_t buf_numel = get_tensor(idx)->staging_buffer_numel();
-  ValueRef staging_idx = add_staging(staging_dtype, buf_numel);
+  ValueRef staging_idx = add_staging(
+      staging_dtype, buf_numel, vkapi::CopyDirection::DEVICE_TO_HOST);
   // We only run this when the tensor is non-empty. When the underlying
   // tensor is empty (e.g. padded_numel == 0), we do not allocate a VkImage to
   // tensor, we will not be able to bind the node for execution.
```

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 4 additions & 1 deletion
```diff
@@ -762,7 +762,10 @@ class ComputeGraph final {
    * use memory that is visible to both the CPU and GPU, and therefore is used
    * as a intermediary when transferring data between the CPU and GPU.
    */
-  ValueRef add_staging(const vkapi::ScalarType dtype, const size_t numel);
+  ValueRef add_staging(
+      const vkapi::ScalarType dtype,
+      const size_t numel,
+      const vkapi::CopyDirection direction);
 
   ValueRef add_none();
```

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 9 additions & 2 deletions
```diff
@@ -53,14 +53,21 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
     const std::vector<int64_t> packed_sizes = graph->sizes_of(packed_);
     size_t numel = utils::multiply_integers(packed_sizes);
     api::StagingBuffer staging(
-        graph->context(), graph->dtype_of(packed_), numel);
+        graph->context(),
+        graph->dtype_of(packed_),
+        numel,
+        vkapi::CopyDirection::HOST_TO_DEVICE);
     staging.set_staging_zeros();
     return staging;
   }
 
   TensorRefPtr tref = graph->get_tref(tref_);
   size_t numel = utils::multiply_integers(tref->sizes);
-  api::StagingBuffer staging(graph->context(), tref->dtype, numel);
+  api::StagingBuffer staging(
+      graph->context(),
+      tref->dtype,
+      numel,
+      vkapi::CopyDirection::HOST_TO_DEVICE);
   graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
   size_t nbytes = numel * vkapi::element_size(tref->dtype);
   staging.copy_from(tref->data, nbytes);
```

backends/vulkan/runtime/vk_api/memory/Allocator.cpp

Lines changed: 11 additions & 5 deletions
```diff
@@ -141,19 +141,25 @@ VulkanImage Allocator::create_image(
       allocate_memory);
 }
 
-VulkanBuffer Allocator::create_staging_buffer(const VkDeviceSize size) {
+VulkanBuffer Allocator::create_staging_buffer(
+    const VkDeviceSize size,
+    const CopyDirection direction) {
   const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
 
   VmaAllocationCreateInfo alloc_create_info = {};
-  alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
+  alloc_create_info.flags =
+      DEFAULT_ALLOCATION_STRATEGY | VMA_ALLOCATION_CREATE_MAPPED_BIT;
   alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
 
   // Staging buffers are accessed by both the CPU and GPU, so set the
   // appropriate flags to indicate that the host device will be accessing
   // the data from this buffer.
-  alloc_create_info.flags |=
-      VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
-      VMA_ALLOCATION_CREATE_MAPPED_BIT;
+  if (direction == CopyDirection::HOST_TO_DEVICE) {
+    alloc_create_info.flags |=
+        VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+  } else {
+    alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+  }
   alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
   alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
   alloc_create_info.preferredFlags =
```
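The branch above is the core of the change: with VMA's AUTO usage modes, a host-visible allocation must declare how the host will touch it. VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT suits upload buffers the CPU only writes (often write-combined memory, which is slow to read back), while VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT asks for memory the CPU can also read efficiently, which is what a readback buffer needs. A standalone sketch of the same selection logic, independent of the ExecuTorch classes:

```cpp
#include <vk_mem_alloc.h>

// Illustrative only; mirrors the branch in Allocator::create_staging_buffer
// above, but is not the ExecuTorch implementation.
VmaAllocationCreateInfo staging_alloc_info(bool host_to_device) {
  VmaAllocationCreateInfo info = {};
  info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
  info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
  info.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
  if (host_to_device) {
    // The CPU fills the buffer once, front to back; write-combined memory
    // is acceptable.
    info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
  } else {
    // The CPU reads results back, so request memory that tolerates random
    // (and therefore cached) host access.
    info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
  }
  return info;
}
```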

backends/vulkan/runtime/vk_api/memory/Allocator.h

Lines changed: 12 additions & 1 deletion
```diff
@@ -23,6 +23,17 @@
 namespace vkcompute {
 namespace vkapi {
 
+/**
+ * Indicates the direction of a copy to or from a staging buffer.
+ *
+ * HOST_TO_DEVICE: Data is written by the host and read by the device.
+ * DEVICE_TO_HOST: Data is written by the device and read by the host.
+ */
+enum class CopyDirection : uint8_t {
+  HOST_TO_DEVICE = 0u,
+  DEVICE_TO_HOST = 1u,
+};
+
 constexpr VmaAllocationCreateFlags DEFAULT_ALLOCATION_STRATEGY =
     VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT;
 
@@ -66,7 +77,7 @@ class Allocator final {
       const bool allow_transfer = false,
       const bool allocate_memory = true);
 
-  VulkanBuffer create_staging_buffer(const VkDeviceSize);
+  VulkanBuffer create_staging_buffer(const VkDeviceSize, const CopyDirection);
 
   VulkanBuffer create_storage_buffer(
       const VkDeviceSize,
```
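A hypothetical pair of call sites for the updated create_staging_buffer signature (the `allocator` object and `nbytes` size are assumed for illustration, not taken from the diff):

```cpp
// Hypothetical usage of the new overload declared above.
VulkanBuffer upload_buf =
    allocator.create_staging_buffer(nbytes, CopyDirection::HOST_TO_DEVICE);
VulkanBuffer readback_buf =
    allocator.create_staging_buffer(nbytes, CopyDirection::DEVICE_TO_HOST);
```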

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 9 additions & 2 deletions
```diff
@@ -399,7 +399,11 @@ void record_matmul_texture3d(
   _(int8_t, QInt8)
 
 void fill_vtensor(api::vTensor& vten, std::vector<float>& data) {
-  api::StagingBuffer staging_buffer(api::context(), vten.dtype(), data.size());
+  api::StagingBuffer staging_buffer(
+      api::context(),
+      vten.dtype(),
+      data.size(),
+      vkapi::CopyDirection::HOST_TO_DEVICE);
 
 #define CASE(ctype, name) \
   case vkapi::ScalarType::name: { \
@@ -486,7 +490,10 @@ void fill_vtensor(
 
 void extract_vtensor(api::vTensor& vten, std::vector<float>& data) {
   api::StagingBuffer staging_buffer(
-      api::context(), vten.dtype(), vten.staging_buffer_numel());
+      api::context(),
+      vten.dtype(),
+      vten.staging_buffer_numel(),
+      vkapi::CopyDirection::DEVICE_TO_HOST);
 
   if (vten.storage_type() == utils::StorageType::BUFFER) {
     record_buffer_to_nchw_op(api::context(), vten, staging_buffer.buffer());
```

backends/vulkan/test/utils/test_utils.h

Lines changed: 4 additions & 2 deletions
```diff
@@ -42,15 +42,17 @@
   vkcompute::api::StagingBuffer staging_buffer_##tensor( \
       vkcompute::api::context(),                         \
       vkapi::kFloat,                                     \
-      tensor.staging_buffer_numel());                    \
+      tensor.staging_buffer_numel(),                     \
+      vkapi::CopyDirection::HOST_TO_DEVICE);             \
   record_nchw_to_image_op(                               \
       vkcompute::api::context(), staging_buffer_##tensor.buffer(), tensor);
 
 #define DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(tensor) \
   vkcompute::api::StagingBuffer staging_buffer_##tensor(      \
       vkcompute::api::context(),                              \
       vkapi::kFloat,                                          \
-      tensor.staging_buffer_numel());                         \
+      tensor.staging_buffer_numel(),                          \
+      vkapi::CopyDirection::DEVICE_TO_HOST);                  \
   record_image_to_nchw_op(                                    \
       vkcompute::api::context(), tensor, staging_buffer_##tensor.buffer());
```

backends/vulkan/test/vulkan_compute_api_test.cpp

Lines changed: 21 additions & 6 deletions
```diff
@@ -530,7 +530,8 @@ TEST_F(VulkanComputeAPITest, spec_var_classes_test) {
 
 TEST_F(VulkanComputeAPITest, spec_var_shader_test) {
   size_t len = 16;
-  StagingBuffer buffer(context(), vkapi::kFloat, len);
+  StagingBuffer buffer(
+      context(), vkapi::kFloat, len, vkapi::CopyDirection::DEVICE_TO_HOST);
 
   float scale = 3.0f;
   float offset = 1.5f;
@@ -602,7 +603,10 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
   }
 
   StagingBuffer staging_buffer(
-      context(), vkapi::kFloat, a.staging_buffer_numel());
+      context(),
+      vkapi::kFloat,
+      a.staging_buffer_numel(),
+      vkapi::CopyDirection::DEVICE_TO_HOST);
   record_image_to_nchw_op(context(), a, staging_buffer.buffer());
 
   submit_to_gpu();
@@ -622,7 +626,8 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
 
 template <typename T, vkapi::ScalarType dtype>
 void test_storage_buffer_type(const size_t len) {
-  StagingBuffer buffer(context(), dtype, len);
+  StagingBuffer buffer(
+      context(), dtype, len, vkapi::CopyDirection::DEVICE_TO_HOST);
 
   std::string kernel_name("idx_fill_buffer");
   switch (dtype) {
@@ -2013,7 +2018,11 @@ void run_from_gpu_test(
         vten.sizes_ubo());
   }
 
-  StagingBuffer staging_buffer(context(), dtype, vten.staging_buffer_numel());
+  StagingBuffer staging_buffer(
+      context(),
+      dtype,
+      vten.staging_buffer_numel(),
+      vkapi::CopyDirection::DEVICE_TO_HOST);
 
   if (dtype == vkapi::kChar &&
       !context()->adapter_ptr()->has_full_int8_buffers_support()) {
@@ -2049,7 +2058,10 @@ void round_trip_test(
 
   // Create and fill input staging buffer
   StagingBuffer staging_buffer_in(
-      context(), dtype, vten.staging_buffer_numel());
+      context(),
+      dtype,
+      vten.staging_buffer_numel(),
+      vkapi::CopyDirection::HOST_TO_DEVICE);
 
   std::vector<T> data_in(staging_buffer_in.numel());
   for (int i = 0; i < staging_buffer_in.numel(); i++) {
@@ -2059,7 +2071,10 @@ void round_trip_test(
 
   // Output staging buffer
   StagingBuffer staging_buffer_out(
-      context(), dtype, vten.staging_buffer_numel());
+      context(),
+      dtype,
+      vten.staging_buffer_numel(),
+      vkapi::CopyDirection::DEVICE_TO_HOST);
 
   record_nchw_to_image_op(context(), staging_buffer_in.buffer(), vten);
```
