-
Notifications
You must be signed in to change notification settings - Fork 51
Expand file tree
/
Copy pathtensor_arg.h
More file actions
62 lines (52 loc) · 2.59 KB
/
tensor_arg.h
File metadata and controls
62 lines (52 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
* Copyright (c) PyPTO Contributors.
* This program is free software, you can redistribute it and/or modify it under the terms and conditions of
* CANN Open Software License Agreement Version 2.0 (the "License").
* Please refer to the License for details. You may not use this file except in compliance with the License.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
* See LICENSE in the root of the software repository for the full text of the License.
* -----------------------------------------------------------------------------------------------------------
*/
/**
* ContinuousTensor - Compact tensor descriptor for orchestration arguments
*
* 40 bytes, trivially copyable, suitable for DMA and device-side access.
*/
#pragma once
#include <cstdint>
#include <type_traits>
#include "data_type.h"
// Maximum tensor rank; also the fixed length of ContinuousTensor::shapes.
// `inline` (C++17) gives this header-defined constant a single program-wide
// definition instead of one internal-linkage copy per translation unit.
inline constexpr int CONTINUOUS_TENSOR_MAX_DIMS = 5;
/**
 * Fixed-layout descriptor of one contiguous tensor argument.
 *
 * The member order and widths determine the binary layout that the rest of
 * this header checks at compile time (40 bytes, trivially copyable), so
 * fields must not be reordered, resized, or given default initializers
 * without revisiting those checks.
 */
struct ContinuousTensor {
    uint64_t data;                                // Host/device memory address
    uint32_t shapes[CONTINUOUS_TENSOR_MAX_DIMS];  // Shape per dim (element count)
    uint32_t ndims;                               // Number of dimensions (1..5)
    DataType dtype;                               // DataType : uint8_t
    uint8_t child_memory;                         // 0 = host (default), 1 = child-managed device memory

    /**
     * Total size of the tensor payload in bytes.
     *
     * Multiplies the first `ndims` entries of `shapes` together and scales the
     * product by the element size of `dtype`. With `ndims == 0` the result is
     * the size of a single element.
     *
     * Only entries that actually exist in `shapes` are read: if `ndims` is
     * larger than CONTINUOUS_TENSOR_MAX_DIMS (e.g. an uninitialized or
     * corrupted descriptor), the walk stops at the end of the array rather
     * than reading past it.
     *
     * The product is accumulated in uint64_t with no overflow detection.
     */
    [[nodiscard]] uint64_t nbytes() const {
        // Compared by value only, so the constant is never odr-used here.
        constexpr uint32_t max_dims = static_cast<uint32_t>(CONTINUOUS_TENSOR_MAX_DIMS);
        const uint32_t dims = ndims < max_dims ? ndims : max_dims;

        uint64_t total = 1;
        for (uint32_t i = 0; i < dims; i++) {
            total *= shapes[i];
        }
        return total * get_element_size(dtype);
    }

    /**
     * Reinterprets the stored address as a pointer to T.
     *
     * The 64-bit `data` value is first converted to uintptr_t and then cast to
     * `T *`; no validity, alignment, or type check is performed.
     */
    template <typename T>
    T *data_as() const {
        return reinterpret_cast<T *>(static_cast<uintptr_t>(data));
    }

    /** True when `child_memory` is non-zero. */
    [[nodiscard]] bool is_child_memory() const { return child_memory != 0; }
};
// Compile-time layout checks for ContinuousTensor: exact size and trivial copyability.
static_assert(
    sizeof(ContinuousTensor) == 40, "ContinuousTensor size must be exactly 40B (34B fields + 6B tail padding)"
);
static_assert(
    std::is_trivially_copyable<ContinuousTensor>::value, "ContinuousTensor must be trivially copyable for DMA"
);
/**
 * TensorArgType - Role of a tensor argument
 *
 * Distinguishes inputs, outputs, and in-place updates. The underlying type is
 * int32_t and every enumerator has an explicitly assigned value.
 */
enum class TensorArgType : int32_t {
    // Read-only input buffer.
    INPUT = 0,

    // Write-only output buffer (runtime allocates).
    OUTPUT = 1,

    // Read-then-write: modifier for downstream.
    INOUT = 2,

    // Write-only existing tensor: skips OverlapMap lookup, depends on creator.
    OUTPUT_EXISTING = 3,

    // No-dependency existing tensor: skips OverlapMap lookup, no publish.
    NO_DEP = 4,
};