|
20 | 20 | #include <stdint.h> |
21 | 21 | #include <stdio.h> |
22 | 22 | #include <stdlib.h> |
| 23 | +#include <string.h> |
23 | 24 | #include <assert.h> |
24 | 25 | #include <iostream> |
25 | 26 | #include <future> |
@@ -211,6 +212,27 @@ class vx_device { |
211 | 212 | this->dcr_write(VX_DCR_BASE_STARTUP_ARG0, args_addr & 0xffffffff); |
212 | 213 | this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32); |
213 | 214 |
|
| 215 | + // read block and grid dimensions from kernel arguments |
| 216 | + uint32_t block_dim[3] = {1, 1, 1}; |
| 217 | + uint32_t grid_dim[3] = {1, 1, 1}; |
| 218 | + if (args_addr != 0) { |
| 219 | + // Read first 24 bytes of kernel arguments (block_dim[3] + grid_dim[3]) |
| 220 | + struct { |
| 221 | + uint32_t block_dim[3]; |
| 222 | + uint32_t grid_dim[3]; |
| 223 | + } args_dims; |
| 224 | + this->download(&args_dims, args_addr, sizeof(args_dims)); |
| 225 | + memcpy(block_dim, args_dims.block_dim, sizeof(block_dim)); |
| 226 | + memcpy(grid_dim, args_dims.grid_dim, sizeof(grid_dim)); |
| 227 | + } |
| 228 | + |
| 229 | + this->dcr_write(VX_DCR_BASE_GRID_DIM0, grid_dim[0]); |
| 230 | + this->dcr_write(VX_DCR_BASE_GRID_DIM1, grid_dim[1]); |
| 231 | + this->dcr_write(VX_DCR_BASE_GRID_DIM2, grid_dim[2]); |
| 232 | + this->dcr_write(VX_DCR_BASE_BLOCK_DIM0, block_dim[0]); |
| 233 | + this->dcr_write(VX_DCR_BASE_BLOCK_DIM1, block_dim[1]); |
| 234 | + this->dcr_write(VX_DCR_BASE_BLOCK_DIM2, block_dim[2]); |
| 235 | + |
214 | 236 | // start new run |
215 | 237 | future_ = std::async(std::launch::async, [&]{ |
216 | 238 | processor_.run(); |
|
0 commit comments