Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 68 additions & 37 deletions src/d3d11/d3d11_context_impl.cpp

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/d3d11/d3d11_texture_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ HRESULT CreateDeviceTextureInternal(MTLD3D11Device *pDevice,
flags.set(TextureAllocationFlag::GpuPrivate);
if (finalDesc.Usage == D3D11_USAGE_IMMUTABLE)
flags.set(TextureAllocationFlag::GpuReadonly);
if (!(finalDesc.BindFlags & (D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET | D3D11_BIND_DEPTH_STENCIL)))
flags.set(TextureAllocationFlag::ShaderReadonly);
flags.set(TextureAllocationFlag::Shared);
auto allocation = texture->allocate(flags);

Expand Down Expand Up @@ -421,6 +423,8 @@ HRESULT CreateDeviceTextureInternal(MTLD3D11Device *pDevice,

Flags<TextureAllocationFlag> flags;
flags.set(finalDesc.CPUAccessFlags ? TextureAllocationFlag::GpuManaged : TextureAllocationFlag::GpuPrivate);
if (!(finalDesc.BindFlags & (D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET | D3D11_BIND_DEPTH_STENCIL)))
flags.set(TextureAllocationFlag::ShaderReadonly);
if (finalDesc.Usage == D3D11_USAGE_IMMUTABLE)
flags.set(TextureAllocationFlag::GpuReadonly);
if (single_subresource && (finalDesc.BindFlags & D3D11_BIND_DEPTH_STENCIL)) {
Expand Down
1 change: 1 addition & 0 deletions src/d3d11/d3d11_texture_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ HRESULT CreateDynamicTextureInternal(MTLD3D11Device *pDevice,
auto texture = Rc<Texture>(new Texture(info, pDevice->GetMTLDevice()));
Flags<TextureAllocationFlag> flags;
flags.set(TextureAllocationFlag::GpuManaged);
flags.set(TextureAllocationFlag::ShaderReadonly);
if (pInitialData) {
auto default_allocation = texture->allocate(flags);
InitializeTextureData(pDevice, default_allocation->texture(), finalDesc, pInitialData);
Expand Down
1 change: 1 addition & 0 deletions src/d3d11/d3d11_texture_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ TDynamicLinearTexture(
bytes_per_row_(bytes_per_row) {
this->texture_ = new Texture(bytes_per_image, bytes_per_row, descriptor, device->GetMTLDevice());
Flags<TextureAllocationFlag> flags;
flags.set(TextureAllocationFlag::ShaderReadonly);
if (!this->m_parent->IsTraced() && pDesc->Usage == D3D11_USAGE_DYNAMIC)
flags.set(TextureAllocationFlag::CpuWriteCombined);
// if (pDesc->Usage != D3D11_USAGE_DEFAULT)
Expand Down
16 changes: 8 additions & 8 deletions src/d3d11/d3d11_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct D3D11ShaderResourceView : ID3D11ShaderResourceView1 {
Buffer *buffer_{};
BufferSlice slice_{};
Texture *texture_{};
unsigned view_id_{};
uint64_t view_id_{};
ResourceSubsetState subset_{};
uint32_t bind_flags_{};

Expand All @@ -38,7 +38,7 @@ struct D3D11ShaderResourceView : ID3D11ShaderResourceView1 {
texture() const {
return texture_;
};
unsigned
uint64_t
viewId() const {
return view_id_;
};
Expand All @@ -57,7 +57,7 @@ struct D3D11UnorderedAccessView : ID3D11UnorderedAccessView1 {
Buffer *buffer_{};
BufferSlice slice_{};
Texture *texture_{};
unsigned view_id_{};
uint64_t view_id_{};
Rc<Buffer> counter_;
ResourceSubsetState subset_{};
uint32_t bind_flags_{};
Expand All @@ -74,7 +74,7 @@ struct D3D11UnorderedAccessView : ID3D11UnorderedAccessView1 {
texture() const {
return texture_;
};
unsigned
uint64_t
viewId() const {
return view_id_;
};
Expand All @@ -90,7 +90,7 @@ struct D3D11UnorderedAccessView : ID3D11UnorderedAccessView1 {
struct D3D11RenderTargetView : ID3D11RenderTargetView1 {
Com<D3D11ResourceCommon> resource_{};
Texture *texture_{};
unsigned view_id_{};
uint64_t view_id_{};
MTL_RENDER_PASS_ATTACHMENT_DESC pass_desc_;
WMTPixelFormat format_{};
ResourceSubsetState subset_{};
Expand All @@ -108,7 +108,7 @@ struct D3D11RenderTargetView : ID3D11RenderTargetView1 {
texture() const {
return texture_;
};
unsigned
uint64_t
viewId() const {
return view_id_;
};
Expand All @@ -120,7 +120,7 @@ struct D3D11RenderTargetView : ID3D11RenderTargetView1 {
struct D3D11DepthStencilView : ID3D11DepthStencilView {
Com<D3D11ResourceCommon> resource_{};
Texture *texture_{};
unsigned view_id_{};
uint64_t view_id_{};
MTL_RENDER_PASS_ATTACHMENT_DESC pass_desc_;
WMTPixelFormat format_{};
uint32_t readonly_flags_{};
Expand All @@ -140,7 +140,7 @@ struct D3D11DepthStencilView : ID3D11DepthStencilView {
texture() const {
return texture_;
};
unsigned
uint64_t
viewId() const {
return view_id_;
};
Expand Down
5 changes: 5 additions & 0 deletions src/dxmt/dxmt_binding_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ template <typename Element, size_t NumElements> class BindingSet {
return bound.any();
}

constexpr bool
any_bound_masked(uint64_t mask) const noexcept {
return (bound.qword(0) & mask) != 0;
}

constexpr uint32_t
max_binding_64() const noexcept {
uint64_t qword = dirty.qword(0);
Expand Down
7 changes: 2 additions & 5 deletions src/dxmt/dxmt_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ BufferAllocation::BufferAllocation(WMT::Device device, const WMTBufferInfo &info
suballocation_count_ = DXMT_PAGE_SIZE / suballocation_size_;
info_.length = DXMT_PAGE_SIZE;
}
fenceTrackers.resize(suballocation_count_);
if (flags_.test(BufferAllocationFlag::CpuPlaced)) {
placed_buffer = wsi::aligned_malloc(info_.length, DXMT_PAGE_SIZE);
info_.memory.set(placed_buffer);
}
obj_ = device.newBuffer(info_);
gpuAddress_ = info_.gpu_address;
mappedMemory_ = info_.memory.get_accessible_or_null();
depkey = EncoderDepSet::generateNewKey(global_buffer_seq.fetch_add(1));
};

BufferAllocation::~BufferAllocation() {
Expand Down Expand Up @@ -130,10 +130,7 @@ Buffer::createView(BufferViewDescriptor const &descriptor) {

Rc<BufferAllocation>
Buffer::allocate(Flags<BufferAllocationFlag> flags) {
WMTResourceOptions options = WMTResourceStorageModeShared;
if (flags.test(BufferAllocationFlag::GpuReadonly)) {
options |= WMTResourceHazardTrackingModeUntracked;
}
WMTResourceOptions options = WMTResourceHazardTrackingModeUntracked;
if (flags.test(BufferAllocationFlag::CpuWriteCombined)) {
options |= WMTResourceOptionCPUCacheModeWriteCombined;
}
Expand Down
11 changes: 8 additions & 3 deletions src/dxmt/dxmt_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "rc/util_rc_ptr.hpp"
#include "thread.hpp"
#include "util_flags.hpp"
#include "util_svector.hpp"

namespace dxmt {

Expand All @@ -23,7 +24,7 @@ enum class BufferAllocationFlag : uint32_t {
CpuPlaced = 6,
};

typedef unsigned BufferViewKey;
typedef uint64_t BufferViewKey;

struct BufferViewDescriptor {
WMTPixelFormat format;
Expand Down Expand Up @@ -88,6 +89,10 @@ class BufferAllocation : public Allocation {
return current_suballocation_ * stride;
}

uint32_t currentSuballocation() {
return current_suballocation_;
}

void
updateContents(uint64_t offset, const void *data, uint64_t length, uint32_t suballocation = 0) noexcept {
if (likely(mappedMemory_ != nullptr && !flags_.test(BufferAllocationFlag::GpuManaged))) {
Expand All @@ -98,7 +103,7 @@ class BufferAllocation : public Allocation {
}

DXMT_RESOURCE_RESIDENCY_STATE residencyState;
EncoderDepKey depkey;
small_vector<GenericAccessTracker, 1> fenceTrackers;

private:
BufferAllocation(WMT::Device device, const WMTBufferInfo &info, Flags<BufferAllocationFlag> flags);
Expand All @@ -111,7 +116,7 @@ class BufferAllocation : public Allocation {
WMTBufferInfo info_;
uint32_t version_ = 0;
Flags<BufferAllocationFlag> flags_;
std::vector<std::unique_ptr<BufferView>> cached_view_;
small_vector<std::unique_ptr<BufferView>, 1> cached_view_;
void *mappedMemory_;
uint64_t gpuAddress_;
uint32_t current_suballocation_ = 0;
Expand Down
60 changes: 55 additions & 5 deletions src/dxmt/dxmt_command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,13 @@ ClearRenderTargetContext::begin(Rc<Texture> texture, TextureViewKey view) {

if (dsv_flag) {
auto &depth = pass_info.depth;
depth.attachment = ctx_.access(texture, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
depth.attachment = ctx_.access<PipelineStage::Pixel>(texture, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
depth.depth_plane = 0;
depth.load_action = WMTLoadActionLoad;
depth.store_action = WMTStoreActionStore;
} else {
auto &color = pass_info.colors[0];
color.attachment = ctx_.access(texture, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
color.attachment = ctx_.access<PipelineStage::Pixel>(texture, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
color.depth_plane = 0;
color.load_action = WMTLoadActionLoad;
color.store_action = WMTStoreActionStore;
Expand Down Expand Up @@ -345,18 +345,19 @@ DepthStencilBlitContext::copyFromBuffer(
auto height = depth_stencil->height(view);
auto &pass_info = *ctx_.startRenderPass(0b11, 0, 0, 0);
auto &depth = pass_info.depth;
depth.attachment = ctx_.access(depth_stencil, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
depth.attachment = ctx_.access<PipelineStage::Pixel>(depth_stencil, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
depth.depth_plane = 0;
depth.load_action = WMTLoadActionLoad;
depth.store_action = WMTStoreActionStore;

auto &stencil = pass_info.stencil;
stencil.attachment = ctx_.access(depth_stencil, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
stencil.attachment = ctx_.access<PipelineStage::Pixel>(depth_stencil, view, DXMT_ENCODER_RESOURCE_ACESS_WRITE);
stencil.depth_plane = 0;
stencil.load_action = WMTLoadActionLoad;
stencil.store_action = WMTStoreActionStore;

auto [src_, src_sub_offset] = ctx_.access(src, src_offset, src_length, DXMT_ENCODER_RESOURCE_ACESS_READ);
auto [src_, src_sub_offset] =
ctx_.access<PipelineStage::Pixel>(src, src_offset, src_length, DXMT_ENCODER_RESOURCE_ACESS_READ);

pass_info.render_target_width = width;
pass_info.render_target_height = height;
Expand Down Expand Up @@ -694,4 +695,53 @@ MTLFXMVScaleContext::dispatch(
ctx_.endPass();
}

TileBarrierContext::TileBarrierContext(WMT::Device device, InternalCommandLibrary &lib, ArgumentEncodingContext &ctx) :
ctx_(ctx),
device_(device) {
tile_function_ = lib.getLibrary().newFunction("tile_barrier");
}

void
TileBarrierContext::dispatch() {
if (auto tile_pso = getPSO(ctx_.currentRenderEncoder()->tile_barrier_pso_key)) {
auto &cmd_pso = ctx_.encodeRenderCommand<wmtcmd_render_setpso>();
cmd_pso.type = WMTRenderCommandSetPSO;
cmd_pso.pso = tile_pso;

auto &cmd_dispatch = ctx_.encodeRenderCommand<wmtcmd_render_dispatch_threads_per_tile>();
cmd_dispatch.type = WMTRenderCommandDispatchThreadsPerTile;
cmd_dispatch.width = kBarrierTileSize;
cmd_dispatch.height = kBarrierTileSize;

if (auto pso_recover = ctx_.currentRenderEncoder()->last_pso) {
auto &cmd_recover = ctx_.encodeRenderCommand<wmtcmd_render_setpso>();
cmd_recover.type = WMTRenderCommandSetPSO;
cmd_recover.pso = pso_recover;
}
}
}

WMT::RenderPipelineState
TileBarrierContext::getPSO(TileBarrierPSOKey &key) {
auto it = psos_.find(key);
if (it != psos_.end())
return it->second;

WMTTileRenderPipelineInfo info;
WMT::InitializeTileRenderPipelineInfo(info);
memcpy(&info.color_formats, key.color_formats, sizeof(key.color_formats));
info.raster_sample_count = key.raster_sample_count;
info.tile_function = tile_function_;

WMT::Reference<WMT::Error> err;
auto pso = device_.newRenderPipelineState(info, err);

if (!pso) {
ERR("Failed to create tile PSO: ", err.description().getUTF8String());
return {};
}

return psos_.emplace(key, std::move(pso)).first->second;
}

} // namespace dxmt
43 changes: 43 additions & 0 deletions src/dxmt/dxmt_command.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,32 @@
#include <array>
#include <unordered_map>

namespace dxmt {
struct TileBarrierPSOKey {
WMTPixelFormat color_formats[8];
unsigned raster_sample_count;
};
} // namespace dxmt

namespace std {
template <> struct hash<dxmt::TileBarrierPSOKey> {
size_t
operator()(const dxmt::TileBarrierPSOKey &v) const noexcept {
constexpr size_t binsize = sizeof(v);
return std::hash<string_view>{}({reinterpret_cast<const char *>(&v), binsize});
};
};

template <> struct equal_to<dxmt::TileBarrierPSOKey> {
bool
operator()(const dxmt::TileBarrierPSOKey &x, const dxmt::TileBarrierPSOKey &y) const {
constexpr size_t binsize = sizeof(x);
return std::string_view({reinterpret_cast<const char *>(&x), binsize}) ==
std::string_view({reinterpret_cast<const char *>(&y), binsize});
}
};
}; // namespace std

namespace dxmt {

class ArgumentEncodingContext;
Expand Down Expand Up @@ -281,4 +307,21 @@ class MTLFXMVScaleContext {
WMT::Reference<WMT::ComputePipelineState> pso_downscale_dilated_mv_;
};

constexpr auto kBarrierTileSize = 16;

class TileBarrierContext {
public:
TileBarrierContext(WMT::Device device, InternalCommandLibrary &lib, ArgumentEncodingContext &ctx);

void dispatch();

private:
WMT::RenderPipelineState getPSO(TileBarrierPSOKey &format);

ArgumentEncodingContext &ctx_;
WMT::Device device_;
WMT::Reference<WMT::Function> tile_function_;
std::unordered_map<TileBarrierPSOKey, WMT::Reference<WMT::RenderPipelineState>> psos_;
};

} // namespace dxmt
4 changes: 4 additions & 0 deletions src/dxmt/dxmt_command.metal
Original file line number Diff line number Diff line change
Expand Up @@ -607,3 +607,7 @@ struct DXMTClearUintMetadata {
float2 lo_mv_pixel = hi_mv_pixel * scale;
downscaled.write(lo_mv_pixel.xyxy, pos);
}

[[kernel]] void tile_barrier(ushort2 pos [[thread_position_in_threadgroup]]) {
// empty
}
Loading
Loading