Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions 72_CooperativeBinarySearch/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for this example: creates the executable and, when requested,
# embeds the files found under app_resources/ as builtin resources.

# Pull in the shared `common` module. RES receives the full path of the included
# file, or NOTFOUND if the include failed.
include(common RESULT_VARIABLE RES)
if(NOT RES)
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Create the executable target for this example.
# NOTE(review): the meaning of the four empty arguments is defined by
# nbl_create_executable_project, which is not visible here - confirm before changing.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Everything below only runs when builtin-resource embedding is enabled.
if(NBL_EMBED_BUILTIN_RESOURCES)
# Name of the target that will hold the embedded resource data.
# NOTE(review): presumably EXECUTABLE_NAME is set by nbl_create_executable_project - confirm.
set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
# Directory (relative to this CMakeLists.txt) whose contents get embedded.
set(RESOURCE_DIR "app_resources")

# Absolute paths: where resources are searched for, and where the generated
# sources/headers are written (inside the binary dir, never the source tree).
get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

# Collect every file under the resource directory, as paths relative to it.
# CONFIGURE_DEPENDS makes the build re-check the glob so newly added files are picked up.
file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
# Register each discovered file in the RESOURCES_TO_EMBED list (passed by name).
foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
endforeach()

# Create the builtin-resource target from the collected list; "nbl::this_example::builtin"
# is the namespace string handed to the generator.
ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

# Attach the builtin-resource target to the example executable.
LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
119 changes: 119 additions & 0 deletions 72_CooperativeBinarySearch/app_resources/binarySearch.comp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#pragma wave shader_stage(compute)

#include "common.h"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_ballot.hlsl"
using namespace nbl::hlsl;

// Push constants for the dispatch; the PushConstants type comes from common.h.
[[vk::push_constant]] PushConstants Constants;
// Binding 0: read-only buffer that gets searched.
// NOTE(review): presumably sorted in non-decreasing order (the search assumes it) - confirm on the host side.
[[vk::binding(0)]] StructuredBuffer<uint> Histogram;
// Binding 1: receives one search result per invocation, indexed by SV_DispatchThreadID.x in main().
[[vk::binding(1)]] RWStructuredBuffer<uint> Output;

// Number of consecutive search values handled by one workgroup and the capacity of the
// groupshared value cache; it equals the workgroup size declared on main().
static const uint32_t GroupsharedSize = 256;

// Returns the smallest power of two that is greater than or equal to `number`.
//
// - `number` of 0 or 1 yields 1.
// - `number` above 0x80000000 cannot be represented; the shift wraps and the result is 0.
uint getNextPowerOfTwo(uint number) {
    // For number == 1 the expression below would evaluate firstbithigh(0), which reports
    // "no bit set" as -1 (0xFFFFFFFF); shifting by that amount is not a defined operation.
    // number == 0 would wrap to 0xFFFFFFFF first and overflow the shift to 0.
    if (number <= 1) {
        return 1;
    }
    // The highest set bit of (number - 1) is at position p, so the answer is 2^(p + 1).
    return 2u << firstbithigh(number - 1);
}

// Ballots `condition` across the active lanes and returns the lowest set bit position of
// that ballot (via subgroupBallotFindLSB). When no bit is set at all, returns
// WaveGetLaneCount() as a "not found" value.
uint getLaneWithFirstBitSet(bool condition) {
    const uint4 laneMask = WaveActiveBallot(condition);
    if (any(laneMask != 0)) {
        return nbl::hlsl::glsl::subgroupBallotFindLSB(laneMask);
    }
    return WaveGetLaneCount();
}

// Wave-cooperative search for `findValue` in searchBuffer[0 .. searchBufferSize).
// Every lane of the wave takes part: while the window is wider than the wave, each lane
// tests one pivot and the ballot of the results narrows the window; afterwards each lane
// loads one element of the remaining window.
// Returns `left + L - 1`, where `left` is the start of the final window and L is the first
// lane whose loaded element is strictly greater than findValue (the lane count if none is).
//
// findValue must be the same across the entire wave
// Could use something like WaveReadFirstLane to be fully sure
//
// NOTE(review): presumably searchBuffer is sorted in non-decreasing order - not checked here.
// NOTE(review): searchBufferSize == 0 makes `right` wrap to 0xFFFFFFFF, and the return value
// wraps to 0xFFFFFFFF when left == 0 and lane 0 already holds a greater value - confirm
// callers never reach these cases.
uint binarySearchLowerBoundFindValue(uint findValue, StructuredBuffer<uint> searchBuffer, uint searchBufferSize) {
uint lane = WaveGetLaneIndex();

// Window bounds; `right` is the index of the last element.
uint left = 0;
uint right = searchBufferSize - 1;

// Window width rounded up to a power of two.
uint32_t range = getNextPowerOfTwo(right - left);
// do pivots as long as we can't coalesced load
while (range > WaveGetLaneCount())
{
// there must be at least 1 gap between subsequent pivots
const uint32_t step = range / WaveGetLaneCount();
const uint32_t halfStep = step >> 1;
// Lane i probes the middle of the i-th `step`-wide slice of the window.
const uint32_t pivotOffset = lane * step+halfStep;
const uint32_t pivotIndex = left + pivotOffset;

// A lane votes when its pivot is below `right` and the pivot's value is <= findValue.
uint4 notGreaterPivots = WaveActiveBallot(pivotIndex < right && !(findValue < searchBuffer[pivotIndex]));
// Number of voting lanes.
uint partition = nbl::hlsl::glsl::subgroupBallotBitCount(notGreaterPivots);
// only move left if needed
if (partition != 0)
left += partition * step - halfStep;
// if we go into final half partition, the range becomes less too
range = partition != WaveGetLaneCount() ? step : halfStep;
}

// Final pass: lane i inspects element left + i.
uint threadSearchIndex = left + lane;
bool laneValid = threadSearchIndex < searchBufferSize;
// Out-of-range lanes use -1 (0xFFFFFFFF as uint), which compares greater than any
// findValue other than 0xFFFFFFFF itself.
uint histAtIndex = laneValid ? searchBuffer[threadSearchIndex] : -1;
uint firstLaneGreaterThan = getLaneWithFirstBitSet(histAtIndex > findValue);

// Index just before the first element greater than findValue.
return left + firstLaneGreaterThan - 1;
}

// Search results for the first and last search value of this workgroup.
groupshared uint shared_groupSearchBufferMinIndex;
groupshared uint shared_groupSearchBufferMaxIndex;
// Cached copy of searchBuffer[min index .. max index].
groupshared uint shared_groupSearchValues[GroupsharedSize];

// Workgroup-cooperative search. Each workgroup covers GroupsharedSize consecutive search
// values, and every invocation resolves exactly one of them (first value of the group +
// groupThread). The expensive search over the whole buffer is only done twice per
// workgroup, and each of those searches divides the candidate set by the number of lanes
// in a wave per iteration (log32 or log64).
//
// groupIndex        - workgroup index; selects the range of search values
// groupThread       - invocation index within the workgroup
// searchBuffer      - buffer to search
// searchBufferSize  - number of elements in searchBuffer
//
// Returns the buffer index found for this invocation's search value.
uint binarySearchLowerBoundCooperative(uint groupIndex, uint groupThread, StructuredBuffer<uint> searchBuffer, uint searchBufferSize) {
    const uint firstSearchValue = groupIndex * GroupsharedSize;
    const uint lastSearchValue = firstSearchValue + GroupsharedSize - 1;

    // The first two waves of the workgroup each run one full search: wave 0 for the first
    // search value of the group, wave 1 for the last one. The first lane of each wave
    // publishes its result to groupshared memory.
    const uint waveIndex = groupThread / WaveGetLaneCount();
    if (waveIndex < 2) {
        const bool searchesFirst = (waveIndex == 0);
        const uint target = searchesFirst ? firstSearchValue : lastSearchValue;
        const uint found = binarySearchLowerBoundFindValue(target, searchBuffer, searchBufferSize);
        if (WaveIsFirstLane()) {
            if (searchesFirst) {
                shared_groupSearchBufferMinIndex = found;
            } else {
                shared_groupSearchBufferMaxIndex = found;
            }
        }
    }
    GroupMemoryBarrierWithGroupSync();

    // Both bounds are final after the barrier, so read them once.
    const uint minIndex = shared_groupSearchBufferMinIndex;
    const uint maxIndex = shared_groupSearchBufferMaxIndex;

    // Since every instance has at least one triangle, caching one buffer element per
    // invocation is enough to cover the whole [minIndex, maxIndex] range.
    const uint loadIndex = minIndex + groupThread;
    if (loadIndex <= maxIndex) {
        shared_groupSearchValues[groupThread] = searchBuffer[loadIndex];
    }
    GroupMemoryBarrierWithGroupSync();

    const uint lastCachedSlot = maxIndex - minIndex;
    const uint searchValue = firstSearchValue + groupThread;

    // Linear scan of the cached values: stop at the first one greater than this
    // invocation's search value and report the index just before it. If none is greater,
    // the answer is the group's max index.
    uint laneValue = maxIndex;
    for (uint slot = 0; slot <= lastCachedSlot; ++slot) {
        if (shared_groupSearchValues[slot] > searchValue) {
            laneValue = minIndex + slot - 1;
            break;
        }
    }

    return laneValue;
}

// Compute entry point: each invocation runs the cooperative search over Histogram
// (Constants.EntityCount elements) and stores its result at Output[SV_DispatchThreadID.x].
[numthreads(256, 1, 1)]
void main(const uint3 thread : SV_DispatchThreadID, const uint3 groupThread : SV_GroupThreadID, const uint3 group : SV_GroupID)
{
    const uint result = binarySearchLowerBoundCooperative(group.x, groupThread.x, Histogram, Constants.EntityCount);
    Output[thread.x] = result;
}
19 changes: 19 additions & 0 deletions 72_CooperativeBinarySearch/app_resources/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_
#define _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_

#include <nbl/builtin/hlsl/cpp_compat/basic.h>
#include <nbl/builtin/hlsl/cpp_compat/matrix.hlsl>

using namespace nbl::hlsl;
namespace nbl
{
namespace hlsl
{

// Push-constant block of the cooperative binary search example.
struct PushConstants
{
    // Element count; the compute shader passes it as the size of the searched buffer.
    uint32_t EntityCount;
};

} // namespace hlsl
} // namespace nbl

#endif // _COOPERATIVE_BINARY_SEARCH_HLSL_INCLUDED_
19 changes: 19 additions & 0 deletions 72_CooperativeBinarySearch/app_resources/present.frag.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#pragma wave shader_stage(fragment)

// vertex shader is provided by the fullScreenTriangle extension
#include <nbl/builtin/hlsl/ext/FullScreenTriangle/SVertexAttributes.hlsl>
using namespace nbl::hlsl;
using namespace ext::FullScreenTriangle;

// binding 0 set 0
// Both declarations carry [[vk::combinedImageSampler]] and the same (binding 0, set 0):
// the texture and its sampler are declared against a single combined image-sampler descriptor.
[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture;
[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState;

// Fragment entry point: samples `texture` at the interpolated UV and outputs its RGB
// with alpha forced to 1.
[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0
{
    float32_t4 color = texture.Sample(samplerState, vxAttr.uv);
    color.a = 1.0f;
    return color;
}
28 changes: 28 additions & 0 deletions 72_CooperativeBinarySearch/config.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"enableParallelBuild": true,
"threadsPerBuildProcess" : 2,
"isExecuted": false,
"scriptPath": "",
"cmake": {
"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
"buildModes": [],
"requiredOptions": []
},
"profiles": [
{
"backend": "vulkan",
"platform": "windows",
"buildModes": [],
"runConfiguration": "Release",
"gpuArchitectures": []
}
],
"dependencies": [],
"data": [
{
"dependencies": [],
"command": [""],
"outputs": []
}
]
}
11 changes: 11 additions & 0 deletions 72_CooperativeBinarySearch/include/nbl/this_example/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_

#include "nbl/examples/examples.hpp"

// example's own headers
#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ?
#include "nbl/ext/ImGui/ImGui.h"
#include "imgui/imgui_internal.h"

#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_
Loading