Skip to content

Commit 089b387

Browse files
committed
Removed the -maxrregcount 128 argument from the nvcc build commands, since it seems to cause problems on some GPUs
1 parent 481dc3f commit 089b387

2 files changed

Lines changed: 25 additions & 17 deletions

File tree

.github/workflows/main.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: cuda-cross-build
1+
22

33
on: [push]
44

@@ -42,7 +42,7 @@ jobs:
4242
- name: build
4343
run: |
4444
45-
nvcc -g -maxrregcount 128 --resource-usage -lineinfo -Xptxas -lineinfo -v -O3 -arch=all -m=64 main.cu -o loneliest-cuda.exe -diag-suppress 177 -ID:\a\LoneliestSeed\LoneliestSeed\boinc\ -ID:\a\LoneliestSeed\LoneliestSeed\boinc\win\ -LD:\a\LoneliestSeed\LoneliestSeed\boinc\lib\win\ -lboinc_api -lboinc -lcuda -luser32 -DBOINC -D_WIN32 -allow-unsupported-compiler
45+
nvcc -g --resource-usage -lineinfo -Xptxas -lineinfo -v -O3 -arch=all -m=64 main.cu -o loneliest-cuda.exe -diag-suppress 177 -ID:\a\LoneliestSeed\LoneliestSeed\boinc\ -ID:\a\LoneliestSeed\LoneliestSeed\boinc\win\ -LD:\a\LoneliestSeed\LoneliestSeed\boinc\lib\win\ -lboinc_api -lboinc -lcuda -luser32 -DBOINC -D_WIN32 -allow-unsupported-compiler
4646
dir
4747
- uses: actions/upload-artifact@v3
4848
with:
@@ -56,7 +56,7 @@ jobs:
5656
- uses: actions/checkout@v4
5757
- name: build
5858
run: |
59-
nvcc -g -maxrregcount 128 --resource-usage -lineinfo -Xptxas -lineinfo -v -O3 -arch=all \
59+
nvcc -g --resource-usage -lineinfo -Xptxas -lineinfo -v -O3 -arch=all \
6060
main.cu -m64 -o loneliest-cuda \
6161
-DBOINC -Iboinc/ -Lboinc/lib/lin -lcuda -lboinc_api -lboinc -Xptxas -v
6262
ls -la

main.cu

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4715,24 +4715,19 @@ __device__ int radius = 5;
47154715

47164716
__global__ void kernel(uint64_t s, uint64_t *out) {
47174717
uint64_t input_seed = blockDim.x * blockIdx.x + threadIdx.x + s;
4718-
//atomicAdd(&checked, 1ull);
4719-
47204718
int structType = Village;
47214719
int mc = MC_1_20;
4722-
4720+
int region_size = 34;
4721+
4722+
int size = 5 * region_size;
47234723
Generator g;
47244724
setupGenerator(&g, mc, 0);
4725-
47264725
StructureConfig sconf;
47274726
getStructureConfig(Village, mc, &sconf);
47284727

4729-
int region_size = 34;
4730-
4731-
int size = 5 * region_size;
4732-
47334728
uint64_t seed = input_seed;
4734-
//printf("%" PRIu64 "\n", seed);
47354729
applySeed(&g, DIM_OVERWORLD, seed);
4730+
47364731
int villages = 0;
47374732
int i = 0;
47384733
bool found = false;
@@ -4742,16 +4737,11 @@ __global__ void kernel(uint64_t s, uint64_t *out) {
47424737
found = isViableStructurePos(structType, &g, p.x, p.z, 0);
47434738

47444739
if (found) {
4745-
//villages++;
4746-
//if(villages > village_thresh)
47474740
return;
47484741
}
47494742
}
47504743
}
47514744
out[blockDim.x * blockIdx.x + threadIdx.x] = seed;
4752-
//out_villages[blockDim.x * blockIdx.x + threadIdx.x] = villages;
4753-
//printf("%d\n", villages);
4754-
//printf("Found new best: %" PRIi64 " %d\n", seed, villages);
47554745
}
47564746

47574747
#include <time.h>
@@ -4843,8 +4833,26 @@ int main(int argc, char **argv) {
48434833
boinc_end_critical_section();
48444834
}
48454835
#endif
4836+
48464837
cudaSetDevice(device);
48474838
cudaMallocManaged(&out, (blocks * threads) * sizeof(*out));
4839+
// int numBlocks;
4840+
// cudaDeviceProp prop;
4841+
// int activeWarps;
4842+
// int maxWarps;
4843+
// cudaGetDevice(&device);
4844+
// cudaGetDeviceProperties(&prop, device);
4845+
// cudaOccupancyMaxActiveBlocksPerMultiprocessor(
4846+
// &numBlocks,
4847+
// kernel,
4848+
// blocks,
4849+
// 0);
4850+
// activeWarps = numBlocks * blocks / prop.warpSize;
4851+
// maxWarps = prop.maxThreadsPerMultiProcessor / prop.warpSize;
4852+
// printf("warpSize: %i\n", prop.warpSize);
4853+
// printf("Active warps: %i\n", activeWarps);
4854+
// printf("Max warps: %i\n", maxWarps);
4855+
// printf("Occupancy: %2f%%\n",(double)activeWarps / maxWarps * 100 );
48484856
for(int i = 0; i < (blocks * threads); i++){
48494857
out[i] = 0;
48504858
}

0 commit comments

Comments
 (0)