Skip to content

Commit 1800a42

Browse files
committed
Add trivial (identity) geometry shader step.
1 parent 11a0e62 commit 1800a42

2 files changed

Lines changed: 78 additions & 35 deletions

File tree

src/main.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,12 @@ int main(int argc, char **argv) {
3838
}
3939

4040
void mainLoop() {
41+
int iters = 0;
4142
while (!glfwWindowShouldClose(window)) {
43+
iters++;
44+
if (iters > 500) {
45+
exit(1);
46+
}
4247
glfwPollEvents();
4348
runCuda();
4449

@@ -69,7 +74,6 @@ void mainLoop() {
6974
//-------------------------------
7075
//---------RUNTIME STUFF---------
7176
//-------------------------------
72-
7377
void runCuda() {
7478
// Map OpenGL buffer object for writing from CUDA on a single GPU
7579
// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer

src/rasterize.cu

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
#include <cstdio>
1313
#include <climits>
1414
#include <cuda.h>
15+
16+
#include <thrust/execution_policy.h>
17+
#include <thrust/device_vector.h>
1518
#include <thrust/random.h>
19+
#include <thrust/remove.h>
20+
1621
#include <glm/glm.hpp>
1722
#include <glm/gtc/matrix_transform.hpp>
1823
#include <glm/gtc/matrix_inverse.hpp>
@@ -43,6 +48,7 @@ struct Triangle {
4348
glm::vec3 col[3];
4449

4550
glm::vec3 worldPos[3];
51+
bool valid;
4652
};
4753
struct Fragment {
4854
glm::vec3 color;
@@ -58,13 +64,15 @@ static int width = 0;
5864
static int height = 0;
5965
static int bufIdxSize = 0;
6066
static int vertCount = 0;
67+
static int primMultFactor = 4;
6168

62-
static int *dev_bufIdx = NULL;
63-
static VertexIn *dev_bufVertexIn = NULL;
64-
static VertexOut *dev_bufVertexOut = NULL;
65-
static Triangle *dev_primitives = NULL;
66-
static Fragment *dev_depthbuffer = NULL;
67-
static glm::vec3 *dev_framebuffer = NULL;
69+
static int *dev_bufIdx = NULL;
70+
static VertexIn *dev_bufVertexIn = NULL;
71+
static VertexOut *dev_bufVertexOut = NULL;
72+
static Triangle *dev_origPrimitives = NULL;
73+
static Triangle *dev_genPrimitives = NULL;
74+
static Fragment *dev_depthbuffer = NULL;
75+
static glm::vec3 *dev_framebuffer = NULL;
6876

6977
__device__ void printVec3(glm::vec3 v) {
7078
printf("(%f, %f, %f)\n", v.x, v.y, v.z);
@@ -136,6 +144,10 @@ void rasterizeInit(int w, int h) {
136144
cudaMalloc(&dev_bufVertexOut, width * height * sizeof(VertexOut));
137145
cudaMemset( dev_bufVertexOut, 0, width * height * sizeof(VertexOut));
138146

147+
cudaFree(dev_genPrimitives);
148+
cudaMalloc(&dev_genPrimitives, primMultFactor * width * height * sizeof(Triangle));
149+
cudaMemset( dev_genPrimitives, 0, primMultFactor * width * height * sizeof(Triangle));
150+
139151
cudaFree(dev_framebuffer);
140152
cudaMalloc(&dev_framebuffer, width * height * sizeof(glm::vec3));
141153
cudaMemset( dev_framebuffer, 0, width * height * sizeof(glm::vec3));
@@ -166,9 +178,9 @@ void rasterizeSetBuffers(
166178
cudaMalloc(&dev_bufVertexIn, vertCount * sizeof(VertexIn));
167179
cudaMemcpy( dev_bufVertexIn, bufVertexIn, vertCount * sizeof(VertexIn), cudaMemcpyHostToDevice);
168180

169-
cudaFree(dev_primitives);
170-
cudaMalloc(&dev_primitives, vertCount / 3 * sizeof(Triangle));
171-
cudaMemset(dev_primitives, 0, vertCount / 3 * sizeof(Triangle));
181+
cudaFree(dev_origPrimitives);
182+
cudaMalloc(&dev_origPrimitives, vertCount / 3 * sizeof(Triangle));
183+
cudaMemset(dev_origPrimitives, 0, vertCount / 3 * sizeof(Triangle));
172184

173185
checkCUDAError("rasterizeSetBuffers");
174186
}
@@ -243,6 +255,15 @@ __global__ void assemblePrimitives(int primitivecount, VertexOut *vertices,
243255
}
244256
}
245257

258+
__global__ void geometryShader(int primitivecount, Triangle *primitives,
259+
Triangle *genprimitives) {
260+
int k = (blockIdx.x * blockDim.x) + threadIdx.x;
261+
262+
if (k < primitivecount) {
263+
genprimitives[k] = primitives[k];
264+
}
265+
}
266+
246267
__device__ void storeFragment(float x, float y, float width, float height,
247268
int fragmentidx, Triangle tri, Fragment *fragments) {
248269

@@ -319,6 +340,18 @@ __global__ void fragmentShader(int width, int height,
319340
}
320341
}
321342

343+
struct terminator {
344+
__device__ bool operator()(const Triangle tri) {
345+
return tri.valid == false;
346+
}
347+
};
348+
349+
int compactPrimitives(int primitivecount, Triangle *primitives) {
350+
Triangle *new_end = thrust::remove_if(thrust::device,
351+
primitives, primitives+primitivecount, terminator());
352+
return (new_end - primitives);
353+
}
354+
322355
/**
323356
* Perform rasterization.
324357
*/
@@ -373,50 +406,53 @@ void rasterize(uchar4 *pbo) {
373406
clearDepthBuffer<<<blockCount2d, blockSize2d>>>(width, height, dev_depthbuffer);
374407

375408
// VertexIn -> VertexOut
376-
cudaEventRecord(begin);
409+
cudaEventRecord(begin);
377410
vertexShader<<<vertBlockCount, blockSize1d>>>(vertCount, dev_bufVertexIn,
378411
dev_bufVertexOut, model, invModel, mvp);
379-
checkCUDAError("");
412+
checkCUDAError("");
380413

381-
cudaEventRecord(end);
382-
cudaEventSynchronize(end);
383-
cudaEventElapsedTime(&vShadeTime, begin, end);
414+
cudaEventRecord(end); cudaEventSynchronize(end); cudaEventElapsedTime(&vShadeTime, begin, end);
384415

385416
// VertexOut -> Triangle
386-
cudaEventRecord(begin);
417+
cudaEventRecord(begin);
387418
assemblePrimitives<<<triBlockCount, blockSize1d>>>(tricount,
388-
dev_bufVertexOut, dev_bufIdx, dev_primitives);
389-
checkCUDAError("");
419+
dev_bufVertexOut, dev_bufIdx, dev_origPrimitives);
420+
checkCUDAError("");
390421

391-
cudaEventRecord(end);
392-
cudaEventSynchronize(end);
393-
cudaEventElapsedTime(&assPrimitivesTime, begin, end);
422+
cudaEventRecord(end); cudaEventSynchronize(end); cudaEventElapsedTime(&assPrimitivesTime, begin, end);
423+
424+
// Triangle -> Triangle
425+
cudaEventRecord(begin);
426+
geometryShader<<<triBlockCount, blockSize1d>>>(tricount,
427+
dev_origPrimitives, dev_genPrimitives);
428+
checkCUDAError("");
429+
430+
cudaEventRecord(end); cudaEventSynchronize(end); cudaEventElapsedTime(&assPrimitivesTime, begin, end);
431+
432+
int genPrimitiveCount = compactPrimitives(tricount, dev_genPrimitives);
433+
dim3 genPrimCount((genPrimitiveCount + sideLength1d - 1) / sideLength1d);
394434

395435
// Triangle -> Fragment
396-
cudaEventRecord(begin);
436+
cudaEventRecord(begin);
397437
scanline<<<triBlockCount, blockSize1d>>>(width, height, tricount,
398-
dev_primitives, dev_depthbuffer);
399-
checkCUDAError("");
438+
dev_genPrimitives, dev_depthbuffer);
439+
checkCUDAError("");
400440

401-
cudaEventRecord(end);
402-
cudaEventSynchronize(end);
403-
cudaEventElapsedTime(&scanlineTime, begin, end);
441+
cudaEventRecord(end); cudaEventSynchronize(end); cudaEventElapsedTime(&scanlineTime, begin, end);
404442

405443
// Fragment -> Fragment
406-
cudaEventRecord(begin);
444+
cudaEventRecord(begin);
407445
fragmentShader<<<blockCount2d, blockSize2d>>>(width, height,
408446
dev_depthbuffer, c.light);
409-
checkCUDAError("");
447+
checkCUDAError("");
410448

411-
cudaEventRecord(end);
412-
cudaEventSynchronize(end);
413-
cudaEventElapsedTime(&fShadeTime, begin, end);
449+
cudaEventRecord(end); cudaEventSynchronize(end); cudaEventElapsedTime(&fShadeTime, begin, end);
414450

415451
// Clear CudaEvents
416452
cudaEventDestroy(begin);
417453
cudaEventDestroy(end);
418454

419-
fprintf(stderr, "%f %f %f %f\n", vShadeTime, assPrimitivesTime, scanlineTime, fShadeTime);
455+
//fprintf(stderr, "%f %f %f %f\n", vShadeTime, assPrimitivesTime, scanlineTime, fShadeTime);
420456

421457
// Copy depthbuffer colors into framebuffer
422458
render<<<blockCount2d, blockSize2d>>>(width, height, dev_depthbuffer, dev_framebuffer);
@@ -438,8 +474,11 @@ void rasterizeFree() {
438474
cudaFree(dev_bufVertexIn);
439475
dev_bufVertexIn = NULL;
440476

441-
cudaFree(dev_primitives);
442-
dev_primitives = NULL;
477+
cudaFree(dev_origPrimitives);
478+
dev_origPrimitives = NULL;
479+
480+
cudaFree(dev_genPrimitives);
481+
dev_genPrimitives = NULL;
443482

444483
cudaFree(dev_depthbuffer);
445484
dev_depthbuffer = NULL;

0 commit comments

Comments
 (0)