1212#include < cstdio>
1313#include < climits>
1414#include < cuda.h>
15+
16+ #include < thrust/execution_policy.h>
17+ #include < thrust/device_vector.h>
1518#include < thrust/random.h>
19+ #include < thrust/remove.h>
20+
1621#include < glm/glm.hpp>
1722#include < glm/gtc/matrix_transform.hpp>
1823#include < glm/gtc/matrix_inverse.hpp>
@@ -43,6 +48,7 @@ struct Triangle {
4348 glm::vec3 col[3 ];
4449
4550 glm::vec3 worldPos[3 ];
51+ bool valid;
4652};
4753struct Fragment {
4854 glm::vec3 color;
@@ -58,13 +64,15 @@ static int width = 0;
5864static int height = 0 ;
5965static int bufIdxSize = 0 ;
6066static int vertCount = 0 ;
67+ static int primMultFactor = 4 ;
6168
62- static int *dev_bufIdx = NULL ;
63- static VertexIn *dev_bufVertexIn = NULL ;
64- static VertexOut *dev_bufVertexOut = NULL ;
65- static Triangle *dev_primitives = NULL ;
66- static Fragment *dev_depthbuffer = NULL ;
67- static glm::vec3 *dev_framebuffer = NULL ;
69+ static int *dev_bufIdx = NULL ;
70+ static VertexIn *dev_bufVertexIn = NULL ;
71+ static VertexOut *dev_bufVertexOut = NULL ;
72+ static Triangle *dev_origPrimitives = NULL ;
73+ static Triangle *dev_genPrimitives = NULL ;
74+ static Fragment *dev_depthbuffer = NULL ;
75+ static glm::vec3 *dev_framebuffer = NULL ;
6876
6977__device__ void printVec3 (glm::vec3 v) {
7078 printf (" (%f, %f, %f)\n " , v.x , v.y , v.z );
@@ -136,6 +144,10 @@ void rasterizeInit(int w, int h) {
136144 cudaMalloc (&dev_bufVertexOut, width * height * sizeof (VertexOut));
137145 cudaMemset ( dev_bufVertexOut, 0 , width * height * sizeof (VertexOut));
138146
147+ cudaFree (dev_genPrimitives);
148+ cudaMalloc (&dev_genPrimitives, primMultFactor * width * height * sizeof (Triangle));
149+ cudaMemset ( dev_genPrimitives, 0 , primMultFactor * width * height * sizeof (Triangle));
150+
139151 cudaFree (dev_framebuffer);
140152 cudaMalloc (&dev_framebuffer, width * height * sizeof (glm::vec3));
141153 cudaMemset ( dev_framebuffer, 0 , width * height * sizeof (glm::vec3));
@@ -166,9 +178,9 @@ void rasterizeSetBuffers(
166178 cudaMalloc (&dev_bufVertexIn, vertCount * sizeof (VertexIn));
167179 cudaMemcpy ( dev_bufVertexIn, bufVertexIn, vertCount * sizeof (VertexIn), cudaMemcpyHostToDevice);
168180
169- cudaFree (dev_primitives );
170- cudaMalloc (&dev_primitives , vertCount / 3 * sizeof (Triangle));
171- cudaMemset (dev_primitives , 0 , vertCount / 3 * sizeof (Triangle));
181+ cudaFree (dev_origPrimitives );
182+ cudaMalloc (&dev_origPrimitives , vertCount / 3 * sizeof (Triangle));
183+ cudaMemset (dev_origPrimitives , 0 , vertCount / 3 * sizeof (Triangle));
172184
173185 checkCUDAError (" rasterizeSetBuffers" );
174186}
@@ -243,6 +255,15 @@ __global__ void assemblePrimitives(int primitivecount, VertexOut *vertices,
243255 }
244256}
245257
258+ __global__ void geometryShader (int primitivecount, Triangle *primitives,
259+ Triangle *genprimitives) {
260+ int k = (blockIdx .x * blockDim .x ) + threadIdx .x ;
261+
262+ if (k < primitivecount) {
263+ genprimitives[k] = primitives[k];
264+ }
265+ }
266+
246267__device__ void storeFragment (float x, float y, float width, float height,
247268 int fragmentidx, Triangle tri, Fragment *fragments) {
248269
@@ -319,6 +340,18 @@ __global__ void fragmentShader(int width, int height,
319340 }
320341}
321342
343+ struct terminator {
344+ __device__ bool operator ()(const Triangle tri) {
345+ return tri.valid == false ;
346+ }
347+ };
348+
349+ int compactPrimitives (int primitivecount, Triangle *primitives) {
350+ Triangle *new_end = thrust::remove_if (thrust::device,
351+ primitives, primitives+primitivecount, terminator ());
352+ return (new_end - primitives);
353+ }
354+
322355/* *
323356 * Perform rasterization.
324357 */
@@ -373,50 +406,53 @@ void rasterize(uchar4 *pbo) {
373406 clearDepthBuffer<<<blockCount2d, blockSize2d>>> (width, height, dev_depthbuffer);
374407
375408 // VertexIn -> VertexOut
376- cudaEventRecord (begin);
409+ cudaEventRecord (begin);
377410 vertexShader<<<vertBlockCount, blockSize1d>>> (vertCount, dev_bufVertexIn,
378411 dev_bufVertexOut, model, invModel, mvp);
379- checkCUDAError (" " );
412+ checkCUDAError (" " );
380413
381- cudaEventRecord (end);
382- cudaEventSynchronize (end);
383- cudaEventElapsedTime (&vShadeTime, begin, end);
414+ cudaEventRecord (end); cudaEventSynchronize (end); cudaEventElapsedTime (&vShadeTime, begin, end);
384415
385416 // VertexOut -> Triangle
386- cudaEventRecord (begin);
417+ cudaEventRecord (begin);
387418 assemblePrimitives<<<triBlockCount, blockSize1d>>> (tricount,
388- dev_bufVertexOut, dev_bufIdx, dev_primitives );
389- checkCUDAError (" " );
419+ dev_bufVertexOut, dev_bufIdx, dev_origPrimitives );
420+ checkCUDAError (" " );
390421
391- cudaEventRecord (end);
392- cudaEventSynchronize (end);
393- cudaEventElapsedTime (&assPrimitivesTime, begin, end);
422+ cudaEventRecord (end); cudaEventSynchronize (end); cudaEventElapsedTime (&assPrimitivesTime, begin, end);
423+
424+ // Triangle -> Triangle
425+ cudaEventRecord (begin);
426+ geometryShader<<<triBlockCount, blockSize1d>>> (tricount,
427+ dev_origPrimitives, dev_genPrimitives);
428+ checkCUDAError (" " );
429+
430+ cudaEventRecord (end); cudaEventSynchronize (end); cudaEventElapsedTime (&assPrimitivesTime, begin, end);
431+
432+ int genPrimitiveCount = compactPrimitives (tricount, dev_genPrimitives);
433+ dim3 genPrimCount ((genPrimitiveCount + sideLength1d - 1 ) / sideLength1d);
394434
395435 // Triangle -> Fragment
396- cudaEventRecord (begin);
436+ cudaEventRecord (begin);
397437 scanline<<<triBlockCount, blockSize1d>>> (width, height, tricount,
398- dev_primitives , dev_depthbuffer);
399- checkCUDAError (" " );
438+ dev_genPrimitives , dev_depthbuffer);
439+ checkCUDAError (" " );
400440
401- cudaEventRecord (end);
402- cudaEventSynchronize (end);
403- cudaEventElapsedTime (&scanlineTime, begin, end);
441+ cudaEventRecord (end); cudaEventSynchronize (end); cudaEventElapsedTime (&scanlineTime, begin, end);
404442
405443 // Fragment -> Fragment
406- cudaEventRecord (begin);
444+ cudaEventRecord (begin);
407445 fragmentShader<<<blockCount2d, blockSize2d>>> (width, height,
408446 dev_depthbuffer, c.light );
409- checkCUDAError (" " );
447+ checkCUDAError (" " );
410448
411- cudaEventRecord (end);
412- cudaEventSynchronize (end);
413- cudaEventElapsedTime (&fShadeTime , begin, end);
449+ cudaEventRecord (end); cudaEventSynchronize (end); cudaEventElapsedTime (&fShadeTime , begin, end);
414450
415451 // Clear CudaEvents
416452 cudaEventDestroy (begin);
417453 cudaEventDestroy (end);
418454
419- fprintf (stderr, " %f %f %f %f\n " , vShadeTime, assPrimitivesTime, scanlineTime, fShadeTime );
455+ // fprintf(stderr, "%f %f %f %f\n", vShadeTime, assPrimitivesTime, scanlineTime, fShadeTime);
420456
421457 // Copy depthbuffer colors into framebuffer
422458 render<<<blockCount2d, blockSize2d>>> (width, height, dev_depthbuffer, dev_framebuffer);
@@ -438,8 +474,11 @@ void rasterizeFree() {
438474 cudaFree (dev_bufVertexIn);
439475 dev_bufVertexIn = NULL ;
440476
441- cudaFree (dev_primitives);
442- dev_primitives = NULL ;
477+ cudaFree (dev_origPrimitives);
478+ dev_origPrimitives = NULL ;
479+
480+ cudaFree (dev_genPrimitives);
481+ dev_genPrimitives = NULL ;
443482
444483 cudaFree (dev_depthbuffer);
445484 dev_depthbuffer = NULL ;
0 commit comments