Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions examples/cuda_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numba as nb
import numpy as np

def calc_dims(shape):
threadsperblock = (32,32)
blockspergrid = (
(shape[0] + (threadsperblock[0] - 1)) // threadsperblock[0],
(shape[1] + (threadsperblock[1] - 1)) // threadsperblock[1]
)
return blockspergrid, threadsperblock

@nb.cuda.jit(device=True)
def add(a, b):
return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2])

@nb.cuda.jit(device=True)
def diff(a, b):
return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2])

@nb.cuda.jit(device=True)
def mul(a, b):
return float3(a[0]*b, a[1]*b, a[2]*b)

@nb.cuda.jit(device=True)
def multColor(a, b):
return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2])

@nb.cuda.jit(device=True)
def dot(a, b):
return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]

@nb.cuda.jit(device=True)
def mix(a, b, k):
return add(mul(a, k), mul(b, 1-k))

@nb.cuda.jit(device=True)
def make_float3(a, offset):
return float3(a[offset], a[offset+1], a[offset+2])

@nb.cuda.jit(device=True)
def invert(a):
return float3(-a[0], -a[1], -a[2])

@nb.cuda.jit(device=True)
def float3(a, b, c):
return (np.float32(a), np.float32(b), np.float32(c))
228 changes: 228 additions & 0 deletions examples/hillshade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import numpy as np
import numba as nb

import cupy
import xarray as xr
from rtxpy import RTX

from typing import Optional

from scipy.spatial.transform import Rotation as R

from raytrace.cuda_utils import *
from raytrace import mesh_utils

@nb.cuda.jit
def _generatePrimaryRays(data, x_coords, y_coords, H, W):
"""
A GPU kernel that given a set of x and y discrete coordinates on a raster terrain
generates in @data a list of parallel rays that represent camera rays generated from an ortographic camera
that is looking straight down at the surface from an origin height 10000
"""
i, j = nb.cuda.grid(2)
if i>=0 and i < H and j>=0 and j < W:
#data[i,j,0] = j + 1e-6 # x_coords[j] + 1e-6
#data[i,j,1] = i + 1e-6 # y_coords[i] + 1e-6

if (j == W-1):
data[i,j,0] = j - 1e-3
else:
data[i,j,0] = j + 1e-3

if (i == H-1):
data[i,j,1] = i - 1e-3
else:
data[i,j,1] = i + 1e-3

data[i,j,2] = 10000 # Location of the camera (height)
data[i,j,3] = 1e-3
data[i,j,4] = 0
data[i,j,5] = 0
data[i,j,6] = -1
data[i,j,7] = np.inf


def generatePrimaryRays(rays, x_coords, y_coords, H, W):
griddim, blockdim = calc_dims((H, W))
_generatePrimaryRays[griddim, blockdim](rays, x_coords, y_coords, H, W)
return 0


@nb.cuda.jit
def _generateShadowRays(rays, hits, normals, H, W, sunDir):
"""
A GPU kernel that given a set rays and their respective intersection points,
generates in rays (overwriting the original content) a new set of rays (shadow rays)
That have their origins at the point of intersection of their parent ray and direction - the direction towards the sun
The normals vectors at the point of intersection of the original rays are cached in @normals
Thus we can later use them to do lambertian shading, after the shadow rays have been traced
"""
i, j = nb.cuda.grid(2)
if i>=0 and i < H and j>=0 and j < W:
dist = hits[i,j,0]
norm = make_float3(hits[i,j], 1)
if (norm[2] < 0):
norm = invert(norm)
ray = rays[i,j]
rayOrigin = make_float3(ray, 0)
rayDir = make_float3(ray, 4)
p = add(rayOrigin, mul(rayDir,dist))

newOrigin = add(p, mul(norm, 1e-3))
ray[0] = newOrigin[0]
ray[1] = newOrigin[1]
ray[2] = newOrigin[2]
ray[3] = 1e-3
ray[4] = sunDir[0]
ray[5] = sunDir[1]
ray[6] = sunDir[2]
ray[7] = np.inf if dist > 0 else 0

normals[i,j,0] = norm[0]
normals[i,j,1] = norm[1]
normals[i,j,2] = norm[2]

def generateShadowRays(rays, hits, normals, H, W, sunDir):
griddim, blockdim = calc_dims((H, W))
_generateShadowRays[griddim, blockdim](rays, hits, normals, H, W, sunDir)
return 0


@nb.cuda.jit
def _shadeLambert(hits, normals, output, H, W, sunDir, castShadows):
"""
This kernel does a simple Lambertian shading
The hits array contains the results of tracing the shadow rays through the scene.
If the value in hits[x,y,0] is > 0, then a valid intersection occurred and that means that the point
at location x,y is in shadow.
The normals array stores the normal at the intersecion point of each camera ray
We then use the information for light visibility and normal to apply Lambert's cosine law
The final result is stored in output which is an RGB array
"""
i, j = nb.cuda.grid(2)
if i>=0 and i < H and j>=0 and j < W:
# Normal at the intersection of camera ray (i,j) with the scene
norm = make_float3(normals[i,j], 0)

# Below is same as existing algorithm without shadows and is OK with shadows.
# Could be improved with a bit of antialiasing at edges of shadow???

light_dir = make_float3(sunDir, 0) # Might have to make it zero if back cull.
cos_theta = dot(light_dir, norm) # light_dir and norm are already normalised.

temp = (cos_theta + 1) / 2

if castShadows and hits[i, j, 0] >= 0:
temp = temp / 2

if temp > 1:
temp = 1
elif temp < 0:
temp = 0

output[i, j] = temp



def shadeLambert(hits, normals, output, H, W, sunDir, castShadows):
griddim, blockdim = calc_dims((H, W))
_shadeLambert[griddim, blockdim](hits, normals, output, H, W, sunDir, castShadows)
return 0


def getSunDir(angle_altitude, azimuth):
"""
Calculate the vector towards the sun based on sun altitude angle and azimuth
"""
north = (0,1,0)
rx = R.from_euler('x', angle_altitude, degrees=True)
rz = R.from_euler('z', azimuth+180, degrees=True)
sunDir = rx.apply(north)
sunDir = rz.apply(sunDir)
return sunDir


def hillshade_rt(raster: xr.DataArray,
optix: RTX,
shadows: bool = False,
azimuth: int = 225,
angle_altitude: int = 25,
name: Optional[str] = 'hillshade') -> xr.DataArray:

H,W = raster.shape
sunDir = cupy.array(getSunDir(angle_altitude, azimuth))

#output = np.zeros((H,W,3), np.float32)

# Device buffers
d_rays = cupy.empty((H,W,8), np.float32)
d_hits = cupy.empty((H,W,4), np.float32)
d_aux = cupy.empty((H,W,3), np.float32)
d_output = cupy.empty((H,W), np.float32)

y_coords = cupy.array(raster.indexes.get('y').values)
x_coords = cupy.array(raster.indexes.get('x').values)

generatePrimaryRays(d_rays, x_coords, y_coords, H, W)
device = cupy.cuda.Device(0)
device.synchronize()
res = optix.trace(d_rays, d_hits, W*H)

generateShadowRays(d_rays, d_hits, d_aux, H, W, sunDir)
if shadows:
device.synchronize()
res = optix.trace(d_rays, d_hits, W*H)

shadeLambert(d_hits, d_aux, d_output, H, W, sunDir, shadows)

if isinstance(raster.data, np.ndarray):
output = cupy.asnumpy(d_output[:, :])
nanValue = np.nan
else:
output = d_output[:, :]
nanValue = cupy.nan

output[0, :] = nanValue
output[-1, :] = nanValue
output[:, 0] = nanValue
output[:, -1] = nanValue

hill = xr.DataArray(output,
name=name,
coords=raster.coords,
dims=raster.dims,
attrs=raster.attrs)
return hill

def hillshade_gpu(raster: xr.DataArray,
shadows: bool = False,
azimuth: int = 225,
angle_altitude: int = 25,
name: Optional[str] = 'hillshade') -> xr.DataArray:
# Move the terrain to GPU for testing the GPU path
if not isinstance(raster.data, cupy.ndarray):
print("WARNING: raster.data is not a cupy array. Additional overhead will be incurred")
H,W = raster.shape
optix = RTX()

datahash = np.uint64(hash(str(raster.data.get())))
optixhash = np.uint64(optix.getHash())
if (optixhash != datahash):
numTris = (H - 1) * (W - 1) * 2
verts = cupy.empty(H * W * 3, np.float32)
triangles = cupy.empty(numTris * 3, np.int32)

# Generate a mesh from the terrain (buffers are on the GPU, so generation happens also on GPU)
res = mesh_utils.triangulateTerrain(verts, triangles, raster)
if res:
raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res))
res = optix.build(datahash, verts, triangles)
if res:
raise ValueError("OptiX failed to build GAS with error code:{}".format(res))
#Clear some GPU memory that we no longer need
verts = None
triangles = None
cupy.get_default_memory_pool().free_all_blocks()

hill = hillshade_rt(raster, optix, azimuth=azimuth, angle_altitude=angle_altitude, shadows=shadows, name=name)
return hill
Loading
Loading