From fd74204825abbedcd4d7c85eb9040b48b4d00a94 Mon Sep 17 00:00:00 2001 From: brendancol Date: Mon, 22 Dec 2025 10:17:28 -0500 Subject: [PATCH] added example files --- examples/cuda_utils.py | 46 +++++++++ examples/hillshade.py | 228 +++++++++++++++++++++++++++++++++++++++++ examples/mesh_utils.py | 129 +++++++++++++++++++++++ examples/playground.py | 122 ++++++++++++++++++++++ examples/viewshed.py | 221 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 746 insertions(+) create mode 100644 examples/cuda_utils.py create mode 100644 examples/hillshade.py create mode 100644 examples/mesh_utils.py create mode 100644 examples/playground.py create mode 100644 examples/viewshed.py diff --git a/examples/cuda_utils.py b/examples/cuda_utils.py new file mode 100644 index 0000000..e73690a --- /dev/null +++ b/examples/cuda_utils.py @@ -0,0 +1,46 @@ +import numba as nb +import numpy as np + +def calc_dims(shape): + threadsperblock = (32,32) + blockspergrid = ( + (shape[0] + (threadsperblock[0] - 1)) // threadsperblock[0], + (shape[1] + (threadsperblock[1] - 1)) // threadsperblock[1] + ) + return blockspergrid, threadsperblock + +@nb.cuda.jit(device=True) +def add(a, b): + return float3(a[0]+b[0], a[1]+b[1], a[2]+b[2]) + +@nb.cuda.jit(device=True) +def diff(a, b): + return float3(a[0]-b[0], a[1]-b[1], a[2]-b[2]) + +@nb.cuda.jit(device=True) +def mul(a, b): + return float3(a[0]*b, a[1]*b, a[2]*b) + +@nb.cuda.jit(device=True) +def multColor(a, b): + return float3(a[0]*b[0], a[1]*b[1], a[2]*b[2]) + +@nb.cuda.jit(device=True) +def dot(a, b): + return a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + +@nb.cuda.jit(device=True) +def mix(a, b, k): + return add(mul(a, k), mul(b, 1-k)) + +@nb.cuda.jit(device=True) +def make_float3(a, offset): + return float3(a[offset], a[offset+1], a[offset+2]) + +@nb.cuda.jit(device=True) +def invert(a): + return float3(-a[0], -a[1], -a[2]) + +@nb.cuda.jit(device=True) +def float3(a, b, c): + return (np.float32(a), np.float32(b), np.float32(c)) diff --git a/examples/hillshade.py b/examples/hillshade.py new file mode 100644 index 0000000..9276e1d --- /dev/null +++ b/examples/hillshade.py @@ -0,0 +1,228 @@ +import numpy as np +import numba as nb + +import cupy +import xarray as xr +from rtxpy import RTX + +from typing import Optional + +from scipy.spatial.transform import Rotation as R + +from raytrace.cuda_utils import * +from raytrace import mesh_utils + +@nb.cuda.jit +def _generatePrimaryRays(data, x_coords, y_coords, H, W): + """ + A GPU kernel that given a set of x and y discrete coordinates on a raster terrain + generates in @data a list of parallel rays that represent camera rays generated from an ortographic camera + that is looking straight down at the surface from an origin height 10000 + """ + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: + #data[i,j,0] = j + 1e-6 # x_coords[j] + 1e-6 + #data[i,j,1] = i + 1e-6 # y_coords[i] + 1e-6 + + if (j == W-1): + data[i,j,0] = j - 1e-3 + else: + data[i,j,0] = j + 1e-3 + + if (i == H-1): + data[i,j,1] = i - 1e-3 + else: + data[i,j,1] = i + 1e-3 + + data[i,j,2] = 10000 # Location of the camera (height) + data[i,j,3] = 1e-3 + data[i,j,4] = 0 + data[i,j,5] = 0 + data[i,j,6] = -1 + data[i,j,7] = np.inf + + +def generatePrimaryRays(rays, x_coords, y_coords, H, W): + griddim, blockdim = calc_dims((H, W)) + _generatePrimaryRays[griddim, blockdim](rays, x_coords, y_coords, H, W) + return 0 + + +@nb.cuda.jit +def _generateShadowRays(rays, hits, normals, H, W, sunDir): + """ + A GPU kernel that given a set rays and their respective intersection points, + generates in rays (overwriting the original content) a new set of rays (shadow rays) + That have their origins at the point of intersection of their parent ray and direction - the direction towards the sun + The normals vectors at the point of intersection of the original rays are cached in @normals + Thus we can later use them to do lambertian shading, after the shadow rays have been traced + """ + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: + dist = hits[i,j,0] + norm = make_float3(hits[i,j], 1) + if (norm[2] < 0): + norm = invert(norm) + ray = rays[i,j] + rayOrigin = make_float3(ray, 0) + rayDir = make_float3(ray, 4) + p = add(rayOrigin, mul(rayDir,dist)) + + newOrigin = add(p, mul(norm, 1e-3)) + ray[0] = newOrigin[0] + ray[1] = newOrigin[1] + ray[2] = newOrigin[2] + ray[3] = 1e-3 + ray[4] = sunDir[0] + ray[5] = sunDir[1] + ray[6] = sunDir[2] + ray[7] = np.inf if dist > 0 else 0 + + normals[i,j,0] = norm[0] + normals[i,j,1] = norm[1] + normals[i,j,2] = norm[2] + +def generateShadowRays(rays, hits, normals, H, W, sunDir): + griddim, blockdim = calc_dims((H, W)) + _generateShadowRays[griddim, blockdim](rays, hits, normals, H, W, sunDir) + return 0 + + +@nb.cuda.jit +def _shadeLambert(hits, normals, output, H, W, sunDir, castShadows): + """ + This kernel does a simple Lambertian shading + The hits array contains the results of tracing the shadow rays through the scene. + If the value in hits[x,y,0] is > 0, then a valid intersection occurred and that means that the point + at location x,y is in shadow. + The normals array stores the normal at the intersecion point of each camera ray + We then use the information for light visibility and normal to apply Lambert's cosine law + The final result is stored in output which is an RGB array + """ + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: + # Normal at the intersection of camera ray (i,j) with the scene + norm = make_float3(normals[i,j], 0) + + # Below is same as existing algorithm without shadows and is OK with shadows. + # Could be improved with a bit of antialiasing at edges of shadow??? + + light_dir = make_float3(sunDir, 0) # Might have to make it zero if back cull. + cos_theta = dot(light_dir, norm) # light_dir and norm are already normalised. + + temp = (cos_theta + 1) / 2 + + if castShadows and hits[i, j, 0] >= 0: + temp = temp / 2 + + if temp > 1: + temp = 1 + elif temp < 0: + temp = 0 + + output[i, j] = temp + + + +def shadeLambert(hits, normals, output, H, W, sunDir, castShadows): + griddim, blockdim = calc_dims((H, W)) + _shadeLambert[griddim, blockdim](hits, normals, output, H, W, sunDir, castShadows) + return 0 + + +def getSunDir(angle_altitude, azimuth): + """ + Calculate the vector towards the sun based on sun altitude angle and azimuth + """ + north = (0,1,0) + rx = R.from_euler('x', angle_altitude, degrees=True) + rz = R.from_euler('z', azimuth+180, degrees=True) + sunDir = rx.apply(north) + sunDir = rz.apply(sunDir) + return sunDir + + +def hillshade_rt(raster: xr.DataArray, + optix: RTX, + shadows: bool = False, + azimuth: int = 225, + angle_altitude: int = 25, + name: Optional[str] = 'hillshade') -> xr.DataArray: + + H,W = raster.shape + sunDir = cupy.array(getSunDir(angle_altitude, azimuth)) + + #output = np.zeros((H,W,3), np.float32) + + # Device buffers + d_rays = cupy.empty((H,W,8), np.float32) + d_hits = cupy.empty((H,W,4), np.float32) + d_aux = cupy.empty((H,W,3), np.float32) + d_output = cupy.empty((H,W), np.float32) + + y_coords = cupy.array(raster.indexes.get('y').values) + x_coords = cupy.array(raster.indexes.get('x').values) + + generatePrimaryRays(d_rays, x_coords, y_coords, H, W) + device = cupy.cuda.Device(0) + device.synchronize() + res = optix.trace(d_rays, d_hits, W*H) + + generateShadowRays(d_rays, d_hits, d_aux, H, W, sunDir) + if shadows: + device.synchronize() + res = optix.trace(d_rays, d_hits, W*H) + + shadeLambert(d_hits, d_aux, d_output, H, W, sunDir, shadows) + + if isinstance(raster.data, np.ndarray): + output = cupy.asnumpy(d_output[:, :]) + nanValue = np.nan + else: + output = d_output[:, :] + nanValue = cupy.nan + + output[0, :] = nanValue + output[-1, :] = nanValue + output[:, 0] = nanValue + output[:, -1] = nanValue + + hill = xr.DataArray(output, + name=name, + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + return hill + +def hillshade_gpu(raster: xr.DataArray, + shadows: bool = False, + azimuth: int = 225, + angle_altitude: int = 25, + name: Optional[str] = 'hillshade') -> xr.DataArray: + # Move the terrain to GPU for testing the GPU path + if not isinstance(raster.data, cupy.ndarray): + print("WARNING: raster.data is not a cupy array. Additional overhead will be incurred") + H,W = raster.shape + optix = RTX() + + datahash = np.uint64(hash(str(raster.data.get()))) + optixhash = np.uint64(optix.getHash()) + if (optixhash != datahash): + numTris = (H - 1) * (W - 1) * 2 + verts = cupy.empty(H * W * 3, np.float32) + triangles = cupy.empty(numTris * 3, np.int32) + + # Generate a mesh from the terrain (buffers are on the GPU, so generation happens also on GPU) + res = mesh_utils.triangulateTerrain(verts, triangles, raster) + if res: + raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) + res = optix.build(datahash, verts, triangles) + if res: + raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) + #Clear some GPU memory that we no longer need + verts = None + triangles = None + cupy.get_default_memory_pool().free_all_blocks() + + hill = hillshade_rt(raster, optix, azimuth=azimuth, angle_altitude=angle_altitude, shadows=shadows, name=name) + return hill diff --git a/examples/mesh_utils.py b/examples/mesh_utils.py new file mode 100644 index 0000000..e847558 --- /dev/null +++ b/examples/mesh_utils.py @@ -0,0 +1,129 @@ + +import numba as nb +import numpy as np +import cupy +from raytrace.cuda_utils import calc_dims + +@nb.cuda.jit +def _triangulateTerrain(verts, triangles, data, H, W, scale, stride): + globalId = stride + nb.cuda.grid(1) + if globalId < W*H: + h = globalId // W + w = globalId % W + meshMapIndex = h * W + w + + val = data[h,w] + + offset = 3*meshMapIndex + verts[offset] = w # x_coords[w] # w + verts[offset+1] = h # y_coords[h] # h + verts[offset+2] = val * scale + + if w != W - 1 and h != H - 1: + offset = 6*(h * (W-1) + w) + triangles[offset+0]= np.int32(meshMapIndex + W) + triangles[offset+1]= np.int32(meshMapIndex + W + 1) + triangles[offset+2]= np.int32(meshMapIndex) + triangles[offset+3]= np.int32(meshMapIndex + W + 1) + triangles[offset+4]= np.int32(meshMapIndex + 1) + triangles[offset+5]= np.int32(meshMapIndex) + +@nb.njit(parallel=True) +def triangulateCPU(verts, triangles, data, H, W, scale): + for h in nb.prange(H): + for w in range(W): + meshMapIndex = h * W + w + + val = data[h,w] + + offset = 3*meshMapIndex + verts[offset] = w # x_coords[w] # w + verts[offset+1] = h # y_coords[h] # h + verts[offset+2] = val * scale + + if w != W - 1 and h != H - 1: + offset = 6*(h * (W-1) + w) + triangles[offset+0]= np.int32(meshMapIndex + W) + triangles[offset+1]= np.int32(meshMapIndex + W + 1) + triangles[offset+2]= np.int32(meshMapIndex) + triangles[offset+3]= np.int32(meshMapIndex + W+1) + triangles[offset+4]= np.int32(meshMapIndex + 1) + triangles[offset+5]= np.int32(meshMapIndex) + + +def triangulateTerrain(verts, triangles, terrain, scale=1): + H,W = terrain.shape + if isinstance(terrain.data, np.ndarray): + triangulateCPU(verts, triangles, terrain.data, H, W, scale) + if isinstance(terrain.data, cupy.ndarray): + jobSize = H*W + blockdim = 1024 + griddim = (jobSize + blockdim - 1) // 1024 + d = 100 + offset = 0 + while(jobSize>0): + batch = min(d, griddim) + _triangulateTerrain[batch, blockdim](verts, triangles, terrain.data, H, W, scale, offset) + offset += batch*blockdim + jobSize -= batch*blockdim + return 0 + + +@nb.jit(nopython=True) +def fillContents(content, verts, triangles, numTris): + v = np.empty(12, np.float32) + pad = np.zeros(2, np.int8) + offset = 0 + for i in range(numTris): + t0 = triangles[3*i+0] + t1 = triangles[3*i+1] + t2 = triangles[3*i+2] + v[3*0+0] = 0 + v[3*0+1] = 0 + v[3*0+2] = 0 + v[3*1+0] = verts[3*t0+0] + v[3*1+1] = verts[3*t0+1] + v[3*1+2] = verts[3*t0+2] + v[3*2+0] = verts[3*t1+0] + v[3*2+1] = verts[3*t1+1] + v[3*2+2] = verts[3*t1+2] + v[3*3+0] = verts[3*t2+0] + v[3*3+1] = verts[3*t2+1] + v[3*3+2] = verts[3*t2+2] + + offset = 50*i + content[offset:offset+48] = v.view(np.uint8) + content[offset+48:offset+50] = pad + +def write(name, verts, triangles): + """ + Save a triangulated raster to a standard STL file. + Windows has a default STL viewer and probably all 3D viewers have native support for it + because of its simplicity. Can be used to verify the correctness of the algorithm or + to visualize the mesh to get a notion of the size/complexity etc. + @param name - The name of the mesh file we're going to save. Should end in .stl + @param verts - A numpy array containing all the vertices of the mesh. Format is 3 float32 per vertex (vertex buffer) + @param triangles - A numpy array containing all the triangles of the mesh. Format is 3 int32 per triangle (index buffer) + """ + ib = triangles + vb = verts + if isinstance(ib, cupy.ndarray): + ib = cupy.asnumpy(ib) + if isinstance(vb, cupy.ndarray): + vb = cupy.asnumpy(vb) + + header = np.zeros(80, np.uint8) + nf = np.empty(1, np.uint32) + numTris = triangles.shape[0] // 3 + nf[0] = numTris + f=open(name,'wb') + f.write(header) + f.write(nf) + + # size of 1 triangle in STL is 50 bytes + # 12 floats (each 4 bytes) for a total of 48 + # And additional 2 bytes for padding + content = np.empty(numTris*(50), np.uint8) + fillContents(content, vb, ib, numTris) + f.write(content) + f.close() \ No newline at end of file diff --git a/examples/playground.py b/examples/playground.py new file mode 100644 index 0000000..271ad16 --- /dev/null +++ b/examples/playground.py @@ -0,0 +1,122 @@ +from datashader.transfer_functions import shade +from datashader.transfer_functions import stack +from datashader.transfer_functions import dynspread + +import matplotlib.pyplot as plt + +import numpy as np +import cupy + +import os, sys +#Add the parent folder to python search space +sys.path.append(os.path.abspath(os.getcwd())+"/..") +sys.path.append(os.path.abspath(os.getcwd())) + +from raytrace import hillshade_gpu +from raytrace import viewshed_gpu + +from rtxpy import RTX + +import xarray as xr; + +from raytrace.hillshade import getSunDir +#getSunDir(angle_altitude, azimuth) + +#terrain = xr.open_dataarray("olympic_national_park.nc") +#terrain.data = terrain.data * 0.025 #scale down +terrain = xr.open_dataarray("crater_lake_national_park.nc") + +terrain = terrain[::2, ::2] + +terrain.data = terrain.data * 0.2 #scale down + +azimuth = 225 + + +def debug(x,y): + rays = cupy.float32([x,y,10000,0,0,0,-1,np.inf]) + hits = cupy.float32([0,0,0,0]) + optix = RTX() + res = optix.trace(rays, hits, 1) + norm = cupy.asnumpy(hits[1:]) + sun = getSunDir(25, azimuth) + + return res + +def onclick(event): + """ + Simple click handler for live adjustment of the viewshed origin when running matplotlib live session + """ + ix, iy = event.xdata, event.ydata + print ('x = {}, y = {}'.format(ix, iy)) + + nix = ix/terrain.shape[1] + niy = iy/terrain.shape[0] + + x_coords = terrain.indexes.get('x').values + y_coords = terrain.indexes.get('y').values + rangex = x_coords.max() - x_coords.min() + rangey = y_coords.max() - y_coords.min() + + global vsw, vsh + vsw = x_coords.min() + nix*rangex + vsh = y_coords.max() - niy*rangey + + #debug(ix, iy) + return None + +def test(): + runs = 360 + H,W = terrain.data.shape + if isinstance(terrain.data, np.ndarray): + terrain.data = cupy.array(terrain.data) + + fig = plt.figure() + cid = fig.canvas.mpl_connect('button_press_event', onclick) + colors = np.uint8(np.zeros((H,W,3))) + imgplot = plt.imshow(colors) + + x_coords = terrain.indexes.get('x').values + y_coords = terrain.indexes.get('y').values + midx = x_coords.min() + (x_coords.max() - x_coords.min()) / 2 + midy = y_coords.min() + (y_coords.max() - y_coords.min()) / 2 + + global vsw, vsh + global azimuth + vsw = midx + vsh = midy + + import time + for i in range(runs): + print("Begin Frame ", i) + azimuth = azimuth + 5 + if (azimuth > 360): + azimuth -= 360 + + beforeRT = time.time() + hs = hillshade_gpu(terrain, shadows=True, azimuth=azimuth, angle_altitude=25) + vs = viewshed_gpu(terrain, x=vsw, y=vsh, observer_elev=0.01) + afterRT = time.time() + print(" RT took ", afterRT-beforeRT) + + img = np.uint8(hs.data.get()*225) + + withViewshed = True#False + if withViewshed: + visBuf = np.uint8(vs.data.get() > 0) * 255 + view = np.maximum(visBuf,img) + else: + view = img + colors[:,:,0] = view + colors[:,:,1] = img + colors[:,:,2] = img + imgplot.set_data(colors) + plt.pause(0.001) + + plt.show() + + return + +res = test() + +print("Done") diff --git a/examples/viewshed.py b/examples/viewshed.py new file mode 100644 index 0000000..502c7d9 --- /dev/null +++ b/examples/viewshed.py @@ -0,0 +1,221 @@ +import numpy as np +import numba as nb + +import cupy +import xarray as xr +from rtxpy import RTX +import math + +from typing import Union + +from scipy.spatial.transform import Rotation as R + +from raytrace.cuda_utils import * +from raytrace import mesh_utils + +# view options default values +OBS_ELEV = 0 +TARGET_ELEV = 0 + +# if a cell is invisible, its value is set to -1 +INVISIBLE = -1 + +@nb.cuda.jit +def _generatePrimaryRays(data, x_coords, y_coords, H, W): + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: +# if (j == W-1): +# data[i,j,0] = y_coords[j] - 1e-6 +# else: +# data[i,j,0] = y_coords[j] + 1e-6 +# +# if (i == H-1): +# data[i,j,1] = x_coords[i] - 1e-6 +# else: +# data[i,j,1] = x_coords[i] + 1e-6 + + if (j == W-1): + data[i,j,0] = j - 1e-3 + else: + data[i,j,0] = j + 1e-3 + + if (i == H-1): + data[i,j,1] = i - 1e-3 + else: + data[i,j,1] = i + 1e-3 + + data[i,j,2] = 10000 # Location of the camera (height) + data[i,j,3] = 1e-3 + data[i,j,4] = 0 + data[i,j,5] = 0 + data[i,j,6] = -1 + data[i,j,7] = np.inf + +def generatePrimaryRays(rays, x_coords, y_coords, H, W): + griddim, blockdim = calc_dims((H, W)) + d_y_coords = cupy.array(y_coords) + d_x_coords = cupy.array(x_coords) + _generatePrimaryRays[griddim, blockdim](rays, d_x_coords, d_y_coords, H, W) + return 0 + +@nb.cuda.jit +def _generateViewshedRays(camRays, hits, vsrays, visibility_grid, H, W, vp): + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: + elevationOffset = vp[2] + targetElevation = vp[3] + dist = hits[i,j,0] # distance to surface from camera + norm = make_float3(hits[i,j], 1) # normal vector at intersection with surface + #inorm = norm + if (norm[2] < 0): # if back hit, face forward + norm = invert(norm) + cameraRay = camRays[i,j] + rayOrigin = make_float3(cameraRay, 0) # get the camera ray origin + rayDir = make_float3(cameraRay, 4) # get the camera ray direction + hitP = add(rayOrigin, mul(rayDir,dist)) # calculate intersection point + newOrigin = add(hitP, mul(norm, 1e-3)) # generate new ray origin, and a little offset to avoid self-intersection + newOrigin = add(newOrigin, float3(0, 0, targetElevation)) # move the new origin up by the selected by user targetElevation factor + + w = int(vp[0]) + h = int(vp[1]) + viewshedRay = camRays[h,w] # get the camera ray that was cast for the location of the viewshed origin + dist = hits[h,w,0] # get the distance from the camera to the viewshed point + rayOrigin = make_float3(viewshedRay, 0) # get the origin on the camera of the ray towards VP point + rayDir = make_float3(viewshedRay, 4) # get the direction from camera to VP point + hitP = add(rayOrigin, mul(rayDir,dist)) # calculate distance from camera to VP + viewshedPoint = add(hitP, float3(0, 0, elevationOffset)) # calculate the VP location on the surface and add the VP offset + + newDir = diff(viewshedPoint, newOrigin) # calculate vector from SurfaceHit to VP + length = math.sqrt(dot(newDir, newDir)) # calculate distance from surface to VP + newDir = mul(newDir, 1/length) # normalize the direction (vector v) + + cosine = dot(norm, newDir) # cosine of the angle between n and v + theta = math.acos(cosine) # Cosine angle in radians + theta = (180*theta)/math.pi # Cosine angle in degrees + + # prepare a viewshed ray to cast to determine visibility + vsray = vsrays[i,j] + vsray[0] = newOrigin[0] + vsray[1] = newOrigin[1] + vsray[2] = newOrigin[2] + vsray[3] = 0 + vsray[4] = newDir[0] + vsray[5] = newDir[1] + vsray[6] = newDir[2] + vsray[7] = length + + visibility_grid[i,j] = theta + +def generateViewshedRays(rays, hits, vsrays, visibility_grid, H, W, vp): + griddim, blockdim = calc_dims((H, W)) + _generateViewshedRays[griddim, blockdim](rays, hits, vsrays, visibility_grid, H, W, vp) + return 0 + +@nb.cuda.jit +def _calcViewshed(hits, visibility_grid, H, W): + i, j = nb.cuda.grid(2) + if i>=0 and i < H and j>=0 and j < W: + dist = hits[i,j,0] + # We traced the viewshed rays and now hits contains the intersection data + # if dist > 0, then we were able to hit something along the length of the ray + # which means that the pixel we targeted is not directly visible from the view point + if dist>=0: + visibility_grid[i,j] = INVISIBLE + +def calcViewshed(hits, visibility_grid, H, W): + griddim, blockdim = calc_dims((H, W)) + _calcViewshed[griddim, blockdim](hits, visibility_grid, H, W) + return 0 + +def viewshed_rt(raster: xr.DataArray, + optix: RTX, + x: Union[int, float], + y: Union[int, float], + observer_elev: float = OBS_ELEV, + target_elev: float = TARGET_ELEV) -> xr.DataArray: + + H,W = raster.shape + + y_coords = raster.indexes.get('y').values + x_coords = raster.indexes.get('x').values + + # validate x arg + if x < x_coords.min(): + raise ValueError("x argument outside of raster x_range") + elif x > x_coords.max(): + raise ValueError("x argument outside of raster x_range") + + # validate y arg + if y < y_coords.min(): + raise ValueError("y argument outside of raster y_range") + elif y > y_coords.max(): + raise ValueError("y argument outside of raster y_range") + + selection = raster.sel(x=[x], y=[y], method='nearest') + x = selection.x.values[0] + y = selection.y.values[0] + + y_view = np.where(y_coords == y)[0][0] + x_view = np.where(x_coords == x)[0][0] + + # Device buffers + d_rays = cupy.empty((H,W,8), np.float32) + d_hits = cupy.empty((H,W,4), np.float32) + d_visgrid = cupy.empty((H,W), np.float32) + d_vsrays = cupy.empty((H,W,8), np.float32) + + generatePrimaryRays(d_rays, x_coords, y_coords, H, W) + device = cupy.cuda.Device(0) + device.synchronize() + res = optix.trace(d_rays, d_hits, W*H) + + generateViewshedRays(d_rays, d_hits, d_vsrays, d_visgrid, H, W, (x_view, y_view, observer_elev, target_elev)) + device.synchronize() + res = optix.trace(d_vsrays, d_hits, W*H) + + calcViewshed(d_hits, d_visgrid, H, W) + + if isinstance(raster.data, np.ndarray): + visgrid = cupy.asnumpy(d_visgrid) + else: + visgrid = d_visgrid + + view = xr.DataArray(visgrid, + name="viewshed", + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + return view + +def viewshed_gpu(raster: xr.DataArray, + x: Union[int, float], + y: Union[int, float], + observer_elev: float = OBS_ELEV, + target_elev: float = TARGET_ELEV) -> xr.DataArray: + if not isinstance(raster.data, cupy.ndarray): + raise ValueError("raster.data must be a cupy array") + + H,W = raster.shape + optix = RTX() + + datahash = np.uint64(hash(str(raster.data.get()))) + optixhash = np.uint64(optix.getHash()) + if (optixhash != datahash): + numTris = (H - 1) * (W - 1) * 2 + verts = cupy.empty(H * W * 3, np.float32) + triangles = cupy.empty(numTris * 3, np.int32) + + # Generate a mesh from the terrain (buffers are on the GPU, so generation happens also on GPU) + res = mesh_utils.triangulateTerrain(verts, triangles, raster) + if res: + raise ValueError("Failed to generate mesh from terrain. Error code:{}".format(res)) + res = optix.build(datahash, verts, triangles) + if res: + raise ValueError("OptiX failed to build GAS with error code:{}".format(res)) + #Clear some GPU memory that we no longer need + verts = None + triangles = None + cupy.get_default_memory_pool().free_all_blocks() + + view = viewshed_rt(raster, optix, x, y, observer_elev, target_elev) + return view \ No newline at end of file