diff --git a/experiments/vmath_simd.nim b/experiments/vmath_simd.nim
new file mode 100644
index 0000000..5b1d74f
--- /dev/null
+++ b/experiments/vmath_simd.nim
@@ -0,0 +1,112 @@
+## Experimental SSE4.1 `Vec3` used by tests/bench_raytracer_simd.nim.
+##
+## x, y and z live in lanes 0..2 of one 128-bit register. The operations are
+## templates, so any argument that is used more than once is first bound to a
+## local; otherwise the caller's expression would be expanded (and evaluated)
+## once per use.
+
+when not defined(amd64):
+  {.error: "experiments/vmath_simd.nim is an amd64 SIMD-only experiment".}
+
+when defined(gcc) or defined(clang):
+  {.passc: "-msse4.1 -march=native -mtune=native -ffast-math".}
+
+import nimsimd/sse41
+
+type
+  Vec3* = M128 ## Components in lanes 0..2; lane 3 is not part of the value.
+
+template vec3*[T: SomeNumber](x, y, z: T): Vec3 =
+  ## Builds a vector from three numbers, each converted to float32.
+  ## Lane 3 receives a copy of `z`; `dot`, `length` and `normalize` exclude
+  ## it through the 0x7f mask.
+  ## NOTE(review): confirm lane 3 is meant to duplicate `z` rather than be 0.
+  block:
+    let zLane = float32(z)
+    mm_set_ps(zLane, zLane, float32(y), float32(x))
+
+template vec3*[T: SomeNumber](v: T): Vec3 =
+  ## Builds a vector with every lane set to `v`.
+  mm_set1_ps(float32(v))
+
+template vec3*(): Vec3 =
+  ## The zero vector.
+  vec3(0'f32)
+
+template x*(a: Vec3): float32 =
+  ## Component in lane 0.
+  mm_cvtss_f32(a)
+
+template y*(a: Vec3): float32 =
+  ## Component in lane 1.
+  block:
+    let v = a
+    mm_cvtss_f32(mm_shuffle_ps(v, v, MM_SHUFFLE(1, 1, 1, 1)))
+
+template z*(a: Vec3): float32 =
+  ## Component in lane 2.
+  block:
+    let v = a
+    mm_cvtss_f32(mm_shuffle_ps(v, v, MM_SHUFFLE(2, 2, 2, 2)))
+
+template `+`*(a, b: Vec3): Vec3 =
+  ## Lane-wise sum.
+  mm_add_ps(a, b)
+
+template `-`*(a, b: Vec3): Vec3 =
+  ## Lane-wise difference.
+  mm_sub_ps(a, b)
+
+template `-`*(a: Vec3): Vec3 =
+  ## Negation, computed as zero minus `a`.
+  mm_sub_ps(mm_setzero_ps(), a)
+
+template `*`*(a, b: Vec3): Vec3 =
+  ## Lane-wise product.
+  mm_mul_ps(a, b)
+
+template `*`*(a: Vec3, b: float32): Vec3 =
+  ## Scales every lane of `a` by `b`.
+  mm_mul_ps(a, mm_set1_ps(b))
+
+template `*`*(a: float32, b: Vec3): Vec3 =
+  ## Scalar-first form of `b * a`.
+  b * a
+
+template `/`*(a: Vec3, b: float32): Vec3 =
+  ## Divides every lane of `a` by `b`.
+  mm_div_ps(a, mm_set1_ps(b))
+
+template dot*(a, b: Vec3): float32 =
+  ## Dot product over lanes 0..2 (mask 0x7f leaves lane 3 out of the sum).
+  mm_cvtss_f32(mm_dp_ps(a, b, 0x7f))
+
+template lengthSq*(a: Vec3): float32 =
+  ## Squared length.
+  block:
+    let v = a
+    mm_cvtss_f32(mm_dp_ps(v, v, 0x7f))
+
+template length*(a: Vec3): float32 =
+  ## Euclidean length.
+  block:
+    let v = a
+    mm_cvtss_f32(mm_sqrt_ss(mm_dp_ps(v, v, 0x7f)))
+
+template normalize*(a: Vec3): Vec3 =
+  ## `a` divided by its length; the length is not checked for zero.
+  block:
+    let v = a
+    mm_div_ps(v, mm_sqrt_ps(mm_dp_ps(v, v, 0x7f)))
+
+template cross*(a, b: Vec3): Vec3 =
+  ## Cross product `a` x `b` using three shuffles.
+  block:
+    let
+      u = a
+      v = b
+      # Lanes 0..2 of `rotated` hold the result's (z, x, y).
+      rotated = mm_sub_ps(
+        mm_mul_ps(mm_shuffle_ps(v, v, MM_SHUFFLE(0, 0, 2, 1)), u),
+        mm_mul_ps(mm_shuffle_ps(u, u, MM_SHUFFLE(0, 0, 2, 1)), v))
+    mm_shuffle_ps(rotated, rotated, MM_SHUFFLE(0, 0, 2, 1))
diff --git a/tests/bench_raytracer.nim b/tests/bench_raytracer.nim index 
c334715..9c7c965 100644 --- a/tests/bench_raytracer.nim +++ b/tests/bench_raytracer.nim @@ -6,7 +6,7 @@ import std/math, benchy, chroma, pixie, vmath -{.push inline, noinit, checks: off.} +{.push inline, checks: off.} type SurfaceType = enum @@ -21,7 +21,7 @@ type Ray = object start, dir: Vec3 - Thing = ref object + Thing = object surfaceType: SurfaceType case objectType: ObjectType of Sphere: @@ -32,7 +32,7 @@ type offset: float32 Intersection = object - thing: Thing + thingIdx: int ray: Ray dist: float32 @@ -40,7 +40,7 @@ type pos: Vec3 color: Color - Scene = ref object + Scene = object maxDepth: int things: seq[Thing] lights: seq[Light] @@ -84,6 +84,8 @@ proc getNormal(obj: Thing, pos: Vec3): Vec3 = return obj.normal proc objectIntersect(obj: Thing, ray: Ray): Intersection = + result.thingIdx = -1 + result.dist = 0.0 case obj.objectType: of Sphere: let @@ -95,14 +97,12 @@ proc objectIntersect(obj: Thing, ray: Ray): Intersection = if disc >= 0: dist = v - sqrt(disc) if dist != 0.0: - result.thing = obj result.ray = ray result.dist = dist of Plane: let denom = obj.normal.dot(ray.dir) if denom <= 0: result.dist = (obj.normal.dot(ray.start) + obj.offset) / (-denom) - result.thing = obj result.ray = ray proc newSphere(center: Vec3, radius: float32, surfaceType: SurfaceType): Thing = @@ -132,7 +132,6 @@ proc getSurfaceProperties(obj: Thing, pos: Vec3): SurfaceProperties = result.roughness = 150.0 proc newScene(): Scene = - result = Scene() result.maxDepth = 5 result.things = @[ newPlane(vec3(0.0, 1.0, 0.0), 0.0, CheckerBoardSurface), @@ -149,16 +148,18 @@ proc newScene(): Scene = proc intersections(scene: Scene, ray: Ray): Intersection = var closest: float32 = farAway - result.thing = nil - for thing in scene.things: + result.thingIdx = -1 + result.dist = 0.0 + for i, thing in scene.things: let intersect = objectIntersect(thing, ray) - if (not isNil(intersect.thing)) and (intersect.dist < closest): + if intersect.dist != 0.0 and intersect.dist < closest: result = 
intersect + result.thingIdx = i closest = intersect.dist proc testRay(scene: Scene, ray: Ray): float32 = let intersection = scene.intersections(ray) - if not isNil(intersection.thing): + if intersection.thingIdx >= 0: return intersection.dist return NaN @@ -166,7 +167,7 @@ proc shade(scene: Scene, intersection: Intersection, depth: int): Color proc traceRay(scene: Scene, ray: Ray, depth: int): Color = let intersection = intersections(scene, ray) - if not isNil(intersection.thing): + if intersection.thingIdx >= 0: return scene.shade(intersection, depth) return background @@ -213,19 +214,20 @@ proc getNaturalColor(scene: Scene, thing: Thing, pos, norm, result = result + lightColor + specularColor proc shade(scene: Scene, intersection: Intersection, depth: int): Color = + let thing = scene.things[intersection.thingIdx] var dir = intersection.ray.dir scaled = dir * intersection.dist pos = scaled + intersection.ray.start - normal = intersection.thing.getNormal(pos) + normal = thing.getNormal(pos) reflectDir = dir - (normal * normal.dot(dir) * 2) - naturalColor = background + getNaturalColor(scene, intersection.thing, + naturalColor = background + getNaturalColor(scene, thing, pos, normal, reflectDir) reflectedColor: Color if depth >= scene.maxDepth: reflectedColor = grey else: - reflectedColor = getReflectionColor(scene, intersection.thing, pos, normal, + reflectedColor = getReflectionColor(scene, thing, pos, normal, reflectDir, depth) return naturalColor + reflectedColor diff --git a/tests/bench_raytracer_glm.nim b/tests/bench_raytracer_glm.nim index f88f2a6..66a3e3a 100644 --- a/tests/bench_raytracer_glm.nim +++ b/tests/bench_raytracer_glm.nim @@ -9,7 +9,7 @@ from pixie import Image, newImage, writeFile, dataIndex type Vec3 = glm.Vec3[float32] -{.push inline, noinit, checks: off.} +{.push inline, checks: off.} type SurfaceType = enum @@ -24,7 +24,7 @@ type Ray = object start, dir: Vec3 - Thing = ref object + Thing = object surfaceType: SurfaceType case objectType: 
ObjectType of Sphere: @@ -35,7 +35,7 @@ type offset: float32 Intersection = object - thing: Thing + thingIdx: int ray: Ray dist: float32 @@ -43,7 +43,7 @@ type pos: Vec3 color: Color - Scene = ref object + Scene = object maxDepth: int things: seq[Thing] lights: seq[Light] @@ -87,6 +87,8 @@ proc getNormal(obj: Thing, pos: Vec3): Vec3 = return obj.normal proc objectIntersect(obj: Thing, ray: Ray): Intersection = + result.thingIdx = -1 + result.dist = 0.0 case obj.objectType: of Sphere: let @@ -98,14 +100,12 @@ proc objectIntersect(obj: Thing, ray: Ray): Intersection = if disc >= 0: dist = v - sqrt(disc) if dist != 0.0: - result.thing = obj result.ray = ray result.dist = dist of Plane: let denom = obj.normal.dot(ray.dir) if denom <= 0: result.dist = (obj.normal.dot(ray.start) + obj.offset) / (-denom) - result.thing = obj result.ray = ray proc newSphere(center: Vec3, radius: float32, surfaceType: SurfaceType): Thing = @@ -135,7 +135,6 @@ proc getSurfaceProperties(obj: Thing, pos: Vec3): SurfaceProperties = result.roughness = 150.0 proc newScene(): Scene = - result = Scene() result.maxDepth = 5 result.things = @[ newPlane(vec3(0.0f, 1.0f, 0.0f), 0.0, CheckerBoardSurface), @@ -152,16 +151,18 @@ proc newScene(): Scene = proc intersections(scene: Scene, ray: Ray): Intersection = var closest: float32 = farAway - result.thing = nil - for thing in scene.things: + result.thingIdx = -1 + result.dist = 0.0 + for i, thing in scene.things: let intersect = objectIntersect(thing, ray) - if (not isNil(intersect.thing)) and (intersect.dist < closest): + if intersect.dist != 0.0 and intersect.dist < closest: result = intersect + result.thingIdx = i closest = intersect.dist proc testRay(scene: Scene, ray: Ray): float32 = let intersection = scene.intersections(ray) - if not isNil(intersection.thing): + if intersection.thingIdx >= 0: return intersection.dist return NaN @@ -169,7 +170,7 @@ proc shade(scene: Scene, intersection: Intersection, depth: int): Color proc traceRay(scene: Scene, 
ray: Ray, depth: int): Color = let intersection = intersections(scene, ray) - if not isNil(intersection.thing): + if intersection.thingIdx >= 0: return scene.shade(intersection, depth) return background @@ -216,19 +217,20 @@ proc getNaturalColor(scene: Scene, thing: Thing, pos, norm, result = result + lightColor + specularColor proc shade(scene: Scene, intersection: Intersection, depth: int): Color = + let thing = scene.things[intersection.thingIdx] var dir = intersection.ray.dir scaled = dir * intersection.dist pos = scaled + intersection.ray.start - normal = intersection.thing.getNormal(pos) + normal = thing.getNormal(pos) reflectDir = dir - (normal * normal.dot(dir) * 2) - naturalColor = background + getNaturalColor(scene, intersection.thing, + naturalColor = background + getNaturalColor(scene, thing, pos, normal, reflectDir) reflectedColor: Color if depth >= scene.maxDepth: reflectedColor = grey else: - reflectedColor = getReflectionColor(scene, intersection.thing, pos, normal, + reflectedColor = getReflectionColor(scene, thing, pos, normal, reflectDir, depth) return naturalColor + reflectedColor
diff --git a/tests/bench_raytracer_simd.nim b/tests/bench_raytracer_simd.nim
new file mode 100644
index 0000000..47286e5
--- /dev/null
+++ b/tests/bench_raytracer_simd.nim
@@ -0,0 +1,270 @@
+## Based on the work of https://github.com/edin/raytracer
+## MIT License
+## Copyright (c) 2021 Edin Omeragic
+
+import
+  std/math,
+  benchy, chroma, pixie
+import ../experiments/vmath_simd as vsimd
+
+{.push inline, checks: off.}
+
+type
+  Vec3 = vsimd.Vec3
+  SurfaceType = enum
+    ShinySurface, CheckerBoardSurface
+
+  ObjectType = enum
+    Sphere, Plane
+
+  Camera = object
+    forward, right, up, pos: Vec3
+
+  Ray = object
+    start, dir: Vec3
+
+  Thing = object
+    surfaceType: SurfaceType
+    case objectType: ObjectType
+    of Sphere:
+      center: Vec3
+      radius2: float32 ## Squared radius (see `newSphere`).
+    of Plane:
+      normal: Vec3
+      offset: float32
+
+  Intersection = object
+    thingIdx: int ## Index into `Scene.things`; -1 when nothing was hit.
+    ray: Ray
+    dist: float32 ## Distance along `ray`; 0.0 is treated as "no hit".
+
+  Light = object
+    pos: Vec3
+    color: Color
+
+  Scene = object
+    maxDepth: int
+    things: seq[Thing]
+    lights: seq[Light]
+    camera: Camera
+
+  SurfaceProperties = object
+    diffuse, specular: Color
+    reflect, roughness: float32
+
+const
+  farAway: float32 = 1000000.0
+  white = color(1.0, 1.0, 1.0)
+  grey = color(0.5, 0.5, 0.5)
+  black = color(0.0, 0.0, 0.0)
+  background = color(0.0, 0.0, 0.0)
+  defaultColor = color(0.0, 0.0, 0.0)
+
+proc `*`(c: Color, k: float32): Color = color(k * c.r, k * c.g, k * c.b)
+proc `*`(a: Color, b: Color): Color = color(a.r * b.r, a.g * b.g, a.b * b.b)
+proc `+`(a: Color, b: Color): Color = color(a.r + b.r, a.g + b.g, a.b + b.b)
+
+proc newCamera(pos: Vec3, lookAt: Vec3): Camera =
+  ## Camera at `pos` looking towards `lookAt`. `right` and `up` are derived
+  ## from `forward` and a fixed down vector, normalized, then scaled by 1.5.
+  let down = vsimd.vec3(0.0, -1.0, 0.0)
+  result.pos = pos
+  result.forward = (lookAt - pos).normalize()
+  result.right = result.forward.cross(down)
+  # `up` uses `right` before it is normalized.
+  result.up = result.forward.cross(result.right)
+  result.right = result.right.normalize() * 1.5
+  result.up = result.up.normalize() * 1.5
+
+proc getNormal(obj: Thing, pos: Vec3): Vec3 =
+  ## Surface normal of `obj` at `pos`.
+  case obj.objectType
+  of Sphere: (pos - obj.center).normalize()
+  of Plane: obj.normal
+
+proc objectIntersect(obj: Thing, ray: Ray): Intersection =
+  ## Intersects `ray` with `obj`. A miss leaves `dist` at 0.0. `thingIdx` is
+  ## always -1 here; `intersections` fills in the index of the winning object.
+  result.thingIdx = -1
+  result.dist = 0.0
+  case obj.objectType
+  of Sphere:
+    let
+      eo = obj.center - ray.start
+      v = eo.dot(ray.dir)
+    if v >= 0:
+      let disc = obj.radius2 - (eo.dot(eo) - (v * v))
+      if disc >= 0:
+        let dist = v - sqrt(disc)
+        if dist != 0.0:
+          result.ray = ray
+          result.dist = dist
+  of Plane:
+    let denom = obj.normal.dot(ray.dir)
+    if denom <= 0:
+      # NOTE(review): denom == 0 (ray parallel to the plane) divides by zero.
+      result.dist = (obj.normal.dot(ray.start) + obj.offset) / (-denom)
+      result.ray = ray
+
+proc newSphere(center: Vec3, radius: float32, surfaceType: SurfaceType): Thing =
+  ## Sphere stored with its squared radius.
+  Thing(surfaceType: surfaceType, objectType: Sphere, center: center,
+    radius2: radius * radius)
+
+proc newPlane(normal: Vec3, offset: float32, surfaceType: SurfaceType): Thing =
+  ## Plane described by its normal and offset.
+  Thing(surfaceType: surfaceType, objectType: Plane, normal: normal,
+    offset: offset)
+
+proc getSurfaceProperties(obj: Thing, pos: Vec3): SurfaceProperties =
+  ## Material of `obj` at `pos`. The checkerboard alternates on the parity
+  ## of floor(z) + floor(x).
+  case obj.surfaceType
+  of ShinySurface:
+    result.diffuse = white
+    result.specular = grey
+    result.reflect = 0.7
+    result.roughness = 250.0
+  of CheckerBoardSurface:
+    let val = int(floor(pos.z) + floor(pos.x))
+    if val mod 2 != 0:
+      result.reflect = 0.1
+      result.diffuse = white
+    else:
+      result.reflect = 0.7
+      result.diffuse = black
+    result.specular = white
+    result.roughness = 150.0
+
+proc newScene(): Scene =
+  ## The fixed benchmark scene: one plane, two spheres, four lights.
+  result.maxDepth = 5
+  result.things = @[
+    newPlane(vsimd.vec3(0.0, 1.0, 0.0), 0.0, CheckerBoardSurface),
+    newSphere(vsimd.vec3(0.0, 1.0, -0.25), 1.0, ShinySurface),
+    newSphere(vsimd.vec3(-1.0, 0.5, 1.5), 0.5, ShinySurface)
+  ]
+  result.lights = @[
+    Light(pos: vsimd.vec3(-2.0, 2.5, 0.0), color: color(0.49, 0.07, 0.07)),
+    Light(pos: vsimd.vec3(1.5, 2.5, 1.5), color: color(0.07, 0.07, 0.49)),
+    Light(pos: vsimd.vec3(1.5, 2.5, -1.5), color: color(0.07, 0.49, 0.071)),
+    Light(pos: vsimd.vec3(0.0, 3.5, 0.0), color: color(0.21, 0.21, 0.35))
+  ]
+  result.camera = newCamera(
+    vsimd.vec3(3.0, 2.0, 4.0), vsimd.vec3(-1.0, 0.5, 0.0))
+
+proc intersections(scene: Scene, ray: Ray): Intersection =
+  ## Hit with the smallest `dist` below `farAway`; `thingIdx` stays -1 if none.
+  var closest: float32 = farAway
+  result.thingIdx = -1
+  result.dist = 0.0
+  for i, thing in scene.things:
+    let intersect = objectIntersect(thing, ray)
+    if intersect.dist != 0.0 and intersect.dist < closest:
+      result = intersect
+      result.thingIdx = i
+      closest = intersect.dist
+
+proc shade(scene: Scene, intersection: Intersection, depth: int): Color
+
+proc traceRay(scene: Scene, ray: Ray, depth: int): Color =
+  ## Color seen along `ray`: the shaded hit, or `background` on a miss.
+  let intersection = intersections(scene, ray)
+  if intersection.thingIdx >= 0:
+    return scene.shade(intersection, depth)
+  return background
+
+proc getReflectionColor(
+  scene: Scene, thing: Thing, pos: Vec3, normal: Vec3, reflectDir: Vec3,
+  depth: int
+): Color =
+  ## Traces the reflected ray one level deeper and scales the result by the
+  ## surface's `reflect` factor.
+  let
+    ray = Ray(start: pos, dir: reflectDir)
+    color = scene.traceRay(ray, depth + 1)
+    properties = getSurfaceProperties(thing, pos)
+  return color * properties.reflect
+
+proc getNaturalColor(scene: Scene, thing: Thing, pos, norm,
+    reflectDir: Vec3
+): Color =
+  ## Sums the diffuse and specular contribution of every light that is not
+  ## shadowed at `pos`.
+  result = black
+  let
+    reflectDirNorm = reflectDir.normalize()
+    sp = getSurfaceProperties(thing, pos)
+  for light in scene.lights:
+    let
+      lightDist = light.pos - pos
+      lightVec = lightDist.normalize()
+      lightDistLen = lightDist.length()
+      # Shadow test reads the hit index instead of a NaN "miss" marker:
+      # vmath_simd compiles with -ffast-math, under which the C compiler may
+      # assume NaN never occurs and drop NaN checks.
+      blocker = scene.intersections(Ray(start: pos, dir: lightVec))
+      isInShadow = blocker.thingIdx >= 0 and blocker.dist <= lightDistLen
+
+    if not isInShadow:
+      let
+        illumination = lightVec.dot(norm)
+        specular = lightVec.dot(reflectDirNorm)
+        lightColor =
+          if illumination > 0: light.color * illumination
+          else: defaultColor
+        specularColor =
+          if specular > 0: light.color * pow(specular, sp.roughness)
+          else: defaultColor
+      result = result + lightColor * sp.diffuse + specularColor * sp.specular
+
+proc shade(scene: Scene, intersection: Intersection, depth: int): Color =
+  ## Shades a hit: natural (direct light) color plus the reflected color,
+  ## which falls back to `grey` once `depth` reaches `scene.maxDepth`.
+  let
+    thing = scene.things[intersection.thingIdx]
+    dir = intersection.ray.dir
+    pos = dir * intersection.dist + intersection.ray.start
+    normal = thing.getNormal(pos)
+    reflectDir = dir - (normal * normal.dot(dir) * 2)
+    naturalColor = background + getNaturalColor(scene, thing,
+      pos, normal, reflectDir)
+    reflectedColor =
+      if depth >= scene.maxDepth: grey
+      else: getReflectionColor(scene, thing, pos, normal, reflectDir, depth)
+  return naturalColor + reflectedColor
+
+proc getPoint(x, y: int, camera: Camera, screenWidth, screenHeight: int): Vec3 =
+  ## Normalized ray direction through pixel (`x`, `y`) of a
+  ## `screenWidth` x `screenHeight` image.
+  let
+    sw = float32(screenWidth)
+    sh = float32(screenHeight)
+    recenterX = (float32(x) - (sw / 2.0)) / 2.0 / sw
+    recenterY = -(float32(y) - (sh / 2.0)) / 2.0 / sh
+    offset = camera.right * recenterX + camera.up * recenterY
+  return (camera.forward + offset).normalize()
+
+proc renderScene(scene: Scene, sceneImage: Image) =
+  ## Traces one primary ray per pixel of `sceneImage`.
+  let
+    h = sceneImage.height
+    w = sceneImage.width
+  var ray: Ray
+  ray.start = scene.camera.pos
+  for y in 0 ..< h:
+    for x in 0 ..< w:
+      # getPoint takes (screenWidth, screenHeight), i.e. (w, h).
+      ray.dir = getPoint(x, y, scene.camera, w, h)
+      sceneImage.unsafe[x, y] = scene.traceRay(ray, 0).asRgbx()
+
+proc render(): Image =
+  ## Renders the benchmark scene into a new 500x500 image.
+  let scene = newScene()
+  result = newImage(500, 500)
+  renderScene(scene, result)
+
+render().writeFile("tests/raytracer_simd.png")
+
+timeIt "raytracer", 100:
+  discard render()
diff --git a/tests/raytracer.png b/tests/raytracer.png
index d5f86f9..6073e89 100644
Binary files a/tests/raytracer.png and b/tests/raytracer.png differ
diff --git a/tests/raytracer_simd.png b/tests/raytracer_simd.png
new file mode 100644
index 0000000..d651277
Binary files /dev/null and b/tests/raytracer_simd.png differ