From 4468c7c9b127a6be7e6db3f73f6097dd0f33473b Mon Sep 17 00:00:00 2001 From: Clement Verrier <17490658+cverrier@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:34:43 +0800 Subject: [PATCH] docs: update code generation The documentation for code generation has gone out of date, and the provided code was throwing errors. Update the documentation to the most recent changes (from commit tinygrad/tinygrad@2893feb9f6f3c7eed825494e51a9a9e84c6b8a2e). --- 20241231_intro.md | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/20241231_intro.md b/20241231_intro.md index 158d11a..488263c 100644 --- a/20241231_intro.md +++ b/20241231_intro.md @@ -170,23 +170,20 @@ progressively "lowered" into a form that can be used for actual code generation. ## Code generation The UOp used for code generation is contains much more details and is of lower level. Here I have built an example that you can use -to play around. If things go out of date, commit id is: ae00fa3b2833dbe0595d54d5fb0b679e1731ae01 +to play around. If things go out of date, please refer to commit `tinygrad/tinygrad@2893feb9f6f3c7eed825494e51a9a9e84c6b8a2e`. 
Suppose we just want to add two numbers: ```python from tinygrad.renderer.cstyle import MetalRenderer -from tinygrad.ops import UOp, Ops -from tinygrad import dtypes +from tinygrad.uop import Ops +from tinygrad import UOp, dtypes const = UOp(Ops.CONST, dtypes.float, arg=1.0) add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None) print(add) -print(MetalRenderer().render("example", [ - const, - add -])) +print(MetalRenderer().render([const, add])) ``` The `add` variable shows something like: @@ -203,7 +200,7 @@ let's see the rendered code: ```c++ #include using namespace metal; -kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) { +kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) { float alu0 = (1.0f+1.0f); } ``` @@ -212,18 +209,15 @@ Let me show you the CUDA version also, where you would replace the import: ```python from tinygrad.renderer.cstyle import CUDARenderer -from tinygrad.ops import UOp, Ops -from tinygrad import dtypes +from tinygrad.uop import Ops +from tinygrad import UOp, dtypes const = UOp(Ops.CONST, dtypes.float, arg=1.0) add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None) print(add) -print(CUDARenderer("sm_50").render("example", [ - const, - add -])) +print(CUDARenderer("sm_50").render([const, add])) ``` Note that you have to pass in the "architecture" as argument, it affects the compiler, this value is set automatically @@ -232,7 +226,7 @@ by querying `cuDeviceComputeCapability`, for our render purpose, pass in just tw ```c++ #define INFINITY (__int_as_float(0x7f800000)) #define NAN (__int_as_float(0x7fffffff)) -extern "C" __global__ void __launch_bounds__(1) example() { +extern "C" __global__ void __launch_bounds__(1) test() { float alu0 = (1.0f+1.0f); } ``` @@ -242,15 +236,13 @@ two constants is "folded" before the render stage, so you get the value 2, inste optimization techniques. 
Let's see another example that renders the thread position: ```python -MetalRenderer().render("example", [ - UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16)) -]) +print(MetalRenderer().render([UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16))])) ``` ```c++ #include using namespace metal; -kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) { +kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) { int gidx0 = gid.x; /* 16 */ } ``` @@ -260,7 +252,7 @@ On CUDA: ```c++ #define INFINITY (__int_as_float(0x7f800000)) #define NAN (__int_as_float(0x7fffffff)) -extern "C" __global__ void __launch_bounds__(1) example() { +extern "C" __global__ void __launch_bounds__(1) test() { int gidx0 = blockIdx.x; /* 16 */ } ``` @@ -270,14 +262,16 @@ also handle the count, so it renders `.x` `.y` automtically if you pass more tha ```python -print(CUDARenderer("sm_50").render("example", [ +print(CUDARenderer("sm_50").render([ UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16)), UOp(Ops.SPECIAL, dtypes.int, arg=("gidx1", 16)) ])) ``` ```c++ -extern "C" __global__ void __launch_bounds__(1) example() { +#define INFINITY (__int_as_float(0x7f800000)) +#define NAN (__int_as_float(0x7fffffff)) +extern "C" __global__ void __launch_bounds__(1) test() { int gidx0 = blockIdx.x; /* 16 */ int gidx1 = blockIdx.y; /* 16 */ }