From 4468c7c9b127a6be7e6db3f73f6097dd0f33473b Mon Sep 17 00:00:00 2001
From: Clement Verrier <17490658+cverrier@users.noreply.github.com>
Date: Wed, 9 Jul 2025 10:34:43 +0800
Subject: [PATCH] docs: update code generation

The documentation for code generation has gone out of date, and the
provided code was throwing errors.

Update the documentation to the most recent changes (from commit
tinygrad/tinygrad@2893feb9f6f3c7eed825494e51a9a9e84c6b8a2e).
---
 20241231_intro.md | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/20241231_intro.md b/20241231_intro.md
index 158d11a..488263c 100644
--- a/20241231_intro.md
+++ b/20241231_intro.md
@@ -170,23 +170,20 @@ progressively "lowered" into a form that can be used for actual code generation.
 ## Code generation
 
 The UOp used for code generation is contains much more details and is of lower level. Here I have built an example that you can use
-to play around. If things go out of date, commit id is: ae00fa3b2833dbe0595d54d5fb0b679e1731ae01
+to play around. If things go out of date, please refer to commit `tinygrad/tinygrad@2893feb9f6f3c7eed825494e51a9a9e84c6b8a2e`.
 
 Suppose we just want to add two numbers:
 
 ```python
 from tinygrad.renderer.cstyle import MetalRenderer
-from tinygrad.ops import UOp, Ops
-from tinygrad import dtypes
+from tinygrad.uop import Ops
+from tinygrad import UOP, dtypes
 
 const = UOp(Ops.CONST, dtypes.float, arg=1.0)
 add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None)
 
 print(add)
-print(MetalRenderer().render("example", [
-  const,
-  add
-]))
+print(MetalRenderer().render([const, add]))
 ```
 
 The `add` variable shows something like:
@@ -203,7 +200,7 @@ let's see the rendered code:
 ```c++
 #include <metal_stdlib>
 using namespace metal;
-kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
+kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
   float alu0 = (1.0f+1.0f);
 }
 ```
@@ -212,18 +209,15 @@ Let me show you the CUDA version also, where you would replace the import:
 
 ```python
 from tinygrad.renderer.cstyle import CUDARenderer
-from tinygrad.ops import UOp, Ops
-from tinygrad import dtypes
+from tinygrad.uop import Ops
+from tinygrad import UOP, dtypes
 
 const = UOp(Ops.CONST, dtypes.float, arg=1.0)
 add = UOp(Ops.ADD, dtypes.float, src=(const, const), arg=None)
 
 print(add)
 
-print(CUDARenderer("sm_50").render("example", [
-  const,
-  add
-]))
+print(CUDARenderer("sm_50").render([const, add]))
 ``` 
 
 Note that you have to pass in the "architecture" as argument, it affects the compiler, this value is set automatically
@@ -232,7 +226,7 @@ by querying `cuDeviceComputeCapability`, for our render purpose, pass in just tw
 ```c++
 #define INFINITY (__int_as_float(0x7f800000))
 #define NAN (__int_as_float(0x7fffffff))
-extern "C" __global__ void __launch_bounds__(1) example() {
+extern "C" __global__ void __launch_bounds__(1) test() {
   float alu0 = (1.0f+1.0f);
 }
 ```
@@ -242,15 +236,13 @@ two constants is "folded" before the render stage, so you get the value 2, inste
 optimization techniques. Let's see another example that renders the thread position:
 
 ```python
-MetalRenderer().render("example", [
-  UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16))
-])
+print(MetalRenderer().render([UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16))]))
 ```
 
 ```c++
 #include <metal_stdlib>
 using namespace metal;
-kernel void example(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
+kernel void test(uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
   int gidx0 = gid.x; /* 16 */
 }
 ```
@@ -260,7 +252,7 @@ On CUDA:
 ```c++
 #define INFINITY (__int_as_float(0x7f800000))
 #define NAN (__int_as_float(0x7fffffff))
-extern "C" __global__ void __launch_bounds__(1) example() {
+extern "C" __global__ void __launch_bounds__(1) test() {
   int gidx0 = blockIdx.x; /* 16 */
 }
 ```
@@ -270,14 +262,16 @@ also handle the count, so it renders `.x` `.y` automtically if you pass more tha
 
 
 ```python
-print(CUDARenderer("sm_50").render("example", [
+print(CUDARenderer("sm_50").render([
   UOp(Ops.SPECIAL, dtypes.int, arg=("gidx0", 16)),
   UOp(Ops.SPECIAL, dtypes.int, arg=("gidx1", 16))
 ]))
 ```
 
 ```c++
-extern "C" __global__ void __launch_bounds__(1) example() {
+#define INFINITY (__int_as_float(0x7f800000))
+#define NAN (__int_as_float(0x7fffffff))
+extern "C" __global__ void __launch_bounds__(1) test() {
   int gidx0 = blockIdx.x; /* 16 */
   int gidx1 = blockIdx.y; /* 16 */
 }