From 07d9b844b0e896167dc4dcb2e86492e642742869 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <179508745+promptless[bot]@users.noreply.github.com>
Date: Mon, 23 Mar 2026 14:39:03 +0000
Subject: [PATCH 1/2] Update Flash local testing docs with request format and
docstring feature
---
flash/apps/local-testing.mdx | 20 ++++++++++++++++++--
flash/cli/run.mdx | 25 ++++++++++++++++++++++++-
2 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/flash/apps/local-testing.mdx b/flash/apps/local-testing.mdx
index a0f12cf3..8ce1a3b9 100644
--- a/flash/apps/local-testing.mdx
+++ b/flash/apps/local-testing.mdx
@@ -36,7 +36,7 @@ flash run --host 0.0.0.0
# Call a queue-based endpoint (gpu_worker.py)
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
- -d '{"message": "Hello from Flash"}'
+ -d '{"input": {"message": "Hello from Flash"}}'
# Call a load-balanced endpoint (lb_worker.py)
curl -X POST http://localhost:8888/lb_worker/process \
@@ -44,10 +44,26 @@ curl -X POST http://localhost:8888/lb_worker/process \
-d '{"data": "test"}'
```
+
+Queue-based endpoints require the `{"input": {...}}` wrapper format to match the deployed endpoint behavior. Load-balanced endpoints accept direct JSON payloads.
+
+
### Using the API explorer
Open [http://localhost:8888/docs](http://localhost:8888/docs) in your browser to access the interactive Swagger UI. You can test all endpoints directly from the browser.
+Flash extracts the first line of each function's docstring and displays it as the endpoint description in the API explorer. Add docstrings to your `@Endpoint` functions to make your API self-documenting:
+
+```python
+@Endpoint(name="gpu-worker", gpu=GpuGroup.ANY)
+def process_data(data: dict) -> dict:
+ """Process input data and return computed results."""
+ # Function implementation
+ return {"result": "processed"}
+```
+
+The docstring "Process input data and return computed results" appears in the Swagger UI, making it easier to understand what each endpoint does.
+
### Using Python
```python
@@ -56,7 +72,7 @@ import requests
# Call queue-based endpoint
response = requests.post(
"http://localhost:8888/gpu_worker/runsync",
- json={"message": "Hello from Flash"}
+ json={"input": {"message": "Hello from Flash"}}
)
print(response.json())
diff --git a/flash/cli/run.mdx b/flash/cli/run.mdx
index 45b23380..78d7fe6f 100644
--- a/flash/cli/run.mdx
+++ b/flash/cli/run.mdx
@@ -47,6 +47,25 @@ Enable or disable auto-reload on code changes. Enabled by default.
Auto-provision all Serverless endpoints on startup instead of lazily on first call. Eliminates cold-start delays during development.
+## Endpoint descriptions from docstrings
+
+Flash extracts the first line of each function's docstring and uses it in two places:
+
+- **Startup table**: The "Description" column shows the docstring when the server starts.
+- **Swagger UI**: The endpoint summary in the API explorer at `/docs`.
+
+Add docstrings to your `@Endpoint` functions to make your API self-documenting:
+
+```python
+@Endpoint(name="text-processor", gpu=GpuGroup.ANY)
+def analyze_text(text: str) -> dict:
+ """Analyze text and return sentiment scores."""
+ # Implementation here
+ return {"sentiment": "positive"}
+```
+
+When you run `flash run`, the startup table displays "Analyze text and return sentiment scores" as the description for this endpoint, and the same text appears in the Swagger UI summary.
+
## Architecture
With `flash run`, Flash starts a local development server alongside remote Serverless endpoints:
@@ -136,7 +155,7 @@ curl http://localhost:8888/
# Call a queue-based GPU endpoint (gpu_worker.py)
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
- -d '{"message": "Hello from GPU!"}'
+ -d '{"input": {"message": "Hello from GPU!"}}'
# Call a load-balanced endpoint (lb_worker.py)
curl -X POST http://localhost:8888/lb_worker/process \
@@ -144,6 +163,10 @@ curl -X POST http://localhost:8888/lb_worker/process \
-d '{"data": "test"}'
```
+
+Queue-based endpoints require the `{"input": {...}}` wrapper format to match deployed endpoint behavior. Load-balanced endpoints accept direct JSON payloads.
+
+
Open http://localhost:8888/docs for the interactive API explorer.
## Requirements
From 66733843845054753734bb9f3c163fc5386a3be0 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <179508745+promptless[bot]@users.noreply.github.com>
Date: Mon, 23 Mar 2026 14:55:20 +0000
Subject: [PATCH 2/2] Sync documentation updates
---
flash/apps/local-testing.mdx | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/flash/apps/local-testing.mdx b/flash/apps/local-testing.mdx
index 8ce1a3b9..15463e7a 100644
--- a/flash/apps/local-testing.mdx
+++ b/flash/apps/local-testing.mdx
@@ -36,16 +36,16 @@ flash run --host 0.0.0.0
# Call a queue-based endpoint (gpu_worker.py)
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
- -d '{"input": {"message": "Hello from Flash"}}'
+ -d '{"input": {"input_data": {"message": "Hello from Flash"}}}'
# Call a load-balanced endpoint (lb_worker.py)
curl -X POST http://localhost:8888/lb_worker/process \
-H "Content-Type: application/json" \
- -d '{"data": "test"}'
+ -d '{"input_data": {"message": "Hello from Flash"}}'
```
-Queue-based endpoints require the `{"input": {...}}` wrapper format to match the deployed endpoint behavior. Load-balanced endpoints accept direct JSON payloads.
+Queue-based endpoints require the `{"input": {...}}` wrapper to match the deployed endpoint behavior. The inner payload structure maps to your function's parameter names: the skeleton template uses `input_data: dict`, so the payload is `{"input_data": {...}}`. Load-balanced endpoints accept the payload directly, without the `input` wrapper.
### Using the API explorer
@@ -56,7 +56,7 @@ Flash extracts the first line of each function's docstring and displays it as th
```python
@Endpoint(name="gpu-worker", gpu=GpuGroup.ANY)
-def process_data(data: dict) -> dict:
+def process_data(input_data: dict) -> dict:
"""Process input data and return computed results."""
# Function implementation
return {"result": "processed"}
@@ -72,14 +72,14 @@ import requests
# Call queue-based endpoint
response = requests.post(
"http://localhost:8888/gpu_worker/runsync",
- json={"input": {"message": "Hello from Flash"}}
+ json={"input": {"input_data": {"message": "Hello from Flash"}}}
)
print(response.json())
# Call load-balanced endpoint
response = requests.post(
"http://localhost:8888/lb_worker/process",
- json={"data": "test"}
+ json={"input_data": {"message": "Hello from Flash"}}
)
print(response.json())
```
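
For reviewers, the two documented behaviors (the `{"input": {...}}` unwrapping for queue-based endpoints, and the first-line docstring summaries shown in the startup table and Swagger UI) can be sketched as follows. This is a minimal illustration only, not Flash's actual implementation; `dispatch_queue_request` and `endpoint_summary` are hypothetical helper names:

```python
# Sketch of the documented behavior; not Flash's real code.
import inspect


def endpoint_summary(fn):
    """Return the first docstring line, as shown in the startup table and API explorer."""
    doc = inspect.getdoc(fn)
    return doc.splitlines()[0] if doc else ""


def dispatch_queue_request(fn, body):
    """Unwrap {"input": {...}} and map inner keys onto the function's parameters."""
    payload = body["input"]  # queue-based endpoints require the wrapper
    params = inspect.signature(fn).parameters
    # Inner keys must match parameter names, e.g. input_data: dict in the skeleton template.
    kwargs = {name: payload[name] for name in params if name in payload}
    return fn(**kwargs)


def process_data(input_data: dict) -> dict:
    """Process input data and return computed results."""
    return {"result": "processed", "echo": input_data}
```

With these helpers, `endpoint_summary(process_data)` yields "Process input data and return computed results.", and `dispatch_queue_request(process_data, {"input": {"input_data": {"message": "Hello"}}})` routes the inner object to the `input_data` parameter, matching the curl examples in the docs.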