Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions agentlightning/verl/async_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from verl.workers.rollout.vllm_rollout.vllm_async_server import AsyncvLLMServer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, ErrorResponse

from agentlightning.instrumentation.vllm import ChatCompletionResponsePatched, instrument_vllm
from agentlightning.instrumentation.vllm import instrument_vllm
from agentlightning.logging import configure_logger


def _unwrap_ray_remote(cls):
Expand All @@ -19,9 +20,11 @@ def _unwrap_ray_remote(cls):
return cls


logger = configure_logger()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be `logger = logging.getLogger(__name__)` — the standard module-level pattern — rather than calling `configure_logger()` at import time.



@ray.remote(num_cpus=1)
class PatchedvLLMServer(_unwrap_ray_remote(AsyncvLLMServer)):

def __init__(self, *args, **kwargs):
    """Set up the patched vLLM server.

    Applies AgentLightning's vLLM instrumentation (``instrument_vllm``)
    before the wrapped ``AsyncvLLMServer`` is constructed, so that the
    patches are in place for every request the server handles.
    ``*args``/``**kwargs`` are forwarded unchanged to the superclass.
    """
    # NOTE(review): instrument_vllm's exact patch set is defined in
    # agentlightning.instrumentation.vllm — not visible here.
    instrument_vllm()
    super().__init__(*args, **kwargs)
Expand All @@ -36,10 +39,14 @@ async def chat_completion(self, raw_request: Request):
"""
request_json = await raw_request.json()
request = ChatCompletionRequest(**request_json)
generator = await self.openai_serving_chat.create_chat_completion(request, raw_request)
generator = await self.openai_serving_chat.create_chat_completion(
request, raw_request
)

if isinstance(generator, ErrorResponse):
return JSONResponse(content=generator.model_dump(), status_code=generator.code)
status_code = getattr(generator, "code", None) or 500
Copy link

Copilot AI Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback to 500 may mask situations where generator.code is 0 or another falsy value. Consider explicitly checking for None: status_code = getattr(generator, 'code', None); status_code = status_code if status_code is not None else 500 or using status_code = getattr(generator, 'code', 500).

Suggested change
status_code = getattr(generator, "code", None) or 500
status_code = getattr(generator, "code", None)
status_code = status_code if status_code is not None else 500

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think this comment makes sense?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure. Before deciding on the fallback, we should check which status codes verl's `ErrorResponse` can actually return, and whether AgentLightning inspects the status code anywhere.

logger.error("vLLM chat completion error: %s", generator.model_dump())
return JSONResponse(content=generator.model_dump(), status_code=status_code)
if request.stream:
return StreamingResponse(content=generator, media_type="text/event-stream")
else:
Expand Down
Loading