From db089d7334d2859f8ea669cb1477e4b0976e359a Mon Sep 17 00:00:00 2001 From: Theodore Ehrenborg Date: Fri, 19 Dec 2025 12:12:05 +0000 Subject: [PATCH 1/4] Don't reuse sampling params --- delphi/clients/offline.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py index ecd07d37..371004a7 100644 --- a/delphi/clients/offline.py +++ b/delphi/clients/offline.py @@ -86,16 +86,17 @@ async def process_func( Process a single request. """ - # This is actually stupid + # Create fresh SamplingParams to avoid vLLM overflow bug when reusing + sampling_params = SamplingParams(max_tokens=self.sampling_params.max_tokens) for kwarg in kwargs: if "logprobs" in kwarg: - self.sampling_params.logprobs = kwarg["top_logprobs"] + sampling_params.logprobs = kwarg["top_logprobs"] if "prompt_logprobs" in kwarg: - self.sampling_params.prompt_logprobs = kwarg["prompt_logprobs"] + sampling_params.prompt_logprobs = kwarg["prompt_logprobs"] if "max_tokens" in kwarg: - self.sampling_params.max_tokens = kwarg["max_tokens"] + sampling_params.max_tokens = kwarg["max_tokens"] if "temperature" in kwarg: - self.sampling_params.temperature = kwarg["temperature"] + sampling_params.temperature = kwarg["temperature"] loop = asyncio.get_running_loop() prompts = [] statistics = [] @@ -124,7 +125,7 @@ async def process_func( partial( self.client.generate, # type: ignore prompts, - sampling_params=self.sampling_params, + sampling_params=sampling_params, # Use fresh params, not self.sampling_params use_tqdm=False, ), ) From 7c9bac8f71a256177eb123afd808df12a5c7f367 Mon Sep 17 00:00:00 2001 From: Theodore Ehrenborg Date: Fri, 19 Dec 2025 12:29:06 +0000 Subject: [PATCH 2/4] Claude: Use constructor --- delphi/clients/offline.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py index 371004a7..353d4470 100644 --- a/delphi/clients/offline.py +++ b/delphi/clients/offline.py @@ -86,17 +86,28 @@ async def process_func( Process a single request. """ - # Create fresh SamplingParams to avoid vLLM overflow bug when reusing - sampling_params = SamplingParams(max_tokens=self.sampling_params.max_tokens) + # Extract params from kwargs - must pass to constructor, not mutate after, + # because SamplingParams.__post_init__ sets skip_reading_prefix_cache based + # on prompt_logprobs, and mutation after construction skips this. + logprobs = None + prompt_logprobs = None + max_tokens = self.sampling_params.max_tokens + temperature = 1.0 for kwarg in kwargs: if "logprobs" in kwarg: - sampling_params.logprobs = kwarg["top_logprobs"] + logprobs = kwarg["top_logprobs"] if "prompt_logprobs" in kwarg: - sampling_params.prompt_logprobs = kwarg["prompt_logprobs"] + prompt_logprobs = kwarg["prompt_logprobs"] if "max_tokens" in kwarg: - sampling_params.max_tokens = kwarg["max_tokens"] + max_tokens = kwarg["max_tokens"] if "temperature" in kwarg: - sampling_params.temperature = kwarg["temperature"] + temperature = kwarg["temperature"] + sampling_params = SamplingParams( + max_tokens=max_tokens, + logprobs=logprobs, + prompt_logprobs=prompt_logprobs, + temperature=temperature, + ) loop = asyncio.get_running_loop() prompts = [] statistics = [] From e0e46467dd9b098555306468cae46cc5be6f5814 Mon Sep 17 00:00:00 2001 From: Theodore Ehrenborg Date: Sat, 20 Dec 2025 21:38:39 +0000 Subject: [PATCH 3/4] Shorten line --- delphi/clients/offline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py index 353d4470..53e6e8f4 100644 --- a/delphi/clients/offline.py +++ b/delphi/clients/offline.py @@ -136,7 +136,7 @@ async def process_func( partial( self.client.generate, # type: ignore prompts, - sampling_params=sampling_params, # Use fresh params, not self.sampling_params + sampling_params=sampling_params, # Use fresh sampling_params use_tqdm=False, ), ) From 5459a22a616445f5fb8aa05d81bad050b570eef4 Mon Sep 17 00:00:00 2001 From: Theodore Ehrenborg Date: Sat, 20 Dec 2025 21:40:02 +0000 Subject: [PATCH 4/4] Be less confident in comment --- delphi/clients/offline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/delphi/clients/offline.py b/delphi/clients/offline.py index 53e6e8f4..7ad8adbf 100644 --- a/delphi/clients/offline.py +++ b/delphi/clients/offline.py @@ -87,8 +87,8 @@ async def process_func( """ # Extract params from kwargs - must pass to constructor, not mutate after, - # because SamplingParams.__post_init__ sets skip_reading_prefix_cache based - # on prompt_logprobs, and mutation after construction skips this. + # because SamplingParams.__post_init__ likely does some extra setup, + # and mutation after construction skips this. logprobs = None prompt_logprobs = None max_tokens = self.sampling_params.max_tokens