Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions lightllm/server/api_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .embed_cache.manager import start_cache_manager
from lightllm.utils.log_utils import init_logger
from lightllm.utils.envs_utils import set_env_start_args, set_unique_server_name, get_unique_server_name
from lightllm.utils.envs_utils import get_lightllm_gunicorn_time_out_seconds, get_lightllm_gunicorn_keep_alive
from lightllm.utils.envs_utils import get_lightllm_gunicorn_keep_alive
from .detokenization.manager import start_detokenization_process
from .router.manager import start_router_process
from lightllm.utils.process_check import is_process_active
Expand Down Expand Up @@ -337,13 +337,11 @@ def normal_or_p_d_start(args):
],
)

# 启动 gunicorn
# 启动 Hypercorn
command = [
"gunicorn",
"hypercorn",
"--workers",
f"{args.httpserver_workers}",
"--worker-class",
"uvicorn.workers.UvicornWorker",
"--bind",
f"{args.host}:{args.port}",
"--log-level",
Expand All @@ -353,8 +351,6 @@ def normal_or_p_d_start(args):
"--error-logfile",
"-",
"lightllm.server.api_http:app",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Hypercorn's default worker timeout is 30 seconds, which is significantly lower than the previous Gunicorn timeout of 180 seconds. This could cause issues with long-running requests. It's advisable to explicitly set the worker timeout to maintain similar behavior. I'd recommend making this value configurable, for example by re-introducing a function like the removed `get_lightllm_gunicorn_time_out_seconds`, but for Hypercorn.

Suggested change
"lightllm.server.api_http:app",
"lightllm.server.api_http:app",
"--worker-timeout",
"180",

"--timeout",
f"{get_lightllm_gunicorn_time_out_seconds()}",
"--keep-alive",
f"{get_lightllm_gunicorn_keep_alive()}",
]
Expand Down Expand Up @@ -407,11 +403,9 @@ def pd_master_start(args):
)

command = [
"gunicorn",
"hypercorn",
"--workers",
"1",
"--worker-class",
"uvicorn.workers.UvicornWorker",
"--bind",
f"{args.host}:{args.port}",
"--log-level",
Expand All @@ -422,8 +416,6 @@ def pd_master_start(args):
"-",
"--preload",
"lightllm.server.api_http:app",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Hypercorn's default worker timeout is 30 seconds, which is significantly lower than the previous Gunicorn timeout of 180 seconds. This could cause issues with long-running requests. It's advisable to explicitly set the worker timeout to maintain similar behavior. I'd recommend making this value configurable, for example by re-introducing a function like the removed `get_lightllm_gunicorn_time_out_seconds`, but for Hypercorn.

Suggested change
"lightllm.server.api_http:app",
"lightllm.server.api_http:app",
"--worker-timeout",
"180",

"--timeout",
f"{get_lightllm_gunicorn_time_out_seconds()}",
"--keep-alive",
f"{get_lightllm_gunicorn_keep_alive()}",
]
Expand All @@ -449,11 +441,9 @@ def config_server_start(args):
set_env_start_args(args)

command = [
"gunicorn",
"hypercorn",
"--workers",
"1",
"--worker-class",
"uvicorn.workers.UvicornWorker",
"--bind",
f"{args.config_server_host}:{args.config_server_port}",
"--log-level",
Expand All @@ -464,8 +454,6 @@ def config_server_start(args):
"-",
"--preload",
"lightllm.server.config_server.api_http:app",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Hypercorn's default worker timeout is 30 seconds, which is significantly lower than the previous Gunicorn timeout of 180 seconds. This could cause issues with long-running requests. It's advisable to explicitly set the worker timeout to maintain similar behavior. I'd recommend making this value configurable, for example by re-introducing a function like the removed `get_lightllm_gunicorn_time_out_seconds`, but for Hypercorn.

Suggested change
"lightllm.server.config_server.api_http:app",
"lightllm.server.config_server.api_http:app",
"--worker-timeout",
"180",

"--timeout",
f"{get_lightllm_gunicorn_time_out_seconds()}",
"--keep-alive",
f"{get_lightllm_gunicorn_keep_alive()}",
]
Expand Down
6 changes: 5 additions & 1 deletion lightllm/server/req_id_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,14 @@ def _find_sibling_processes():

# 查找兄弟进程
sibling_processes = []
for proc in psutil.process_iter(["pid", "name"]):
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
try:
# 检查是否是兄弟进程(同一父进程且不是当前进程)
if proc.pid != current_pid and proc.ppid() == parent_process.pid:
# 过滤掉 multiprocessing.resource_tracker 进程
cmdline = proc.cmdline()
if cmdline and "multiprocessing.resource_tracker" in " ".join(cmdline):
continue
sibling_processes.append(proc)
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
Expand Down
4 changes: 0 additions & 4 deletions lightllm/utils/envs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,6 @@ def get_deepep_num_max_dispatch_tokens_per_rank():
return int(os.getenv("NUM_MAX_DISPATCH_TOKENS_PER_RANK", 256))


def get_lightllm_gunicorn_time_out_seconds():
    """Return the HTTP server timeout (seconds, per the function name).

    The value is read from the ``LIGHTLMM_GUNICORN_TIME_OUT`` environment
    variable. The variable name is kept exactly as it has always been
    spelled so that existing deployments keep working.

    Returns:
        int: the configured timeout, or 180 when the variable is unset or
        set to a blank string.

    Raises:
        ValueError: if the variable is set to a non-integer value. The
            message names the offending variable and value.
    """
    env_name = "LIGHTLMM_GUNICORN_TIME_OUT"
    raw_value = os.getenv(env_name)
    # A variable exported with an empty value (``VAR=``) is treated like an
    # unset one instead of crashing on ``int("")``.
    if raw_value is None or not raw_value.strip():
        return 180
    try:
        return int(raw_value)
    except ValueError as err:
        raise ValueError(
            f"{env_name} must be an integer, got {raw_value!r}"
        ) from err


def get_lightllm_gunicorn_keep_alive():
    """Return the keep-alive setting handed to the HTTP server launcher.

    The value is read from the ``LIGHTLMM_GUNICORN_KEEP_ALIVE`` environment
    variable. The variable name is kept exactly as it has always been
    spelled so that existing deployments keep working.

    Returns:
        int: the configured keep-alive value, or 10 when the variable is
        unset or set to a blank string.

    Raises:
        ValueError: if the variable is set to a non-integer value. The
            message names the offending variable and value.
    """
    env_name = "LIGHTLMM_GUNICORN_KEEP_ALIVE"
    raw_value = os.getenv(env_name)
    # A variable exported with an empty value (``VAR=``) is treated like an
    # unset one instead of crashing on ``int("")``.
    if raw_value is None or not raw_value.strip():
        return 10
    try:
        return int(raw_value)
    except ValueError as err:
        raise ValueError(
            f"{env_name} must be an integer, got {raw_value!r}"
        ) from err

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ ujson==5.10.0
frozendict==2.4.6
atomics==1.0.3
easydict==1.13
gunicorn==23.0.0
hypercorn==0.18.0
flashinfer-python==0.2.4
sgl-kernel==0.3.7.post1
httpx==0.28.1
Expand Down