File tree Expand file tree Collapse file tree
zh/get_started/installation Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -480,7 +480,7 @@ python3 -m fastdeploy.entrypoints.openai.api_server \
480480 --cache-queue-port 55660 \
481481 --max-model-len 16384 \
482482 --max-num-batched-tokens 16384 \
483- --max-num-seqs 32 \
483+ --max-num-seqs 64 \
484484 --workers 2 \
485485 --block-size 16
486486```
Original file line number Diff line number Diff line change @@ -480,7 +480,7 @@ python3 -m fastdeploy.entrypoints.openai.api_server \
480480 --cache-queue-port 55660 \
481481 --max-model-len 16384 \
482482 --max-num-batched-tokens 16384 \
483- --max-num-seqs 32 \
483+ --max-num-seqs 64 \
484484 --workers 2 \
485485 --block-size 16
486486```
Original file line number Diff line number Diff line change @@ -577,6 +577,10 @@ def _start_worker_service(self):
577577 if self .cfg .structured_outputs_config .logits_processors is not None :
578578 arguments += f" --logits-processors { ' ' .join (self .cfg .structured_outputs_config .logits_processors )} "
579579
580+ # TODO (iluvatar): remove after paddle fixes the launch error
581+ if current_platform .is_iluvatar () and "CUDA_VISIBLE_DEVICES" in os .environ :
582+ arguments = arguments .replace (f"--devices { self .cfg .parallel_config .device_ids } " , "" )
583+
580584 worker_store_true_flag = {
581585 "enable_expert_parallel" : self .cfg .parallel_config .enable_expert_parallel ,
582586 "enable_chunked_moe" : self .cfg .parallel_config .enable_chunked_moe ,
You can’t perform that action at this time.
0 commit comments