这是一个从部署开始的大模型应用学习记录存储库。
# Activate the py311 conda environment before starting the server.
conda activate py311
# Serve the local Qwen3-0.6B checkpoint with vLLM on port 54257.
# Keep a space before every trailing backslash: the shell deletes a
# backslash-newline pair outright, so "Qwen3-0.6B\" followed by a line starting
# with "--trust-remote-code" would be joined into a single (wrong) model path.
vllm serve /root/share/new_models/qwen3/Qwen3-0.6B \
  --trust-remote-code \
  --port 54257 \
  --max-model-len 8000 \
  --gpu-memory-utilization 0.90 \
  --max-num-seqs 8 \
  --swap-space 8 \
  --enforce-eager
# Send a sample chat-completion request to the server on localhost:54257.
# The "model" field uses the same local path that was given to `vllm serve`.
curl "http://localhost:54257/v1/chat/completions" \
  --header "Content-Type: application/json" \
  --data '{
"model": "/root/share/new_models/qwen3/Qwen3-0.6B",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "你好,能帮我写一个关于春天的五言绝句吗?"}
]
}'
新开一个终端(terminal),激活py311环境
# In the newly opened terminal, activate the py311 conda environment.
conda activate py311
运行app.py
# Launch the Streamlit app; the path is relative, so run this from the
# directory that contains app.py.
streamlit run ./app.py