Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0d91921
Initial commit of ungeneral nemotron model
mattshax Dec 9, 2025
c44e03b
Initial commit of ungeneral nemotron model
mattshax Dec 9, 2025
aa2d95b
Updating the HSP yaml
mattshax Dec 9, 2025
aa3bfe7
Updating branch
mattshax Dec 9, 2025
1163cab
Removing minimum model params
mattshax Dec 9, 2025
a7f9e99
Generalizing the model selection
mattshax Dec 9, 2025
2106e65
Adding fixed localport
mattshax Dec 9, 2025
b286838
Updating model selection
mattshax Dec 9, 2025
ca49ffd
fixing model replacement
mattshax Dec 10, 2025
e93febd
Revising exit with break so workflow finishes on walltime exit
mattshax Dec 10, 2025
3898e47
Cleaning up model location parameters
mattshax Dec 10, 2025
76e584a
Cleaning up model location parameters
mattshax Dec 10, 2025
6d3c4fe
Changing to absolute path
mattshax Dec 10, 2025
ba5d3fa
Move gress and contraint directives to the scheduler_directives param…
avidalto Dec 12, 2025
b99999e
Generalizing the yaml for different useres
mattshax Dec 12, 2025
e5d12f3
Updating readme with manual data requirements for model and container
mattshax Dec 13, 2025
059cc7d
Updating yaml to provide a container pull option
mattshax Dec 13, 2025
a0aae85
Updating container conditional logic
mattshax Dec 13, 2025
81461ef
Updating tooltip
mattshax Dec 13, 2025
4327793
Updating tooltip
mattshax Dec 13, 2025
df8b1a6
Updating tooltip
mattshax Dec 13, 2025
7658ff4
Adding comment for constraints
mattshax Dec 13, 2025
7a30d72
skipping pull if containers already exist
mattshax Dec 13, 2025
3b74aca
skipping pull if containers already exist
mattshax Dec 13, 2025
7752919
Adding ticktoken encodings for gptoss
mattshax Dec 13, 2025
f6b32d6
Fixing singularity loading if already in path
mattshax Dec 13, 2025
eae083b
Merge branch 'main' into nemotron
mattshax Dec 13, 2025
686bfb9
Updating the ATTN settings for latest vllm update
mattshax Dec 14, 2025
24e750b
Adding sagemaker fix for updated vllm
mattshax Dec 14, 2025
20f99f4
Merge branch 'main' into nemotron
mattshax Dec 14, 2025
ac616f1
Updating singularity build file
mattshax Dec 14, 2025
ddfb3a2
More gracefully exit log on fail when container fails
mattshax Dec 14, 2025
c640078
Removing bad char
mattshax Dec 14, 2025
e5382b8
Fixing the shm dir issue
mattshax Dec 15, 2025
dfe3485
Use script submitter to launch jobs
avidalto Dec 18, 2025
a7a1531
Fix the version of the script submitter
avidalto Dec 18, 2025
fd5c93c
Set scheduler to true
avidalto Dec 18, 2025
bfd90ba
Add advanced settings back
avidalto Dec 18, 2025
10d7a4a
Fix typo
avidalto Dec 18, 2025
18211a6
Set default directives
avidalto Dec 18, 2025
a10c227
Touch job.ended file after runner exists
avidalto Dec 18, 2025
841d97f
Rename job from 'Create Environment File' to 'Notify job ended'
avidalto Dec 19, 2025
1587a03
Remove condition from 'Get Hostname' step
avidalto Dec 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions singularity/Singularity.vllm
Original file line number Diff line number Diff line change
@@ -1,11 +1,52 @@
Bootstrap: docker
From: vllm/vllm-openai:latest

%post
# Abort on the first failing command (-e) or unset variable (-u), and trace
# every command (-x) so each build step shows up in the build output.
set -eux

# Install the compiler toolchain and build utilities (clang/lld/llvm,
# build-essential, pkg-config, cmake, ninja) plus git, curl and CA
# certificates. --no-install-recommends skips optional recommended packages.
apt-get update
apt-get install -y --no-install-recommends \
clang lld llvm \
build-essential \
git ca-certificates curl \
pkg-config cmake ninja-build
# Remove the apt package lists fetched by `apt-get update`; nothing below
# in this section calls apt again.
rm -rf /var/lib/apt/lists/*

# Resolve PY to an interpreter the base image really provides.
# Lookup order: `python` on PATH, then `python3` on PATH, then the conda
# binary at its fixed location. The build stops if none of them is present.
PY=
for candidate in python python3; do
  if command -v "$candidate" >/dev/null 2>&1; then
    PY=$candidate
    break
  fi
done
if [ -z "$PY" ] && [ -x /opt/conda/bin/python ]; then
  PY=/opt/conda/bin/python
fi
if [ -z "$PY" ]; then
  echo "No Python interpreter found (python/python3/conda)."; exit 1
fi

# Prefer clang for any builds happening inside the container.
# The exports are appended to a profile.d snippet rather than set here, so
# they apply to shells that source /etc/profile.d, not to this build step.
# NOTE(review): env.sh.example exports CC=/usr/bin/gcc and TRITON_CC=gcc;
# confirm which compiler setting is meant to win at runtime.
echo "export CC=clang" >> /etc/profile.d/clang.sh
echo "export CXX=clang++" >> /etc/profile.d/clang.sh

# Upgrade packaging tooling
$PY -m pip install -U pip setuptools wheel

# Ensure Transformers has the ministral3 config mapping.
# The existing package is removed first (`|| true` ignores a non-zero exit
# from the uninstall), then Transformers is installed straight from the
# GitHub repository. No branch, tag or commit is given in the URL, so the
# installed revision depends on when the image is built.
$PY -m pip uninstall -y transformers || true
$PY -m pip install -U git+https://github.com/huggingface/transformers

# Informational check: print the installed Transformers version and whether
# "ministral3" is a key of CONFIG_MAPPING. A False result is only printed;
# it does not fail the build.
$PY - << 'PY'
import transformers
from transformers.models.auto import CONFIG_MAPPING
print("Transformers:", transformers.__version__)
print("ministral3 in CONFIG_MAPPING:", "ministral3" in CONFIG_MAPPING)
PY

%runscript
mkdir -p /app
cd /app
exec /bin/bash -lc "$@"
mkdir -p /app
cd /app
exec /bin/bash -lc "$@"

%startscript
mkdir -p /app
cd /app
exec /bin/bash -lc "$@"
mkdir -p /app
cd /app
exec /bin/bash -lc "$@"
4 changes: 2 additions & 2 deletions singularity/env.sh.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export HF_HOME="/root/.cache/huggingface"

# Recommended on T4/V100 and for mistral tokenizer
export DOCS_DIR=./docs
export VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1
export VLLM_ATTENTION_BACKEND=TRITON_ATTN
export VLLM_EXTRA_ARGS="__VLLM_EXTRA_ARGS__"
export TRITON_CC=gcc
export CC=/usr/bin/gcc
Expand All @@ -32,7 +32,7 @@ export CXX=/usr/bin/g++
export TMPDIR=${PWD}/tmp
export CUDA_CACHE_PATH=${TMPDIR}/cuda_cache
export TORCH_EXTENSIONS_DIR=${TMPDIR}/torch_extensions
export FLASHINFER_JIT_DIR=${TMPDIR}/flashinfer_jitß
export FLASHINFER_JIT_DIR=${TMPDIR}/flashinfer_jit

# Other VLLM tuning settings
export VLLM_LOGGING_LEVEL=INFO
Expand Down
1 change: 1 addition & 0 deletions singularity/singularity-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ instances:
volumes:
- ./logs:/logs
- ./cache:/root/.cache
- ./cache/sagemaker_sessions:/dev/shm/sagemaker_sessions
- ./env.sh:/.singularity.d/env/env.sh
- __MODEL_PATH__:/__MODEL_BASE__

Expand Down
40 changes: 37 additions & 3 deletions start_service.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ start_rootless_docker() {
# (Re)create cancel.sh holding only the shebang line and make it executable.
printf '%s\n' '#!/bin/bash' > cancel.sh
chmod +x cancel.sh

# Append two commands to run.sh: one drops a job.started marker file, the
# other records the hostname in a file named HOSTNAME.
{
  printf '%s\n' "touch job.started"
  printf '%s\n' "hostname > HOSTNAME"
} >> run.sh


if [ "$RUNMODE" == "docker" ];then

# Ensure docker service is installed
Expand Down Expand Up @@ -185,6 +191,16 @@ if [ "$RUNMODE" == "docker" ];then

elif [ "$RUNMODE" == "singularity" ]; then

# Make sure a `singularity` command is reachable before it is used:
#   - already on PATH          -> nothing to do
#   - `module` command exists  -> load singularity, falling back to apptainer
#   - neither                  -> report the problem on stderr
if command -v singularity >/dev/null 2>&1; then
  : # already available, no module load needed
elif command -v module >/dev/null 2>&1; then
  module load singularity || module load apptainer
else
  echo "ERROR: singularity/apptainer not found" >&2
fi

# Check if singularity is installed
if ! command -v singularity >/dev/null 2>&1; then
echo "$(date) ERROR: singularity is not installed"
Expand All @@ -203,7 +219,7 @@ elif [ "$RUNMODE" == "singularity" ]; then

cp singularity/* ./ -Rf
cp env.sh.example env.sh

VLLM_SERVER_PORT=$(findAvailablePort)
RAG_PORT=$(findAvailablePort)
PROXY_PORT=$(findAvailablePort)
Expand Down Expand Up @@ -245,6 +261,13 @@ elif [ "$RUNMODE" == "singularity" ]; then

mkdir -p logs cache cache/chroma $DOCS_DIR

# Workaround for the sagemaker sessions issue seen with the updated vLLM:
# create the sessions directory under ./cache and under /dev/shm, and
# restrict each one to its owner (mode 700).
for sessions_dir in cache/sagemaker_sessions /dev/shm/sagemaker_sessions; do
  mkdir -p "$sessions_dir"
  chmod 700 "$sessions_dir"
done

# singularity-compose does not support env variables in the yml config file
if [ "$DOCS_DIR" != "./docs" ];then
ln -s $DOCS_DIR ./docs
Expand All @@ -263,7 +286,18 @@ elif [ "$RUNMODE" == "singularity" ]; then
[ "$BUILD" = "true" ] && singularity-compose build "${RUNTYPE}1"
singularity-compose up "${RUNTYPE}1"
fi
# Follow the logs
tail -f logs/*

# Follow the service logs in the foreground until this script exits.
# NOTE(review): the original note said "only follow logs if up succeeded",
# but nothing in this section checks the result of `singularity-compose up`;
# confirm whether such a check exists earlier.
# nullglob makes logs/* expand to an empty array (instead of the literal
# pattern) when no log file exists yet. The option stays enabled afterwards.
shopt -s nullglob
logs=(logs/*)
if ((${#logs[@]} > 0)); then
# Run tail in the background so its PID can be captured; -F keeps following
# a file by name even if it is rotated or recreated.
tail -F "${logs[@]}" &
tail_pid=$!
# On script exit, stop the background tail (ignoring errors if it is already
# gone) and then call cleanup. This replaces any EXIT trap set earlier.
# NOTE(review): cleanup is not defined in this section; presumably it is
# defined earlier in the script - confirm.
trap 'kill "$tail_pid" >/dev/null 2>&1 || true; cleanup' EXIT
# Block here until tail ends.
wait "$tail_pid"
else
echo "No logs found under logs/. Skipping tail."
fi

fi
Loading