-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathquant_queue_exl3.sh
More file actions
231 lines (194 loc) · 7.29 KB
/
quant_queue_exl3.sh
File metadata and controls
231 lines (194 loc) · 7.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/bin/bash
# Batch EXL3 quantization queue runner: reads model URLs from $QUEUE_FILE and
# fans jobs out across tmux panes, each pinned to a fixed group of GPUs.
# NOTE(review): USERNAME must be filled in before running — the uploads in
# quant() target "${USERNAME}/<model>-exl3" and will break if it is empty.
USERNAME=""
# Number of GPUs per quant job
GPUS_PER_JOB=2
# List of all available GPUs
DEVICES=(0 1 2 3 4 5 6 7)
# TMUX session name
SESSION="quant_session"
# Default tmux dimensions to ensure pane splits succeed when detached
DEFAULT_TMUX_COLS=200
DEFAULT_TMUX_LINES=60
# Path to job queue file
QUEUE_FILE="queue.txt"
# Lock folder for serializing downloads
LOCKDIR="./locks"
mkdir -p "$LOCKDIR"
# Lock file naming
PANE_READY_PREFIX="pane_"
PANE_READY_SUFFIX=".ready"
DOWNLOAD_LOCK_SUFFIX=".lock"
# Clean stale locks from previous runs
find "$LOCKDIR" -maxdepth 1 -type f -name "${PANE_READY_PREFIX}*${PANE_READY_SUFFIX}" -delete
find "$LOCKDIR" -maxdepth 1 -type f -name "*${DOWNLOAD_LOCK_SUFFIX}" -delete
NUM_DEVICES=${#DEVICES[@]}
# Integer division: each job owns GPUS_PER_JOB devices; leftover GPUs are idle.
PARALLEL=$(( NUM_DEVICES / GPUS_PER_JOB ))
if (( PARALLEL < 1 )); then
echo "Error: Not enough GPUs"
exit 1
fi
quant() {
# Quantize model $1 at $2 bits-per-weight on the GPU ids listed in $3, then
# upload the result to Hugging Face. Skips the conversion step when the
# output directory already holds a config.json (resume support).
# Returns non-zero on mkdir/convert failure; uploads are best-effort.
local MODEL="$1" BPW="$2" DEVSTR="$3"
[ -z "$DEVSTR" ] && { echo "ERR: empty devices"; return 1; }
local WORK_DIRECTORY="./output/$MODEL/temp-${BPW}"
local OUTPUT_DIRECTORY="./output/$MODEL/output-${BPW}"
mkdir -p "$WORK_DIRECTORY" "$OUTPUT_DIRECTORY" || return 1
if [ ! -f "${OUTPUT_DIRECTORY}/config.json" ]; then
echo "Running quant on $MODEL at ${BPW}bpw using GPUs $DEVSTR"
python convert.py -i "./models/$MODEL/" -w "$WORK_DIRECTORY" -d "$DEVSTR" -o "$OUTPUT_DIRECTORY" -b "$BPW" || return 1
fi
# Inject a quant notice after the 2nd '---' of the upstream README (GNU sed;
# -z treats the file as one record so the /2 counter spans lines).
# BUG FIX: previously sed ran unconditionally, so a missing source README
# produced an empty README.md that was then uploaded.
if [ -f "./models/${MODEL}/README.md" ]; then
sed -z "s/---/---\\n### exl3 quant\\n---\\n### check revisions for quants\\n---\\n/2" \
"./models/${MODEL}/README.md" > "${OUTPUT_DIRECTORY}/README.md"
fi
# Best-effort uploads (deliberate || true so one flaky upload does not kill
# the pane): full quant under a per-bpw revision, then the README to the
# repo root of the main branch.
HF_HUB_ENABLE_HF_TRANSFER=1 hf upload --private --revision "${BPW}bpw" "${USERNAME}/${MODEL}-exl3" "${OUTPUT_DIRECTORY}" || true
HF_HUB_ENABLE_HF_TRANSFER=1 hf upload --private "${USERNAME}/${MODEL}-exl3" "${OUTPUT_DIRECTORY}/README.md" ./README.md || true
}
download_model() {
# Download HF repo $2/$3 into ./models/$1 exactly once, serialized by a
# per-model flock so concurrent panes never fetch the same model twice.
# The presence check (config.json) runs under the lock, so whichever pane
# wins the race downloads and the rest fall through.
local MODEL="$1" X="$2" Y="$3"
local LOCKFILE="$LOCKDIR/${MODEL}.lock"
mkdir -p "$LOCKDIR"
# Hold the lock on fd 9 for the whole check + download section.
exec 9>"$LOCKFILE" || return 1
flock -x 9
if [ ! -f "./models/${MODEL}/config.json" ]; then
echo "Downloading $X/$Y ..."
HF_HUB_ENABLE_HF_TRANSFER=1 hf download "${X}/${Y}" \
--exclude "*.arrow" \
--exclude "*checkpoint*" \
--exclude "*global_state*" \
--exclude "*.pth" \
--exclude "*.pt" \
--exclude "*.nemo" \
--local-dir="./models/${MODEL}"
# Convert pytorch .bin shards to safetensors, then drop the originals.
# BUG FIX: use the compgen -G builtin as a glob-existence test instead of
# parsing ls output (fragile and a well-known shell anti-pattern).
if compgen -G "./models/${MODEL}/*.bin" >/dev/null; then
python ./util/convert_safetensors.py ./models/${MODEL}/*.bin && rm ./models/${MODEL}/*.bin
fi
fi
# release the download lock immediately after ensuring presence
flock -u 9; exec 9>&-
}
process_line() {
# Run one queue entry end to end: $1 is either a file containing the job URL
# or the URL itself; $2 = physical GPU ids (logging only), $3 = remapped
# local device ids handed to quant (CUDA_VISIBLE_DEVICES does the actual
# translation in the pane), $4 = job index for log prefixes.
local src="$1" devstr="$2" local_devs="$3" idx="$4"
local line
# Read URL line from file if a file path is provided
if [ -f "$src" ]; then
line="$(cat "$src")"
else
line="$src"
fi
# Robustly parse https://<host>/<org>/<repo>/<bpw>
local modified="$line"
modified="${modified#http://}"
modified="${modified#https://}"
# BUG FIX: tolerate CRLF queue files and trailing slashes, both of which
# previously leaked into the bpw field and broke quant().
modified="${modified%$'\r'}"
modified="${modified%/}"
local host x y BPW
IFS='/' read -r host x y BPW <<< "$modified"
local MODEL="${x}_${y}"
echo "[job $idx] -> GPUs $devstr (local: $local_devs)"
echo "[job $idx] url='$line' x='$x' y='$y' bpw='$BPW'"
download_model "$MODEL" "$x" "$y"
quant "$MODEL" "$BPW" "$local_devs"
}
# Make functions and settings visible to child shells spawned from this one.
export -f process_line quant download_model
export USERNAME GPUS_PER_JOB LOCKDIR
# Materialize function definitions for tmux panes to source (export -f is not reliable across tmux)
LIB_PATH="$LOCKDIR/lib.sh"
{
declare -f process_line
declare -f quant
declare -f download_model
} > "$LIB_PATH"
# detect venv path
# The activate script (if any) is sourced inside each pane before running jobs.
if [ -d "./venv" ]; then
VENV_PATH="./venv/bin/activate"
elif [ -d "./.venv" ]; then
VENV_PATH="./.venv/bin/activate"
else
VENV_PATH=""
fi
# ensure tmux exists
if ! command -v tmux >/dev/null 2>&1; then
echo "Error: tmux is not installed or not on PATH"; exit 1
fi
# Determine number of jobs (ignore blank lines and lines starting with #)
# awk: skip comment lines, count lines with at least one field; the || echo 0
# covers a missing queue file.
JOB_COUNT=$(awk 'BEGIN{c=0} /^[[:space:]]*#/ {next} NF {c++} END{print c+0}' "$QUEUE_FILE" 2>/dev/null || echo 0)
if (( JOB_COUNT == 0 )); then
echo "No jobs found in $QUEUE_FILE"
exit 0
fi
# Do not create more panes than jobs
if (( PARALLEL > JOB_COUNT )); then
PARALLEL=$JOB_COUNT
fi
# Kill old tmux session if exists and start one fresh
tmux kill-session -t "$SESSION" 2>/dev/null
# tput fails when there is no controlling terminal (e.g. cron), hence the
# DEFAULT_TMUX_* fallbacks so pane splits still have room to succeed.
tmux new-session -d -s "$SESSION" -x "$(tput cols 2>/dev/null || echo "$DEFAULT_TMUX_COLS")" -y "$(tput lines 2>/dev/null || echo "$DEFAULT_TMUX_LINES")" -c "$PWD" bash # start detached with one shell sized to terminal
# Build fixed GPU groups of size GPUS_PER_JOB so groups are never re-used concurrently
# Helper to compute device string for a given pane index
compute_devstr_for_pane() {
# Print the comma-joined, fixed GPU group for pane index $1.
# Pane k owns DEVICES[k*GPUS_PER_JOB .. k*GPUS_PER_JOB+GPUS_PER_JOB-1];
# if the slice runs off the end of DEVICES it wraps to the front.
local idx=$1
local offset=$(( idx * GPUS_PER_JOB ))
local slice=( "${DEVICES[@]:offset:GPUS_PER_JOB}" )
local missing=$(( GPUS_PER_JOB - ${#slice[@]} ))
if (( missing > 0 )); then
slice+=( "${DEVICES[@]:0:missing}" )
fi
local joined
printf -v joined '%s,' "${slice[@]}"
echo "${joined%,}"
}
# Ensure we have exactly PARALLEL panes ready and mark them initially idle
for (( i=1; i<PARALLEL; i++ )); do
# Prefer a horizontal split; fall back to vertical when the window is too
# narrow; give up (and lower concurrency) only when both fail.
if tmux split-window -t "$SESSION:0" -h -d -c "$PWD" bash 2>/dev/null; then
:
elif tmux split-window -t "$SESSION:0" -v -d -c "$PWD" bash 2>/dev/null; then
:
else
echo "Warning: Unable to create additional tmux pane; limiting concurrency."
break
fi
# Re-tile after every split so subsequent splits have room to succeed.
tmux select-layout -t "$SESSION:0" tiled
done
# Adapt PARALLEL to the actual number of panes that were created
actual_panes="$(tmux list-panes -t "$SESSION:0" 2>/dev/null | wc -l | tr -d ' ')"
if [ -n "$actual_panes" ] && [ "$actual_panes" -gt 0 ]; then
PARALLEL="$actual_panes"
fi
# Start the round-robin pane scan at pane 0.
next_pane=0
# Touch one .ready marker per pane: a present marker means "idle, can take a job".
for (( i=0; i<PARALLEL; i++ )); do
touch "$LOCKDIR/${PANE_READY_PREFIX}${i}${PANE_READY_SUFFIX}"
done
find_ready_pane() {
# Block until some pane is idle (its .ready marker file exists) and print
# that pane's index on stdout. Scans round-robin, starting just after the
# pane handed out last time, so work spreads across panes.
#
# BUG FIX: callers invoke this as pane=$(find_ready_pane), i.e. inside a
# command-substitution subshell, so the old `next_pane=...` assignment never
# reached the parent shell and the rotation cursor was stuck at its initial
# value. The cursor is now persisted in a file under $LOCKDIR instead.
local cursor_file="$LOCKDIR/next_pane"
local cursor=0
[ -f "$cursor_file" ] && cursor=$(<"$cursor_file")
while true; do
for (( k=0; k<PARALLEL; k++ )); do
idx=$(( (k + cursor) % PARALLEL ))
file="$LOCKDIR/${PANE_READY_PREFIX}${idx}${PANE_READY_SUFFIX}"
if [ -f "$file" ]; then
echo $(( (idx + 1) % PARALLEL )) > "$cursor_file"
echo "$idx"
return 0
fi
done
sleep 1
done
}
job_index=0
pane_index=0
# Dispatch loop: hand each queue entry to the next idle tmux pane.
while IFS= read -r line || [[ -n "$line" ]]; do
# BUG FIX: JOB_COUNT (above) ignores blank lines and '#' comments, but this
# loop previously dispatched every raw line as a job, wasting panes on
# garbage input. Skip them here so the loop and the count agree.
trimmed="${line//[[:space:]]/}"
[ -z "$trimmed" ] && continue
[ "${trimmed:0:1}" = "#" ] && continue
pane=$(find_ready_pane)
devstr="$(compute_devstr_for_pane "$pane")"
# build local device indices for this process and remap via CUDA_VISIBLE_DEVICES
# (the job always sees devices 0..GPUS_PER_JOB-1; CVD maps them to $devstr)
local_devs=""
for (( j=0; j<GPUS_PER_JOB; j++ )); do
if [ $j -gt 0 ]; then local_devs+=","; fi
local_devs+="$j"
done
# mark pane busy immediately to avoid double-dispatch races
ready_file="$LOCKDIR/${PANE_READY_PREFIX}${pane}${PANE_READY_SUFFIX}"
rm -f "$ready_file"
# write the job line to a file to avoid complex quoting/arg passing issues
job_file="$LOCKDIR/pane_${pane}.job"
printf "%s" "$line" > "$job_file"
# Pane command: set CUDA_VISIBLE_DEVICES, source the venv (if any) and the
# function library, run the job, re-touch the ready marker, keep a shell
# open. Values travel as positional args ($1..$8) to dodge nested quoting.
cmd="bash -c 'export CUDA_VISIBLE_DEVICES=\"\$1\"; export LOCKDIR=\"\$8\"; if [ -n \"\$2\" ] && [ -f \"\$2\" ]; then . \"\$2\"; fi; . \"\$7\"; echo \"[dispatch] pane ${pane} -> GPUs \$1 (CVD=\$CUDA_VISIBLE_DEVICES)\"; process_line \"\$3\" \"\$1\" \"\$4\" \"\$5\"; status=\$?; touch \"\$6\"; echo \"--- DONE (status=\$status) ---\"; exec bash' -- \"$devstr\" \"$VENV_PATH\" \"$job_file\" \"$local_devs\" \"$job_index\" \"$ready_file\" \"$LIB_PATH\" \"$LOCKDIR\""
tmux respawn-pane -t "$SESSION:0.$pane" -k "$cmd"
((job_index++))
done < "$QUEUE_FILE"
# Hand the user over to the session: attach when running outside tmux,
# switch the current client when already inside one (nesting attach fails).
if [ -z "$TMUX" ]; then
exec tmux attach -t "$SESSION"
else
tmux switch-client -t "$SESSION"
fi