43 changes: 43 additions & 0 deletions Dockerfile
@@ -0,0 +1,43 @@
FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

#COPY sources.list /etc/apt/sources.list
ARG DEBIAN_FRONTEND=noninteractive

# Base tools and repo setup
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
gnupg \
build-essential \
curl \
ca-certificates \
cmake \
vim \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update && apt-get install -y --no-install-recommends \
python3.10 \
python3.10-dev \
python3.10-venv \
python3.10-distutils \
&& rm -rf /var/lib/apt/lists/*

# Create a dedicated Python 3.10 venv and make it default on PATH
RUN python3.10 -m venv /opt/py310 \
&& /opt/py310/bin/python -m pip install --upgrade pip setuptools wheel

ENV VIRTUAL_ENV=/opt/py310
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV PIP_NO_CACHE_DIR=1

# Sanity check
RUN python -V && pip -V

RUN pip config set global.extra-index-url "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
RUN pip install torch==2.0.0 --index-url https://download.pytorch.org/whl/cu118
RUN pip install onnxruntime-gpu==1.16.0 onnx==1.14.1

# Install app
WORKDIR /workspace
ADD . Dipoorlet
RUN cd Dipoorlet \
&& pip install -r requirements.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
&& python3 setup.py install
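
Not part of the diff, just a reviewer aid: a minimal sanity check, assuming the image was built from this Dockerfile and the container is started with GPU access, that the CUDA-enabled wheels installed above (torch 2.0.0+cu118, onnxruntime-gpu 1.16.0) are actually usable:

```python
# Minimal in-container check; a sketch, not part of the PR.
import torch
import onnxruntime as ort

print(torch.__version__)              # expected: 2.0.0+cu118
print(torch.cuda.is_available())      # True when a GPU is visible to the container
print(ort.get_available_providers())  # should include "CUDAExecutionProvider"
```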
46 changes: 35 additions & 11 deletions dipoorlet/__main__.py
@@ -52,6 +52,8 @@
parser.add_argument("--optim_transformer", help="Transformer model optimization", default=False, action='store_true')
parser.add_argument("--model_type", help="Transformer model type", choices=["unet"], default=None)
parser.add_argument("--quant_format", default="QDQ", type=str, choices=["QOP", "QDQ"])
parser.add_argument("--load_clip", help="Load clip values from a directory or act_clip_val.json path and skip calibration",
default=None)
args = parser.parse_args()

if args.slurm:
@@ -91,6 +93,18 @@
args.optimzed_model_dir = os.path.join(args.output_dir, 'optim_model.onnx')
logger.parent = None

def _resolve_clip_dir(path):
if os.path.isdir(path):
return path
if os.path.isfile(path):
base = os.path.basename(path)
if base == 'act_clip_val.json' or base == 'weight_clip_val.json':
return os.path.dirname(path)
raise FileNotFoundError(
"load_clip expects a directory or a path to act_clip_val.json/weight_clip_val.json: {}".format(path)
)


start = time.time()
if args.optim_transformer:
model = onnx.load(args.optimzed_model_dir)
@@ -116,23 +130,33 @@
setattr(args, 'world_size', dist.get_world_size())
if dist.get_rank() == 0:
logger.info("Do tensor calibration...")
act_clip_val, weight_clip_val = tensor_calibration(onnx_graph, args)
tensor_range = copy.deepcopy(act_clip_val)
save_clip_val(act_clip_val, weight_clip_val, args,
act_fname='act_clip_val.json.rank{}'.format(args.rank),
weight_fname='weight_clip_val.json.rank{}'.format(args.rank))
dist.barrier()
if dist.get_rank() == 0:
reduce_clip_val(dist.get_world_size(), args)
dist.barrier()
act_clip_val, weight_clip_val = load_clip_val(args)
if args.load_clip:
clip_dir = _resolve_clip_dir(args.load_clip)
if dist.get_rank() == 0:
logger.info("Load clip values from: {}".format(clip_dir))
act_clip_val, weight_clip_val = load_clip_val(args, base_dir=clip_dir)
tensor_range = copy.deepcopy(act_clip_val)
dist.barrier()
else:
act_clip_val, weight_clip_val = tensor_calibration(onnx_graph, args)
tensor_range = copy.deepcopy(act_clip_val)
save_clip_val(act_clip_val, weight_clip_val, args,
act_fname='act_clip_val.json.rank{}'.format(args.rank),
weight_fname='weight_clip_val.json.rank{}'.format(args.rank))
dist.barrier()
if dist.get_rank() == 0:
reduce_clip_val(dist.get_world_size(), args)
dist.barrier()
act_clip_val, weight_clip_val = load_clip_val(args)

# Weight Transform.
if dist.get_rank() == 0:
logger.info("Weight transform...")
graph, graph_ori, act_clip_val, weight_clip_val = \
weight_calibration(onnx_graph, act_clip_val, weight_clip_val, args)
dist.barrier()
if dist.get_rank() == 0:
save_clip_val(act_clip_val, weight_clip_val, args)

# Profiling Distributed.
if dist.get_rank() == 0:
@@ -158,4 +182,4 @@
if args.quant_format == 'QOP' and args.model_type is None:
deploy_QOperator(graph.model, tensor_range, args)
end = time.time()
logger.info("Total time cost: {} seconds.".format(int(end - start)))
logger.info("Total time cost: {} seconds.".format(int(end - start)))
5 changes: 4 additions & 1 deletion dipoorlet/deploy/deploy_trt.py
@@ -1,5 +1,6 @@
import json
import os
import numpy as np

from .deploy_default import deploy_dispatcher

@@ -8,7 +9,9 @@
def gen_trt_range(graph, clip_val, args, **kwargs):
for k, v in clip_val.items():
# max(-clip_min, clip_max)
clip_val[k] = max(-clip_val[k][0].astype(float), clip_val[k][1].astype(float))
v0 = np.min(clip_val[k][0])
v1 = np.max(clip_val[k][1])
clip_val[k] = max(-float(v0), float(v1))

tensorrt_blob_json = dict()
tensorrt_blob_json['blob_range'] = clip_val
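
The previous code appears to have assumed each clip value was a pair of scalars; with per-channel (array-valued) ranges, `astype(float)` no longer yields a single number and `max()` over two arrays is ambiguous. A small worked example of the new reduction (the channel values are made up): the per-channel minima and maxima are collapsed into one symmetric bound before being written into the TensorRT blob-range JSON.

```python
# Sketch of the symmetric-range reduction used in gen_trt_range;
# the per-channel values below are invented for illustration.
import numpy as np

clip_min = np.array([-0.50, -1.25, -0.75])  # per-channel minima
clip_max = np.array([ 0.40,  0.90,  1.10])  # per-channel maxima

v0 = np.min(clip_min)                   # most negative minimum -> -1.25
v1 = np.max(clip_max)                   # largest maximum       ->  1.10
trt_range = max(-float(v0), float(v1))  # symmetric bound       ->  1.25
print(trt_range)
```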