7 changes: 7 additions & 0 deletions examples/cli/main.cpp
@@ -504,6 +504,7 @@ struct SDContextParams {
rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false;
bool use_mmap = false;
bool control_net_cpu = false;
bool clip_on_cpu = false;
bool vae_on_cpu = false;
@@ -639,6 +640,10 @@ struct SDContextParams {
"--offload-to-cpu",
"place the weights in RAM to save VRAM, and automatically load them into VRAM when needed",
true, &offload_params_to_cpu},
{"",
"--use-mmap",
"use mmap to load weights",
true, &use_mmap},
{"",
"--control-net-cpu",
"keep controlnet in cpu (for low vram)",
@@ -874,6 +879,7 @@ struct SDContextParams {
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
<< " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " use_mmap: " << (use_mmap ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
<< " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
@@ -924,6 +930,7 @@ struct SDContextParams {
prediction,
lora_apply_mode,
offload_params_to_cpu,
use_mmap,
clip_on_cpu,
control_net_cpu,
vae_on_cpu,
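The CLI wires `--use-mmap` through the same table-driven pattern as `--offload-to-cpu`: a `bool` field defaulting to `false`, an entry in the option table, a line in the parameter dump, and a field in the `sd_ctx_params_t` handed to the library. Assuming the example binary keeps its usual `sd` name, opting in is just `sd -m model.safetensors --use-mmap ...`.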
23 changes: 19 additions & 4 deletions model.cpp
@@ -1337,7 +1337,7 @@ std::string ModelLoader::load_umt5_tokenizer_json() {
return json_str;
}

bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p) {
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool use_mmap) {
int64_t process_time_ms = 0;
std::atomic<int64_t> read_time_ms(0);
std::atomic<int64_t> memcpy_time_ms(0);
@@ -1387,6 +1387,15 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
}
}

std::shared_ptr<MmapWrapper> mmapped;
if (use_mmap && !is_zip) {
LOG_DEBUG("using mmap for I/O");
mmapped = MmapWrapper::create(file_path);
if (!mmapped) {
LOG_WARN("failed to memory-map '%s'", file_path.c_str());
}
}

int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
if (n_threads < 1) {
n_threads = 1;
@@ -1408,7 +1417,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
failed = true;
return;
}
} else {
} else if (!mmapped) {
file.open(file_path, std::ios::binary);
if (!file.is_open()) {
LOG_ERROR("failed to open '%s'", file_path.c_str());
@@ -1461,6 +1470,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
zip_entry_noallocread(zip, (void*)buf, n);
}
zip_entry_close(zip);
} else if (mmapped) {
if (!mmapped->copy_data(buf, n, tensor_storage.offset)) {
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
failed = true;
}
} else {
file.seekg(tensor_storage.offset);
file.read(buf, n);
@@ -1580,7 +1594,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread

bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
std::set<std::string> ignore_tensors,
int n_threads) {
int n_threads,
bool use_mmap) {
std::set<std::string> tensor_names_in_file;
std::mutex tensor_names_mutex;
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
Expand Down Expand Up @@ -1623,7 +1638,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
return true;
};

bool success = load_tensors(on_new_tensor_cb, n_threads);
bool success = load_tensors(on_new_tensor_cb, n_threads, use_mmap);
if (!success) {
LOG_ERROR("load tensors from file failed");
return false;
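After this change each worker has three read paths per tensor: zip archives stay on the single-threaded `zip_entry_noallocread` path, a live mapping is served by `MmapWrapper::copy_data`, and everything else falls through to the pre-existing `std::ifstream`. A simplified sketch of the non-zip dispatch (the helper name `read_tensor_bytes` is illustrative, and the real code opens the stream once per worker rather than per call):

```cpp
#include <fstream>
#include <memory>
#include <string>

#include "util.h" // MmapWrapper, added by this PR

// Simplified sketch only; the real logic lives inside ModelLoader::load_tensors.
static bool read_tensor_bytes(const std::string& path,
                              const std::shared_ptr<MmapWrapper>& mmapped,
                              char* buf, size_t n, size_t offset) {
    if (mmapped) {
        // mmap path: a bounds-checked memcpy out of the mapped region
        return mmapped->copy_data(buf, n, offset);
    }
    // fallback path: buffered stream I/O, unchanged from before this PR
    std::ifstream file(path, std::ios::binary);
    if (!file.is_open()) {
        return false;
    }
    file.seekg(offset);
    file.read(buf, n);
    return file.good();
}
```

Note that a failed map is not fatal: `create()` returning `nullptr` only logs a warning, the `else if (!mmapped)` branch opens the stream as before, and loading proceeds.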
5 changes: 3 additions & 2 deletions model.h
@@ -308,10 +308,11 @@ class ModelLoader {
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
std::set<std::string> ignore_tensors = {},
int n_threads = 0);
int n_threads = 0,
bool use_mmap = false);

std::vector<std::string> get_tensor_names() const {
std::vector<std::string> names;
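Both overloads default `use_mmap` to `false`, so existing call sites compile unchanged and memory mapping stays strictly opt-in.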
3 changes: 2 additions & 1 deletion stable-diffusion.cpp
@@ -693,7 +693,7 @@ class StableDiffusionGGML {
if (version == VERSION_SVD) {
ignore_tensors.insert("conditioner.embedders.3");
}
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads);
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads, sd_ctx_params->use_mmap);
if (!success) {
LOG_ERROR("load tensors from model loader failed");
ggml_free(ctx);
@@ -2478,6 +2478,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
sd_ctx_params->prediction = PREDICTION_COUNT;
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
sd_ctx_params->offload_params_to_cpu = false;
sd_ctx_params->use_mmap = false;
sd_ctx_params->keep_clip_on_cpu = false;
sd_ctx_params->keep_control_net_on_cpu = false;
sd_ctx_params->keep_vae_on_cpu = false;
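For library users the flag travels through `sd_ctx_params_t`. A hedged sketch of opting in via the public C API, assuming the existing `new_sd_ctx`/`free_sd_ctx` entry points, a `model_path` field on the struct, and an illustrative model path:

```cpp
#include "stable-diffusion.h"

int main() {
    sd_ctx_params_t params;
    sd_ctx_params_init(&params);             // defaults use_mmap to false
    params.model_path = "model.safetensors"; // illustrative path
    params.use_mmap   = true;                // opt in to memory-mapped loading

    // new_sd_ctx forwards params.use_mmap down to ModelLoader::load_tensors.
    sd_ctx_t* ctx = new_sd_ctx(&params);
    if (ctx != NULL) {
        free_sd_ctx(ctx);
    }
    return 0;
}
```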
1 change: 1 addition & 0 deletions stable-diffusion.h
@@ -176,6 +176,7 @@ typedef struct {
enum prediction_t prediction;
enum lora_apply_mode_t lora_apply_mode;
bool offload_params_to_cpu;
bool use_mmap;
bool keep_clip_on_cpu;
bool keep_control_net_on_cpu;
bool keep_vae_on_cpu;
127 changes: 127 additions & 0 deletions util.cpp
@@ -109,9 +109,78 @@ std::string get_full_path(const std::string& dir, const std::string& filename) {
}
}

class MmapWrapperImpl : public MmapWrapper {
public:
MmapWrapperImpl(void* data, size_t size, HANDLE hfile, HANDLE hmapping)
: MmapWrapper(data, size), hfile_(hfile), hmapping_(hmapping) {}

~MmapWrapperImpl() override {
if (data_) {
UnmapViewOfFile(data_);
}
if (hmapping_ != NULL) {
CloseHandle(hmapping_);
}
if (hfile_ != INVALID_HANDLE_VALUE) {
CloseHandle(hfile_);
}
}

private:
HANDLE hfile_;
HANDLE hmapping_;
};

std::shared_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
void* mapped_data = nullptr;
size_t file_size = 0;

HANDLE file_handle = CreateFileA(
filename.c_str(),
GENERIC_READ,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL
);

if (file_handle == INVALID_HANDLE_VALUE) {
return nullptr;
}

LARGE_INTEGER size;
if (!GetFileSizeEx(file_handle, &size)) {
CloseHandle(file_handle);
return nullptr;
}

file_size = static_cast<size_t>(size.QuadPart);

HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);

if (mapping_handle == NULL) {
CloseHandle(file_handle);
return nullptr;
}

mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size);

if (mapped_data == NULL) {
CloseHandle(mapping_handle);
CloseHandle(file_handle);
return nullptr;
}

return std::make_shared<MmapWrapperImpl>(mapped_data, file_size, file_handle, mapping_handle);
}

#else // Unix
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

bool file_exists(const std::string& filename) {
struct stat buffer;
@@ -143,8 +212,66 @@ std::string get_full_path(const std::string& dir, const std::string& filename) {
return "";
}

class MmapWrapperImpl : public MmapWrapper {
public:
MmapWrapperImpl(void* data, size_t size) : MmapWrapper(data, size) {}

~MmapWrapperImpl() override {
if (data_) {
munmap(data_, size_);
}
}
};

std::shared_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {

int file_descriptor = open(filename.c_str(), O_RDONLY);
if (file_descriptor == -1) {
return nullptr;
}

int mmap_flags = MAP_PRIVATE;

#ifdef __linux__
// performance flags used by llama.cpp
//posix_fadvise(file_descriptor, 0, 0, POSIX_FADV_SEQUENTIAL);
//mmap_flags |= MAP_POPULATE;
#endif

struct stat sb;
if (fstat(file_descriptor, &sb) == -1) {
close(file_descriptor);
return nullptr;
}

size_t file_size = sb.st_size;

void* mapped_data = mmap(NULL, file_size, PROT_READ, mmap_flags, file_descriptor, 0);

close(file_descriptor);

if (mapped_data == MAP_FAILED) {
return nullptr;
}

#ifdef __linux__
// performance flags used by llama.cpp
//posix_madvise(mapped_data, file_size, POSIX_MADV_WILLNEED);
#endif

return std::make_shared<MmapWrapperImpl>(mapped_data, file_size);
}

#endif

bool MmapWrapper::copy_data(void* buf, size_t n, size_t offset) const {
if (offset >= size_ || n > (size_ - offset)) {
return false;
}
std::memcpy(buf, data() + offset, n);
return true;
}

// get_num_physical_cores is copy from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
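Both platform implementations share one contract: `create()` returns `nullptr` on any failure (open, size query, or mapping), and the destructor releases the view plus any handles or descriptors. On the POSIX side the descriptor is closed immediately after `mmap()`, which is safe because the mapping holds its own reference to the file. A minimal usage sketch (the path is illustrative):

```cpp
#include <cstdio>
#include <vector>

#include "util.h" // MmapWrapper from this PR

int main() {
    auto mapped = MmapWrapper::create("model.safetensors"); // illustrative path
    if (!mapped) {
        // Any failure collapses to nullptr, which is what lets model.cpp
        // fall back silently to the std::ifstream path.
        std::fprintf(stderr, "mmap failed\n");
        return 1;
    }
    std::vector<char> buf(1024);
    // copy_data() rejects out-of-range requests instead of faulting:
    if (!mapped->copy_data(buf.data(), buf.size(), /*offset=*/0)) {
        std::fprintf(stderr, "requested range past end of file\n");
        return 1;
    }
    std::printf("mapped %zu bytes\n", mapped->size());
    return 0; // destructor unmaps and closes the fd / handles
}
```

One nit: `copy_data()` calls `std::memcpy`, so util.cpp needs `<cstring>` unless it is already included transitively.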
23 changes: 23 additions & 0 deletions util.h
@@ -2,6 +2,7 @@
#define __UTIL_H__

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

@@ -44,6 +45,28 @@ sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int

sd_image_f32_t clip_preprocess(sd_image_f32_t image, int target_width, int target_height);

class MmapWrapper {
public:
static std::shared_ptr<MmapWrapper> create(const std::string& filename);

virtual ~MmapWrapper() = default;

MmapWrapper(const MmapWrapper&) = delete;
MmapWrapper& operator=(const MmapWrapper&) = delete;
MmapWrapper(MmapWrapper&&) = delete;
MmapWrapper& operator=(MmapWrapper&&) = delete;

const uint8_t* data() const { return static_cast<uint8_t*>(data_); }
size_t size() const { return size_; }
bool copy_data(void* buf, size_t n, size_t offset) const;

protected:
MmapWrapper(void* data, size_t size)
: data_(data), size_(size) {}
void* data_ = nullptr;
size_t size_ = 0;
};

std::string path_join(const std::string& p1, const std::string& p2);
std::vector<std::string> split_string(const std::string& str, char delimiter);
void pretty_progress(int step, int steps, float time);
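Design note: the platform-specific state lives in two file-local `MmapWrapperImpl` classes behind the `create()` factory, copy and move are deleted so each mapping has exactly one owner, and returning a `shared_ptr` lets the loader's worker threads share that owner safely. The protected constructor prevents building a `MmapWrapper` without a backing mapping.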