From cb17d7661c467d8b490b7b73e46c6f83824999fa Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 26 Oct 2024 11:07:38 +0200 Subject: [PATCH 1/3] initial --- encodec.cpp | 4 +++- quantizer.h | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/encodec.cpp b/encodec.cpp index 2a1b779..5967cb2 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -705,7 +705,9 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, static void encodec_zero_tensor(struct ggml_cgraph *gf, const char *name) { struct ggml_tensor *tensor = ggml_graph_get_tensor(gf, name); - ggml_set_zero(tensor); + // create a zero array + // ggml_backend_tensor_set(tensor, zeros, 0, ggml_nbytes(tensor)); + // ggml_set_zero(tensor); } bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio, diff --git a/quantizer.h b/quantizer.h index 9986561..f7eb02e 100644 --- a/quantizer.h +++ b/quantizer.h @@ -58,9 +58,8 @@ struct ggml_tensor *encodec_forward_quantizer_encode( // [seq_length, n_bins] struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist); - dist = ggml_neg(ctx0, dist); + dist = ggml_scale(ctx0, dist, -1.0f); - // take the argmax over the column dimension // [seq_length] indices = ggml_argmax(ctx0, dist); From f199201cb937d482447416ae17c0f68215ae656c Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 4 Dec 2024 20:57:49 +0100 Subject: [PATCH 2/3] compiling --- README.md | 2 ++ encodec.cpp | 1 + ggml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d06657b..365702e 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off .. cmake --build . --config Release ``` +Once built, pass the `--n-gpu-layers` flag with a value greater than 0 to the executable. + ### Using cuBLAS The inference can be offloaded on a CUDA backend with cuBLAS.
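For reference, the `--n-gpu-layers` flag documented in the README change above is presumably wired through to the `n_gpu_layers` argument of `encodec_load_model` shown later in this series. A minimal sketch of requesting offloading directly from the C API might look as follows; the model path and the layer count are placeholders.

```cpp
#include "encodec.h"

#include <cstdio>

int main() {
    // "model.bin" and the layer count are placeholders; offset 0 means the weights
    // start at the beginning of the file (a non-zero offset is only needed when the
    // Encodec weights are embedded in a larger file, as for Bark).
    struct encodec_context * ectx = encodec_load_model("model.bin", 0 /* offset */, 4 /* n_gpu_layers */);
    if (!ectx) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... run encode/decode here ...

    encodec_free(ectx);
    return 0;
}
```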
diff --git a/encodec.cpp b/encodec.cpp index 5967cb2..1346b78 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -1,5 +1,6 @@ #include "ggml-alloc.h" #include "ggml-backend.h" +#include "ggml-cpu.h" #include "ggml.h" #include "ggml/src/ggml-impl.h" diff --git a/ggml b/ggml index c18f9ba..74d66b6 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit c18f9baeea2f3aea1ffc4afa4ad4496e51b7ff8a +Subproject commit 74d66b63eaf207a24f3e93bb922aba131cbf2906 From bd9abb4353191371768e96835baaf2ddddbac4d3 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 4 Dec 2024 22:30:00 +0100 Subject: [PATCH 3/3] cosmit --- decoder.h | 50 ++++++++------- encodec.cpp | 176 ++++++++++++++++++++++++++++------------------------ encodec.h | 65 ++++++++----------- encoder.h | 50 ++++++++------- lstm.h | 54 ++++++++-------- ops.cpp | 53 ++++++++++------ ops.h | 28 ++++++--- quantizer.h | 57 +++++++++-------- 8 files changed, 284 insertions(+), 249 deletions(-) diff --git a/decoder.h b/decoder.h index 7f37544..83a602e 100644 --- a/decoder.h +++ b/decoder.h @@ -12,61 +12,65 @@ struct encodec_decoder_block { // upsampling layers - struct ggml_tensor *us_conv_w; - struct ggml_tensor *us_conv_b; + struct ggml_tensor * us_conv_w; + struct ggml_tensor * us_conv_b; // conv1 - struct ggml_tensor *conv_1_w; - struct ggml_tensor *conv_1_b; + struct ggml_tensor * conv_1_w; + struct ggml_tensor * conv_1_b; // conv2 - struct ggml_tensor *conv_2_w; - struct ggml_tensor *conv_2_b; + struct ggml_tensor * conv_2_w; + struct ggml_tensor * conv_2_b; // shortcut - struct ggml_tensor *conv_sc_w; - struct ggml_tensor *conv_sc_b; + struct ggml_tensor * conv_sc_w; + struct ggml_tensor * conv_sc_b; }; struct encodec_decoder { - struct ggml_tensor *init_conv_w; - struct ggml_tensor *init_conv_b; + struct ggml_tensor * init_conv_w; + struct ggml_tensor * init_conv_b; encodec_lstm lstm; - struct ggml_tensor *final_conv_w; - struct ggml_tensor *final_conv_b; + struct ggml_tensor * final_conv_w; + struct ggml_tensor * final_conv_b; std::vector blocks; }; -struct ggml_tensor *encodec_forward_decoder( - const struct encodec_decoder *decoder, struct ggml_context *ctx0, - struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size, - const int stride) { +struct ggml_tensor * encodec_forward_decoder( + const struct encodec_decoder * decoder, + struct ggml_context * ctx0, + struct ggml_tensor * quantized_out, + const int * ratios, + const int kernel_size, + const int res_kernel_size, + const int stride) { if (!quantized_out) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; } - struct ggml_tensor *inpL = strided_conv_1d( + struct ggml_tensor * inpL = strided_conv_1d( ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride); // lstm { - struct ggml_tensor *cur = inpL; + struct ggml_tensor * cur = inpL; const encodec_lstm lstm = decoder->lstm; // first lstm layer char l0_prefix[7] = "dec_l0"; - struct ggml_tensor *hs1 = forward_pass_lstm_unilayer( + struct ggml_tensor * hs1 = forward_pass_lstm_unilayer( ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix); // second lstm layer char l1_prefix[7] = "dec_l1"; - struct ggml_tensor *out = forward_pass_lstm_unilayer( + struct ggml_tensor * out = forward_pass_lstm_unilayer( ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix); inpL = ggml_add(ctx0, inpL, out); @@ -81,10 +85,10 @@ struct ggml_tensor *encodec_forward_decoder( inpL = strided_conv_transpose_1d( ctx0, inpL, 
block.us_conv_w, block.us_conv_b, ratios[layer_ix]); - struct ggml_tensor *current = inpL; + struct ggml_tensor * current = inpL; // shortcut - struct ggml_tensor *shortcut = strided_conv_1d( + struct ggml_tensor * shortcut = strided_conv_1d( ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride); // conv1 @@ -106,7 +110,7 @@ struct ggml_tensor *encodec_forward_decoder( // final conv inpL = ggml_elu(ctx0, inpL); - struct ggml_tensor *decoded_inp = strided_conv_1d( + struct ggml_tensor * decoded_inp = strided_conv_1d( ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride); return decoded_inp; diff --git a/encodec.cpp b/encodec.cpp index 2a1b779..3d96b4b 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -2,6 +2,7 @@ #include "ggml-backend.h" #include "ggml.h" #include "ggml/src/ggml-impl.h" +#include "ggml-cpu.h" #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" @@ -83,12 +84,12 @@ struct encodec_hparams { struct encodec_model { encodec_hparams hparams; - encodec_encoder encoder; + encodec_encoder encoder; encodec_quantizer quantizer; - encodec_decoder decoder; + encodec_decoder decoder; // context - struct ggml_context *ctx; + struct ggml_context * ctx; int n_loaded; ggml_backend_t backend = NULL; @@ -127,18 +128,18 @@ struct encodec_context { ggml_gallocr_t allocr = NULL; // intermediate steps - struct ggml_tensor *encoded = NULL; // Encoded audio - struct ggml_tensor *codes = NULL; // Quantized representation of audio in codebook - struct ggml_tensor *decoded = NULL; // Reconstructed audio from codes + struct ggml_tensor * encoded = NULL; // Encoded audio + struct ggml_tensor * codes = NULL; // Quantized representation of audio in codebook + struct ggml_tensor * decoded = NULL; // Reconstructed audio from codes std::vector out_codes; - std::vector out_audio; + std::vector out_audio; // statistics encodec_statistics stats; }; -bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int n_gpu_layers) { +bool encodec_load_model_weights(std::ifstream & infile, encodec_model & model, int n_gpu_layers) { // verify magic (i.e. 
ggml signature in hex format) { uint32_t magic; @@ -543,10 +544,10 @@ static struct ggml_cgraph * encodec_ggml_cgraph_create(size_t size) { return cgraph; } -void encodec_build_graph(struct encodec_context *ectx, - const float * inp_audio, - const int n_samples, - const encodec_run_mode_t mode) { +void encodec_build_graph(struct encodec_context * ectx, + const float * inp_audio, + const int n_samples, + const encodec_run_mode_t mode) { assert(mode == encodec_run_mode_t::FULL || mode == encodec_run_mode_t::ENCODE); const auto & model = ectx->model; @@ -555,16 +556,16 @@ void encodec_build_graph(struct encodec_context *ectx, auto & gf = ectx->gf; - const int *ratios = hparams.ratios; - const int kernel_size = hparams.kernel_size; - const int res_kernel_sz = hparams.residual_kernel_size; - const int stride = hparams.stride; - const int n_bins = hparams.n_bins; - const int n_q = hparams.n_q; - const int sr = hparams.sr; - const int bandwidth = hparams.bandwidth; - const int hop_length = hparams.hop_length; - const int hidden_dim = hparams.hidden_dim; + const int * ratios = hparams.ratios; + const int kernel_size = hparams.kernel_size; + const int res_kernel_sz = hparams.residual_kernel_size; + const int stride = hparams.stride; + const int n_bins = hparams.n_bins; + const int n_q = hparams.n_q; + const int sr = hparams.sr; + const int bandwidth = hparams.bandwidth; + const int hop_length = hparams.hop_length; + const int hidden_dim = hparams.hidden_dim; // since we are using ggml-alloc, this buffer only needs enough space to hold the // ggml_tensor and ggml_cgraph structs, but not the tensor data @@ -577,28 +578,28 @@ void encodec_build_graph(struct encodec_context *ectx, /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; - struct ggml_context *ctx0 = ggml_init(ggml_params); + struct ggml_context * ctx0 = ggml_init(ggml_params); gf = std::unique_ptr(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES)); - struct ggml_tensor *inp = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_samples); + struct ggml_tensor * inp = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_samples); ggml_set_name(inp, "inp"); ggml_set_input(inp); - const struct encodec_encoder * encoder = &model.encoder; - const struct encodec_quantizer * quantizer = &model.quantizer; - const struct encodec_decoder * decoder = &model.decoder; + const struct encodec_encoder * encoder = & model.encoder; + const struct encodec_quantizer * quantizer = & model.quantizer; + const struct encodec_decoder * decoder = & model.decoder; - struct ggml_tensor * encoded = encodec_forward_encoder( + struct ggml_tensor * encoded = encodec_forward_encoder( encoder, ctx0, inp, ratios, kernel_size, res_kernel_sz, stride); - struct ggml_tensor * codes = encodec_forward_quantizer_encode( + struct ggml_tensor * codes = encodec_forward_quantizer_encode( quantizer, ctx0, encoded, n_bins, sr, bandwidth, hop_length); struct ggml_tensor * quantized = encodec_forward_quantizer_decode( quantizer, ctx0, codes, hidden_dim, n_bins, sr, bandwidth, hop_length); - struct ggml_tensor * decoded = encodec_forward_decoder( + struct ggml_tensor * decoded = encodec_forward_decoder( decoder, ctx0, quantized, ratios, kernel_size, res_kernel_sz, stride); switch (mode) { @@ -627,8 +628,10 @@ void encodec_build_graph(struct encodec_context *ectx, ectx->decoded = decoded; } -void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const encodec_run_mode_t mode) { +void encodec_build_graph(struct encodec_context * ectx, 
+ const int32_t * codes, + const int n_codes, + const encodec_run_mode_t mode) { assert(mode == encodec_run_mode_t::DECODE); const auto & model = ectx->model; @@ -666,22 +669,22 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, /*.no_alloc =*/ true, }; - struct ggml_context *ctx0 = ggml_init(ggml_params); + struct ggml_context * ctx0 = ggml_init(ggml_params); gf = std::unique_ptr(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES)); - struct ggml_tensor *inp_codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, N, n_q); + struct ggml_tensor * inp_codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, N, n_q); ggml_set_name(inp_codes, "inp_codes"); ggml_set_input(inp_codes); - const struct encodec_quantizer * quantizer = &model.quantizer; - const struct encodec_decoder * decoder = &model.decoder; + const struct encodec_quantizer * quantizer = & model.quantizer; + const struct encodec_decoder * decoder = & model.decoder; - struct ggml_tensor *quantized = encodec_forward_quantizer_decode( + struct ggml_tensor * quantized = encodec_forward_quantizer_decode( quantizer, ctx0, inp_codes, hidden_dim, n_bins, sr, bandwidth, hop_length ); - struct ggml_tensor *decoded = encodec_forward_decoder( + struct ggml_tensor * decoded = encodec_forward_decoder( decoder, ctx0, quantized, ratios, kernel_size, res_kernel_sz, stride ); @@ -703,14 +706,16 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, ectx->decoded = decoded; } -static void encodec_zero_tensor(struct ggml_cgraph *gf, const char *name) { - struct ggml_tensor *tensor = ggml_graph_get_tensor(gf, name); +static void encodec_zero_tensor(struct ggml_cgraph * gf, const char * name) { + struct ggml_tensor * tensor = ggml_graph_get_tensor(gf, name); ggml_set_zero(tensor); } -bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio, - const int n_samples, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval_internal(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads, + const encodec_run_mode_t mode) { auto & model = ectx->model; auto & allocr = ectx->allocr; auto & gf = ectx->gf; @@ -747,9 +752,11 @@ bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio return true; } -bool encodec_eval_internal(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval_internal(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + const int n_threads, + const encodec_run_mode_t mode) { auto & model = ectx->model; auto & allocr = ectx->allocr; auto & gf = ectx->gf; @@ -786,9 +793,11 @@ bool encodec_eval_internal(struct encodec_context *ectx, const int32_t *codes, return true; } -bool encodec_eval(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads, + const encodec_run_mode_t mode) { const int64_t t_start_us = ggml_time_us(); // allocate the compute buffer @@ -816,9 +825,11 @@ bool encodec_eval(struct encodec_context *ectx, const float *raw_audio, return true; } -bool encodec_eval(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval(struct encodec_context * ectx, + const int32_t * codes, + const int 
n_codes, + const int n_threads, + const encodec_run_mode_t mode) { const int64_t t_start_ms = ggml_time_us(); // allocate the compute buffer @@ -847,8 +858,10 @@ bool encodec_eval(struct encodec_context *ectx, const int32_t *codes, return true; } -bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads) { +bool encodec_reconstruct_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads) { if (raw_audio == nullptr) { fprintf(stderr, "%s: null input audio\n", __func__); return false; @@ -864,9 +877,9 @@ bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_au return false; } - struct ggml_tensor *decoded = ectx->decoded; + struct ggml_tensor * decoded = ectx->decoded; - auto &out_audio = ectx->out_audio; + auto & out_audio = ectx->out_audio; int out_length = decoded->ne[0]; out_audio.resize(out_length); @@ -876,8 +889,10 @@ bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_au return true; } -bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads) { +bool encodec_compress_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads) { if (!encodec_eval(ectx, raw_audio, n_samples, n_threads, encodec_run_mode_t::ENCODE)) { fprintf(stderr, "%s: failed to run encodec eval\n", __func__); return false; @@ -888,9 +903,9 @@ bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio return false; } - struct ggml_tensor *codes = ectx->codes; + struct ggml_tensor * codes = ectx->codes; - auto &out_codes = ectx->out_codes; + auto & out_codes = ectx->out_codes; int out_length = codes->ne[0] * codes->ne[1]; out_codes.resize(out_length); @@ -900,8 +915,10 @@ bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio return true; } -bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads) { +bool encodec_decompress_audio(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + const int n_threads) { if (!encodec_eval(ectx, codes, n_codes, n_threads, encodec_run_mode_t::DECODE)) { fprintf(stderr, "%s: failed to run encodec eval\n", __func__); return false; @@ -912,9 +929,9 @@ bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes return false; } - struct ggml_tensor *decoded = ectx->decoded; + struct ggml_tensor * decoded = ectx->decoded; - auto &out_audio = ectx->out_audio; + auto & out_audio = ectx->out_audio; int out_length = decoded->ne[0]; out_audio.resize(out_length); @@ -931,7 +948,7 @@ bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes // model, hence the model is loaded from the offset. This is the case for Bark. // Note that we used to have an encodec_load_model taking a reference to a file stream // but it was removed to comply the C-header requirements. 
-struct encodec_context *encodec_load_model(const char* model_path, const int offset, int n_gpu_layers) { +struct encodec_context * encodec_load_model(const char * model_path, const int offset, int n_gpu_layers) { int64_t t_start_load_us = ggml_time_us(); auto infile = std::ifstream(model_path, std::ios::binary); @@ -944,7 +961,7 @@ struct encodec_context *encodec_load_model(const char* model_path, const int off infile.seekg(offset); } - struct encodec_context *ectx = new encodec_context(); + struct encodec_context * ectx = new encodec_context(); ectx->model = encodec_model(); if (!encodec_load_model_weights(infile, ectx->model, n_gpu_layers)) { @@ -970,18 +987,15 @@ struct encodec_context *encodec_load_model(const char* model_path, const int off return ectx; } -void encodec_free(struct encodec_context *ectx) { - if (!ectx) { +void encodec_free(struct encodec_context * ectx) { + if (!ectx) return; - } - if (ectx->model.ctx) { + if (ectx->model.ctx) ggml_free(ectx->model.ctx); - } - if (ectx->buf_compute) { + if (ectx->buf_compute) ggml_backend_buffer_free(ectx->buf_compute); - } ggml_backend_buffer_free(ectx->model.buffer_w); ggml_backend_free(ectx->model.backend); @@ -989,15 +1003,15 @@ void encodec_free(struct encodec_context *ectx) { delete ectx; } -void encodec_set_target_bandwidth(struct encodec_context *ectx, int bandwidth) { +void encodec_set_target_bandwidth(struct encodec_context * ectx, int bandwidth) { ectx->model.hparams.bandwidth = bandwidth; } -void encodec_set_sample_rate(struct encodec_context *ectx, int sample_rate) { +void encodec_set_sample_rate(struct encodec_context * ectx, int sample_rate) { ectx->model.hparams.sr = sample_rate; } -const struct encodec_statistics* encodec_get_statistics(struct encodec_context *ectx) { +const struct encodec_statistics * encodec_get_statistics(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1005,7 +1019,7 @@ const struct encodec_statistics* encodec_get_statistics(struct encodec_context * return &ectx->stats; } -void encodec_reset_statistics(struct encodec_context *ectx) { +void encodec_reset_statistics(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return; @@ -1013,7 +1027,7 @@ void encodec_reset_statistics(struct encodec_context *ectx) { memset(&ectx->stats, 0, sizeof(ectx->stats)); } -float * encodec_get_audio(struct encodec_context *ectx) { +float * encodec_get_audio(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1021,7 +1035,7 @@ float * encodec_get_audio(struct encodec_context *ectx) { return ectx->out_audio.data(); } -int encodec_get_audio_size(struct encodec_context *ectx) { +int encodec_get_audio_size(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return 0; @@ -1029,7 +1043,7 @@ int encodec_get_audio_size(struct encodec_context *ectx) { return ectx->out_audio.size(); } -int32_t * encodec_get_codes(struct encodec_context *ectx) { +int32_t * encodec_get_codes(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1037,7 +1051,7 @@ int32_t * encodec_get_codes(struct encodec_context *ectx) { return ectx->out_codes.data(); } -int encodec_get_codes_size(struct encodec_context *ectx) { +int encodec_get_codes_size(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return 0; diff --git a/encodec.h b/encodec.h 
index 0a8d8b9..5996018 100644 --- a/encodec.h +++ b/encodec.h @@ -48,10 +48,9 @@ extern "C" { * @param n_gpu_layers The number of GPU layers to use. * @return A pointer to the encodec context struct. */ - struct encodec_context *encodec_load_model( - const char *model_path, - const int offset, - int n_gpu_layers); + struct encodec_context * encodec_load_model(const char * model_path, + const int offset, + int n_gpu_layers); /** * Sets the target bandwidth for the given encodec context. @@ -59,9 +58,8 @@ extern "C" { * @param ectx The encodec context to set the target bandwidth for. * @param bandwidth The target bandwidth to set, in bits per second. */ - void encodec_set_target_bandwidth( - struct encodec_context *ectx, - int bandwidth); + void encodec_set_target_bandwidth(struct encodec_context * ectx, + int bandwidth); /** * Sets the sample rate for the given encodec context. @@ -69,9 +67,8 @@ extern "C" { * @param ectx The encodec context to set the target bandwidth for. * @param sample_rate The sample rate to set. */ - void encodec_set_sample_rate( - struct encodec_context *ectx, - int sample_rate); + void encodec_set_sample_rate(struct encodec_context * ectx, + int sample_rate); /** * Reconstructs audio from raw audio data using the specified encodec context. @@ -82,11 +79,10 @@ extern "C" { * @param n_threads The number of threads to use for reconstruction. * @return True if the reconstruction was successful, false otherwise. */ - bool encodec_reconstruct_audio( - struct encodec_context *ectx, - const float *raw_audio, - const int n_samples, - int n_threads); + bool encodec_reconstruct_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + int n_threads); /** * Compresses audio data using the specified encodec context. @@ -97,11 +93,10 @@ extern "C" { * @param n_threads The number of threads to use for compression. * @return True if the compression was successful, false otherwise. */ - bool encodec_compress_audio( - struct encodec_context *ectx, - const float *raw_audio, - const int n_samples, - int n_threads); + bool encodec_compress_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + int n_threads); /** * Decompresses audio data using the specified encodec context. @@ -112,11 +107,10 @@ extern "C" { * @param n_threads The number of threads to use for decompression. * @return True if the audio data was successfully decompressed, false otherwise. */ - bool encodec_decompress_audio( - struct encodec_context *ectx, - const int32_t *codes, - const int n_codes, - int n_threads); + bool encodec_decompress_audio(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + int n_threads); /** * Gets the audio data from the given encodec context. @@ -124,8 +118,7 @@ extern "C" { * @param ectx The encodec context to get the audio data from. * @return A pointer to the audio data. */ - float * encodec_get_audio( - struct encodec_context *ectx); + float * encodec_get_audio(struct encodec_context * ectx); /** * Gets the size of the audio data from the given encodec context. @@ -133,8 +126,7 @@ extern "C" { * @param ectx The encodec context to get the audio size from. * @return The size of the audio data. */ - int encodec_get_audio_size( - struct encodec_context *ectx); + int encodec_get_audio_size(struct encodec_context * ectx); /** * Gets the code data from the given encodec context. @@ -142,8 +134,7 @@ extern "C" { * @param ectx The encodec context to get the code data from. 
* @return A pointer to the code data. */ - int32_t * encodec_get_codes( - struct encodec_context *ectx); + int32_t * encodec_get_codes(struct encodec_context * ectx); /** * Gets the size of the code data from the given encodec context. @@ -151,8 +142,7 @@ extern "C" { * @param ectx The encodec context to get the code size from. * @return The size of the code data. */ - int encodec_get_codes_size( - struct encodec_context *ectx); + int encodec_get_codes_size(struct encodec_context * ectx); /** * Gets the statistics for the given encodec context. @@ -160,24 +150,21 @@ extern "C" { * @param ectx The encodec context to get the statistics for. * @return A pointer to the statistics struct. */ - const struct encodec_statistics* encodec_get_statistics( - struct encodec_context *ectx); + const struct encodec_statistics * encodec_get_statistics(struct encodec_context * ectx); /** * Reset the statistics for the given encodec context. * * @param ectx The encodec context to reset the statistics for. */ - void encodec_reset_statistics( - struct encodec_context *ectx); + void encodec_reset_statistics(struct encodec_context * ectx); /** * @brief Frees the memory allocated for an encodec context. * * @param ectx The encodec context to free. */ - void encodec_free( - struct encodec_context *ectx); + void encodec_free(struct encodec_context * ectx); #ifdef __cplusplus } diff --git a/encoder.h b/encoder.h index 15b4e3f..c0fa570 100644 --- a/encoder.h +++ b/encoder.h @@ -8,54 +8,56 @@ // res + downsample block at some ratio struct encodec_encoder_block { // conv1 - struct ggml_tensor *conv_1_w; - struct ggml_tensor *conv_1_b; + struct ggml_tensor * conv_1_w; + struct ggml_tensor * conv_1_b; // conv2 - struct ggml_tensor *conv_2_w; - struct ggml_tensor *conv_2_b; + struct ggml_tensor * conv_2_w; + struct ggml_tensor * conv_2_b; // shortcut - struct ggml_tensor *conv_sc_w; - struct ggml_tensor *conv_sc_b; + struct ggml_tensor * conv_sc_w; + struct ggml_tensor * conv_sc_b; // downsampling layers - struct ggml_tensor *ds_conv_w; - struct ggml_tensor *ds_conv_b; + struct ggml_tensor * ds_conv_w; + struct ggml_tensor * ds_conv_b; }; struct encodec_encoder { - struct ggml_tensor *init_conv_w; - struct ggml_tensor *init_conv_b; + struct ggml_tensor * init_conv_w; + struct ggml_tensor * init_conv_b; encodec_lstm lstm; - struct ggml_tensor *final_conv_w; - struct ggml_tensor *final_conv_b; + struct ggml_tensor * final_conv_w; + struct ggml_tensor * final_conv_b; std::vector blocks; }; -struct ggml_tensor *encodec_forward_encoder( - const struct encodec_encoder *encoder, struct ggml_context *ctx0, - struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size, - const int stride) { - +struct ggml_tensor *encodec_forward_encoder(const struct encodec_encoder * encoder, + struct ggml_context * ctx0, + struct ggml_tensor * inp, + const int * ratios, + const int kernel_size, + const int res_kernel_size, + const int stride) { if (!inp) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; } - struct ggml_tensor *inpL = strided_conv_1d( + struct ggml_tensor * inpL = strided_conv_1d( ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride); for (int layer_ix = 0; layer_ix < 4; layer_ix++) { encodec_encoder_block block = encoder->blocks[layer_ix]; - struct ggml_tensor *current = inpL; + struct ggml_tensor * current = inpL; // shortcut - struct ggml_tensor *shortcut = strided_conv_1d( + struct ggml_tensor * shortcut = strided_conv_1d( ctx0, inpL, block.conv_sc_w, 
block.conv_sc_b, stride); // conv1 @@ -82,18 +84,18 @@ struct ggml_tensor *encodec_forward_encoder( // lstm { - struct ggml_tensor *cur = inpL; + struct ggml_tensor * cur = inpL; const encodec_lstm lstm = encoder->lstm; // first lstm layer char l0_prefix[7] = "enc_l0"; - struct ggml_tensor *hs1 = forward_pass_lstm_unilayer( + struct ggml_tensor * hs1 = forward_pass_lstm_unilayer( ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix); // second lstm layer char l1_prefix[7] = "enc_l1"; - struct ggml_tensor *out = forward_pass_lstm_unilayer( + struct ggml_tensor * out = forward_pass_lstm_unilayer( ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix); inpL = ggml_add(ctx0, inpL, out); @@ -102,7 +104,7 @@ struct ggml_tensor *encodec_forward_encoder( // final conv inpL = ggml_elu(ctx0, inpL); - struct ggml_tensor *encoded_inp = strided_conv_1d( + struct ggml_tensor * encoded_inp = strided_conv_1d( ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride); return encoded_inp; diff --git a/lstm.h b/lstm.h index ac23ed1..edec8f0 100644 --- a/lstm.h +++ b/lstm.h @@ -6,26 +6,26 @@ #include "ops.h" struct encodec_lstm { - struct ggml_tensor *l0_ih_w; - struct ggml_tensor *l0_hh_w; + struct ggml_tensor * l0_ih_w; + struct ggml_tensor * l0_hh_w; - struct ggml_tensor *l0_ih_b; - struct ggml_tensor *l0_hh_b; + struct ggml_tensor * l0_ih_b; + struct ggml_tensor * l0_hh_b; - struct ggml_tensor *l1_ih_w; - struct ggml_tensor *l1_hh_w; + struct ggml_tensor * l1_ih_w; + struct ggml_tensor * l1_hh_w; - struct ggml_tensor *l1_ih_b; - struct ggml_tensor *l1_hh_b; + struct ggml_tensor * l1_ih_b; + struct ggml_tensor * l1_hh_b; }; -struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0, - struct ggml_tensor *inp, - struct ggml_tensor *weight_ih, - struct ggml_tensor *weight_hh, - struct ggml_tensor *bias_ih, - struct ggml_tensor *bias_hh, - char *prefix) { +struct ggml_tensor * forward_pass_lstm_unilayer(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * weight_ih, + struct ggml_tensor * weight_hh, + struct ggml_tensor * bias_ih, + struct ggml_tensor * bias_hh, + char * prefix) { const int seq_length = inp->ne[0]; const int input_dim = inp->ne[1]; const int hidden_dim = weight_ih->ne[1] / 4; @@ -36,34 +36,34 @@ struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0, snprintf(ct_name, 10, "%s_ct", prefix); snprintf(ht_name, 10, "%s_ht", prefix); - struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); + struct ggml_tensor * hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); ggml_set_input(hs); - struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); + struct ggml_tensor * c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); ggml_set_input(c_t); ggml_set_name(c_t, ct_name); - struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); + struct ggml_tensor * h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); ggml_set_input(h_t); ggml_set_name(h_t, ht_name); - struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); + struct ggml_tensor * current = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); for (int t = 0; t < seq_length; t++) { - struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]); + struct ggml_tensor * x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]); - struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t); + 
struct ggml_tensor * inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t); inp_gates = ggml_add(ctx0, inp_gates, bias_ih); - struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t); + struct ggml_tensor * hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t); hid_gates = ggml_add(ctx0, hid_gates, bias_hh); - struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates); + struct ggml_tensor * out_gates = ggml_add(ctx0, inp_gates, hid_gates); - struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim)); - struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim)); - struct ggml_tensor *g_t = ggml_tanh(ctx0 , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim)); - struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim)); + struct ggml_tensor * i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim)); + struct ggml_tensor * f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim)); + struct ggml_tensor * g_t = ggml_tanh (ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim)); + struct ggml_tensor * o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim)); c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t)); diff --git a/ops.cpp b/ops.cpp index b245f42..77a7ed6 100644 --- a/ops.cpp +++ b/ops.cpp @@ -7,16 +7,23 @@ #include "ops.h" -static int get_extra_padding_for_conv_1d(struct ggml_tensor *inp, float kernel_size, - float stride, float padding_total) { +static int get_extra_padding_for_conv_1d(struct ggml_tensor * inp, + float kernel_size, + float stride, + float padding_total) { float length = inp->ne[0]; + float n_frames = (length - kernel_size + padding_total) / stride + 1.0f; + int ideal_length = (ceilf(n_frames) - 1) * stride + (kernel_size - padding_total); + return ideal_length - length; } -struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right) { +struct ggml_tensor * pad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right) { int length = inp->ne[0]; int dim = inp->ne[1]; @@ -27,20 +34,22 @@ struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, extra_pad = max_pad - length + 1; // constant padding - struct ggml_tensor *out = ggml_new_tensor_2d(ctx0, inp->type, length + extra_pad, dim); + struct ggml_tensor * out = ggml_new_tensor_2d(ctx0, inp->type, length + extra_pad, dim); out = ggml_set_2d(ctx0, out, inp, out->nb[1], 0); } - struct ggml_tensor *padded = ggml_pad_reflect_1d(ctx0, inp, padding_left, padding_right); + struct ggml_tensor * padded = ggml_pad_reflect_1d(ctx0, inp, padding_left, padding_right); const int end = padded->ne[0] - extra_pad; - struct ggml_tensor *dest = ggml_view_2d(ctx0, padded, end, dim, padded->nb[1], 0); + struct ggml_tensor * dest = ggml_view_2d(ctx0, padded, end, dim, padded->nb[1], 0); return dest; } -struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right) { +struct ggml_tensor * unpad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right) { int length = inp->ne[0]; int dim = inp->ne[1]; @@ -51,20 +60,22 @@ struct ggml_tensor 
*unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, int end = length - padding_right; int offset = padding_left * inp->nb[1]; - struct ggml_tensor *dst = ggml_view_2d(ctx0, inp, end, dim, inp->nb[1], offset); + struct ggml_tensor * dst = ggml_view_2d(ctx0, inp, end, dim, inp->nb[1], offset); return dst; } -struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride) { - int kernel_size = conv_w->ne[0]; +struct ggml_tensor * strided_conv_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride) { + int kernel_size = conv_w->ne[0]; int padding_total = kernel_size - stride; int extra_padding = get_extra_padding_for_conv_1d(inp, kernel_size, stride, padding_total); - struct ggml_tensor *padded_inp = pad_1d(ctx0, inp, padding_total, extra_padding); - struct ggml_tensor *dst = ggml_conv_1d(ctx0, conv_w, padded_inp, stride, 0, 1); + struct ggml_tensor * padded_inp = pad_1d(ctx0, inp, padding_total, extra_padding); + struct ggml_tensor * dst = ggml_conv_1d(ctx0, conv_w, padded_inp, stride, 0, 1); // add bias dst = ggml_transpose(ctx0, dst); @@ -74,10 +85,12 @@ struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tenso return dst; } -struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride) { - struct ggml_tensor *dst = ggml_conv_transpose_1d( +struct ggml_tensor * strided_conv_transpose_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride) { + struct ggml_tensor * dst = ggml_conv_transpose_1d( ctx0, conv_w, inp, stride, 0 /* p0 */, 1 /* d0 */); // add bias diff --git a/ops.h b/ops.h index e935b91..1795abf 100644 --- a/ops.h +++ b/ops.h @@ -2,16 +2,24 @@ #include "ggml.h" -struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right); +struct ggml_tensor * pad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right); -struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right); +struct ggml_tensor * unpad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right); -struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride); +struct ggml_tensor * strided_conv_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride); -struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride); +struct ggml_tensor * strided_conv_transpose_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride); diff --git a/quantizer.h b/quantizer.h index 9986561..e050854 100644 --- a/quantizer.h +++ b/quantizer.h @@ -10,18 +10,21 @@ #include "utils.h" struct encodec_quant_block { - struct ggml_tensor *embed; + struct ggml_tensor * embed; }; struct encodec_quantizer { std::vector blocks; }; -struct ggml_tensor *encodec_forward_quantizer_encode( - const struct encodec_quantizer 
*quantizer, struct ggml_context *ctx0, - struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth, - const int hop_length) { - +struct ggml_tensor * encodec_forward_quantizer_encode( + const struct encodec_quantizer * quantizer, + struct ggml_context * ctx0, + struct ggml_tensor * encoded_inp, + const int n_bins, + const int sr, + const int bandwidth, + const int hop_length) { if (!encoded_inp) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; @@ -32,31 +35,31 @@ struct ggml_tensor *encodec_forward_quantizer_encode( const int seq_length = encoded_inp->ne[0]; - struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q); + struct ggml_tensor * codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q); ggml_set_input(codes); - struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp)); - struct ggml_tensor *residual = inpL; - struct ggml_tensor *indices; + struct ggml_tensor * inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp)); + struct ggml_tensor * residual = inpL; + struct ggml_tensor * indices; for (int i = 0; i < n_q; i++) { encodec_quant_block block = quantizer->blocks[i]; // compute distance // [seq_length, n_bins] - struct ggml_tensor *dp = ggml_scale( + struct ggml_tensor * dp = ggml_scale( ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f); // [n_bins] - struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed); - struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed); + struct ggml_tensor * sqr_embed = ggml_sqr(ctx0, block.embed); + struct ggml_tensor * sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed); // [seq_length] - struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual); - struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp); + struct ggml_tensor * sqr_inp = ggml_sqr(ctx0, residual); + struct ggml_tensor * sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp); // [seq_length, n_bins] - struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); + struct ggml_tensor * dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist); dist = ggml_neg(ctx0, dist); @@ -65,7 +68,7 @@ struct ggml_tensor *encodec_forward_quantizer_encode( indices = ggml_argmax(ctx0, dist); // look up in embedding table - struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices); + struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices); residual = ggml_sub(ctx0, residual, quantized); @@ -75,11 +78,15 @@ struct ggml_tensor *encodec_forward_quantizer_encode( return codes; } -struct ggml_tensor *encodec_forward_quantizer_decode( - const struct encodec_quantizer *quantizer, struct ggml_context *ctx0, - struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth, - const int hop_length) { - +struct ggml_tensor * encodec_forward_quantizer_decode( + const struct encodec_quantizer * quantizer, + struct ggml_context * ctx0, + struct ggml_tensor * codes, + const int hidden_dim, + const int n_bins, + const int sr, + const int bandwidth, + const int hop_length) { if (!codes) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; @@ -92,15 +99,15 @@ struct ggml_tensor *encodec_forward_quantizer_decode( assert(n_q == codes->ne[1]); - struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); + struct ggml_tensor * quantized_out = ggml_new_tensor_2d(ctx0, 
GGML_TYPE_F32, hidden_dim, seq_length); ggml_set_input(quantized_out); ggml_set_name(quantized_out, "quantized_out"); for (int i = 0; i < n_q; i++) { encodec_quant_block block = quantizer->blocks[i]; - struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]); - struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices); + struct ggml_tensor * indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]); + struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices); quantized_out = ggml_add(ctx0, quantized_out, quantized); }
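To close the series, a minimal usage sketch of the public API declared in encodec.h as it stands after these patches; the model path, the input buffer, and the thread count are placeholders, and error handling is limited to the boolean and pointer returns documented in the header.

```cpp
#include "encodec.h"

#include <cstdio>
#include <vector>

int main() {
    // Placeholder model path; offset 0 because the weights are a standalone file.
    struct encodec_context * ectx = encodec_load_model("model.bin", 0 /* offset */, 0 /* n_gpu_layers */);
    if (!ectx) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // Placeholder input: one second of silence (assuming 24 kHz mono float PCM);
    // in practice this buffer would come from a WAV loader.
    std::vector<float> pcm(24000, 0.0f);

    // Full round trip: encode -> quantize -> decode in one call.
    if (!encodec_reconstruct_audio(ectx, pcm.data(), (int) pcm.size(), 4 /* n_threads */)) {
        fprintf(stderr, "reconstruction failed\n");
        encodec_free(ectx);
        return 1;
    }

    const float * out   = encodec_get_audio(ectx);
    const int     n_out = encodec_get_audio_size(ectx);
    fprintf(stdout, "reconstructed %d samples (first sample: %f)\n", n_out, n_out > 0 ? out[0] : 0.0f);

    encodec_free(ectx);
    return 0;
}
```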