From cb17d7661c467d8b490b7b73e46c6f83824999fa Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Sat, 26 Oct 2024 11:07:38 +0200 Subject: [PATCH 1/3] initial --- encodec.cpp | 4 +++- quantizer.h | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/encodec.cpp b/encodec.cpp index 2a1b779..5967cb2 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -705,7 +705,9 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, static void encodec_zero_tensor(struct ggml_cgraph *gf, const char *name) { struct ggml_tensor *tensor = ggml_graph_get_tensor(gf, name); - ggml_set_zero(tensor); + // create a zero array + // ggml_backend_tensor_set(tensor, zeros, 0, ggml_nbytes(tensor)); + // ggml_set_zero(tensor); } bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio, diff --git a/quantizer.h b/quantizer.h index 9986561..f7eb02e 100644 --- a/quantizer.h +++ b/quantizer.h @@ -58,9 +58,8 @@ struct ggml_tensor *encodec_forward_quantizer_encode( // [seq_length, n_bins] struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist); - dist = ggml_neg(ctx0, dist); + dist = ggml_scale(ctx0, dist, -1.0f); - // take the argmax over the column dimension // [seq_length] indices = ggml_argmax(ctx0, dist); From f199201cb937d482447416ae17c0f68215ae656c Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 4 Dec 2024 20:57:49 +0100 Subject: [PATCH 2/3] compiling --- README.md | 2 ++ encodec.cpp | 1 + ggml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d06657b..365702e 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off .. cmake --build . --config Release ``` +Once built, pass the `--n-gpu-layers` flag with a value greater than 0 to the executable. + ### Using cuBLAS The inference can be offloaded on a CUDA backend with cuBLAS.
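For reference, the `--n-gpu-layers` flag documented in the README change above is presumably wired through to the `n_gpu_layers` argument of `encodec_load_model` shown later in this series. A minimal sketch of requesting offloading directly from the C API might look as follows; the model path and the layer count are placeholders.

```cpp
#include "encodec.h"

#include <cstdio>

int main() {
    // "model.bin" and the layer count are placeholders; offset 0 means the weights
    // start at the beginning of the file (a non-zero offset is only needed when the
    // Encodec weights are embedded in a larger file, as for Bark).
    struct encodec_context * ectx = encodec_load_model("model.bin", 0 /* offset */, 4 /* n_gpu_layers */);
    if (!ectx) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... run encode/decode here ...

    encodec_free(ectx);
    return 0;
}
```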
diff --git a/encodec.cpp b/encodec.cpp index 5967cb2..1346b78 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -1,5 +1,6 @@ #include "ggml-alloc.h" #include "ggml-backend.h" +#include "ggml-cpu.h" #include "ggml.h" #include "ggml/src/ggml-impl.h" diff --git a/ggml b/ggml index c18f9ba..74d66b6 160000 --- a/ggml +++ b/ggml @@ -1 +1 @@ -Subproject commit c18f9baeea2f3aea1ffc4afa4ad4496e51b7ff8a +Subproject commit 74d66b63eaf207a24f3e93bb922aba131cbf2906 From bd9abb4353191371768e96835baaf2ddddbac4d3 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Wed, 4 Dec 2024 22:30:00 +0100 Subject: [PATCH 3/3] cosmit --- decoder.h | 50 ++++++++------- encodec.cpp | 176 ++++++++++++++++++++++++++++------------------------ encodec.h | 65 ++++++++----------- encoder.h | 50 ++++++++------- lstm.h | 54 ++++++++-------- ops.cpp | 53 ++++++++++------ ops.h | 28 ++++++--- quantizer.h | 57 +++++++++-------- 8 files changed, 284 insertions(+), 249 deletions(-) diff --git a/decoder.h b/decoder.h index 7f37544..83a602e 100644 --- a/decoder.h +++ b/decoder.h @@ -12,61 +12,65 @@ struct encodec_decoder_block { // upsampling layers - struct ggml_tensor *us_conv_w; - struct ggml_tensor *us_conv_b; + struct ggml_tensor * us_conv_w; + struct ggml_tensor * us_conv_b; // conv1 - struct ggml_tensor *conv_1_w; - struct ggml_tensor *conv_1_b; + struct ggml_tensor * conv_1_w; + struct ggml_tensor * conv_1_b; // conv2 - struct ggml_tensor *conv_2_w; - struct ggml_tensor *conv_2_b; + struct ggml_tensor * conv_2_w; + struct ggml_tensor * conv_2_b; // shortcut - struct ggml_tensor *conv_sc_w; - struct ggml_tensor *conv_sc_b; + struct ggml_tensor * conv_sc_w; + struct ggml_tensor * conv_sc_b; }; struct encodec_decoder { - struct ggml_tensor *init_conv_w; - struct ggml_tensor *init_conv_b; + struct ggml_tensor * init_conv_w; + struct ggml_tensor * init_conv_b; encodec_lstm lstm; - struct ggml_tensor *final_conv_w; - struct ggml_tensor *final_conv_b; + struct ggml_tensor * final_conv_w; + struct ggml_tensor * final_conv_b; std::vector blocks; }; -struct ggml_tensor *encodec_forward_decoder( - const struct encodec_decoder *decoder, struct ggml_context *ctx0, - struct ggml_tensor *quantized_out, const int *ratios, const int kernel_size, const int res_kernel_size, - const int stride) { +struct ggml_tensor * encodec_forward_decoder( + const struct encodec_decoder * decoder, + struct ggml_context * ctx0, + struct ggml_tensor * quantized_out, + const int * ratios, + const int kernel_size, + const int res_kernel_size, + const int stride) { if (!quantized_out) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; } - struct ggml_tensor *inpL = strided_conv_1d( + struct ggml_tensor * inpL = strided_conv_1d( ctx0, quantized_out, decoder->init_conv_w, decoder->init_conv_b, stride); // lstm { - struct ggml_tensor *cur = inpL; + struct ggml_tensor * cur = inpL; const encodec_lstm lstm = decoder->lstm; // first lstm layer char l0_prefix[7] = "dec_l0"; - struct ggml_tensor *hs1 = forward_pass_lstm_unilayer( + struct ggml_tensor * hs1 = forward_pass_lstm_unilayer( ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix); // second lstm layer char l1_prefix[7] = "dec_l1"; - struct ggml_tensor *out = forward_pass_lstm_unilayer( + struct ggml_tensor * out = forward_pass_lstm_unilayer( ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix); inpL = ggml_add(ctx0, inpL, out); @@ -81,10 +85,10 @@ struct ggml_tensor *encodec_forward_decoder( inpL = strided_conv_transpose_1d( ctx0, inpL, 
block.us_conv_w, block.us_conv_b, ratios[layer_ix]); - struct ggml_tensor *current = inpL; + struct ggml_tensor * current = inpL; // shortcut - struct ggml_tensor *shortcut = strided_conv_1d( + struct ggml_tensor * shortcut = strided_conv_1d( ctx0, inpL, block.conv_sc_w, block.conv_sc_b, stride); // conv1 @@ -106,7 +110,7 @@ struct ggml_tensor *encodec_forward_decoder( // final conv inpL = ggml_elu(ctx0, inpL); - struct ggml_tensor *decoded_inp = strided_conv_1d( + struct ggml_tensor * decoded_inp = strided_conv_1d( ctx0, inpL, decoder->final_conv_w, decoder->final_conv_b, stride); return decoded_inp; diff --git a/encodec.cpp b/encodec.cpp index 2a1b779..3d96b4b 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -2,6 +2,7 @@ #include "ggml-backend.h" #include "ggml.h" #include "ggml/src/ggml-impl.h" +#include "ggml-cpu.h" #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" @@ -83,12 +84,12 @@ struct encodec_hparams { struct encodec_model { encodec_hparams hparams; - encodec_encoder encoder; + encodec_encoder encoder; encodec_quantizer quantizer; - encodec_decoder decoder; + encodec_decoder decoder; // context - struct ggml_context *ctx; + struct ggml_context * ctx; int n_loaded; ggml_backend_t backend = NULL; @@ -127,18 +128,18 @@ struct encodec_context { ggml_gallocr_t allocr = NULL; // intermediate steps - struct ggml_tensor *encoded = NULL; // Encoded audio - struct ggml_tensor *codes = NULL; // Quantized representation of audio in codebook - struct ggml_tensor *decoded = NULL; // Reconstructed audio from codes + struct ggml_tensor * encoded = NULL; // Encoded audio + struct ggml_tensor * codes = NULL; // Quantized representation of audio in codebook + struct ggml_tensor * decoded = NULL; // Reconstructed audio from codes std::vector out_codes; - std::vector out_audio; + std::vector out_audio; // statistics encodec_statistics stats; }; -bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int n_gpu_layers) { +bool encodec_load_model_weights(std::ifstream & infile, encodec_model & model, int n_gpu_layers) { // verify magic (i.e. 
ggml signature in hex format) { uint32_t magic; @@ -543,10 +544,10 @@ static struct ggml_cgraph * encodec_ggml_cgraph_create(size_t size) { return cgraph; } -void encodec_build_graph(struct encodec_context *ectx, - const float * inp_audio, - const int n_samples, - const encodec_run_mode_t mode) { +void encodec_build_graph(struct encodec_context * ectx, + const float * inp_audio, + const int n_samples, + const encodec_run_mode_t mode) { assert(mode == encodec_run_mode_t::FULL || mode == encodec_run_mode_t::ENCODE); const auto & model = ectx->model; @@ -555,16 +556,16 @@ void encodec_build_graph(struct encodec_context *ectx, auto & gf = ectx->gf; - const int *ratios = hparams.ratios; - const int kernel_size = hparams.kernel_size; - const int res_kernel_sz = hparams.residual_kernel_size; - const int stride = hparams.stride; - const int n_bins = hparams.n_bins; - const int n_q = hparams.n_q; - const int sr = hparams.sr; - const int bandwidth = hparams.bandwidth; - const int hop_length = hparams.hop_length; - const int hidden_dim = hparams.hidden_dim; + const int * ratios = hparams.ratios; + const int kernel_size = hparams.kernel_size; + const int res_kernel_sz = hparams.residual_kernel_size; + const int stride = hparams.stride; + const int n_bins = hparams.n_bins; + const int n_q = hparams.n_q; + const int sr = hparams.sr; + const int bandwidth = hparams.bandwidth; + const int hop_length = hparams.hop_length; + const int hidden_dim = hparams.hidden_dim; // since we are using ggml-alloc, this buffer only needs enough space to hold the // ggml_tensor and ggml_cgraph structs, but not the tensor data @@ -577,28 +578,28 @@ void encodec_build_graph(struct encodec_context *ectx, /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_gallocr_alloc_graph() }; - struct ggml_context *ctx0 = ggml_init(ggml_params); + struct ggml_context * ctx0 = ggml_init(ggml_params); gf = std::unique_ptr(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES)); - struct ggml_tensor *inp = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_samples); + struct ggml_tensor * inp = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_samples); ggml_set_name(inp, "inp"); ggml_set_input(inp); - const struct encodec_encoder * encoder = &model.encoder; - const struct encodec_quantizer * quantizer = &model.quantizer; - const struct encodec_decoder * decoder = &model.decoder; + const struct encodec_encoder * encoder = & model.encoder; + const struct encodec_quantizer * quantizer = & model.quantizer; + const struct encodec_decoder * decoder = & model.decoder; - struct ggml_tensor * encoded = encodec_forward_encoder( + struct ggml_tensor * encoded = encodec_forward_encoder( encoder, ctx0, inp, ratios, kernel_size, res_kernel_sz, stride); - struct ggml_tensor * codes = encodec_forward_quantizer_encode( + struct ggml_tensor * codes = encodec_forward_quantizer_encode( quantizer, ctx0, encoded, n_bins, sr, bandwidth, hop_length); struct ggml_tensor * quantized = encodec_forward_quantizer_decode( quantizer, ctx0, codes, hidden_dim, n_bins, sr, bandwidth, hop_length); - struct ggml_tensor * decoded = encodec_forward_decoder( + struct ggml_tensor * decoded = encodec_forward_decoder( decoder, ctx0, quantized, ratios, kernel_size, res_kernel_sz, stride); switch (mode) { @@ -627,8 +628,10 @@ void encodec_build_graph(struct encodec_context *ectx, ectx->decoded = decoded; } -void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const encodec_run_mode_t mode) { +void encodec_build_graph(struct encodec_context * ectx, 
+ const int32_t * codes, + const int n_codes, + const encodec_run_mode_t mode) { assert(mode == encodec_run_mode_t::DECODE); const auto & model = ectx->model; @@ -666,22 +669,22 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, /*.no_alloc =*/ true, }; - struct ggml_context *ctx0 = ggml_init(ggml_params); + struct ggml_context * ctx0 = ggml_init(ggml_params); gf = std::unique_ptr(encodec_ggml_cgraph_create(ENCODEC_MAX_NODES)); - struct ggml_tensor *inp_codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, N, n_q); + struct ggml_tensor * inp_codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, N, n_q); ggml_set_name(inp_codes, "inp_codes"); ggml_set_input(inp_codes); - const struct encodec_quantizer * quantizer = &model.quantizer; - const struct encodec_decoder * decoder = &model.decoder; + const struct encodec_quantizer * quantizer = & model.quantizer; + const struct encodec_decoder * decoder = & model.decoder; - struct ggml_tensor *quantized = encodec_forward_quantizer_decode( + struct ggml_tensor * quantized = encodec_forward_quantizer_decode( quantizer, ctx0, inp_codes, hidden_dim, n_bins, sr, bandwidth, hop_length ); - struct ggml_tensor *decoded = encodec_forward_decoder( + struct ggml_tensor * decoded = encodec_forward_decoder( decoder, ctx0, quantized, ratios, kernel_size, res_kernel_sz, stride ); @@ -703,14 +706,16 @@ void encodec_build_graph(struct encodec_context *ectx, const int32_t *codes, ectx->decoded = decoded; } -static void encodec_zero_tensor(struct ggml_cgraph *gf, const char *name) { - struct ggml_tensor *tensor = ggml_graph_get_tensor(gf, name); +static void encodec_zero_tensor(struct ggml_cgraph * gf, const char * name) { + struct ggml_tensor * tensor = ggml_graph_get_tensor(gf, name); ggml_set_zero(tensor); } -bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio, - const int n_samples, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval_internal(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads, + const encodec_run_mode_t mode) { auto & model = ectx->model; auto & allocr = ectx->allocr; auto & gf = ectx->gf; @@ -747,9 +752,11 @@ bool encodec_eval_internal(struct encodec_context *ectx, const float * raw_audio return true; } -bool encodec_eval_internal(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval_internal(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + const int n_threads, + const encodec_run_mode_t mode) { auto & model = ectx->model; auto & allocr = ectx->allocr; auto & gf = ectx->gf; @@ -786,9 +793,11 @@ bool encodec_eval_internal(struct encodec_context *ectx, const int32_t *codes, return true; } -bool encodec_eval(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads, + const encodec_run_mode_t mode) { const int64_t t_start_us = ggml_time_us(); // allocate the compute buffer @@ -816,9 +825,11 @@ bool encodec_eval(struct encodec_context *ectx, const float *raw_audio, return true; } -bool encodec_eval(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads, - const encodec_run_mode_t mode) { +bool encodec_eval(struct encodec_context * ectx, + const int32_t * codes, + const int 
n_codes, + const int n_threads, + const encodec_run_mode_t mode) { const int64_t t_start_ms = ggml_time_us(); // allocate the compute buffer @@ -847,8 +858,10 @@ bool encodec_eval(struct encodec_context *ectx, const int32_t *codes, return true; } -bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads) { +bool encodec_reconstruct_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads) { if (raw_audio == nullptr) { fprintf(stderr, "%s: null input audio\n", __func__); return false; @@ -864,9 +877,9 @@ bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_au return false; } - struct ggml_tensor *decoded = ectx->decoded; + struct ggml_tensor * decoded = ectx->decoded; - auto &out_audio = ectx->out_audio; + auto & out_audio = ectx->out_audio; int out_length = decoded->ne[0]; out_audio.resize(out_length); @@ -876,8 +889,10 @@ bool encodec_reconstruct_audio(struct encodec_context *ectx, const float *raw_au return true; } -bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio, - const int n_samples, const int n_threads) { +bool encodec_compress_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + const int n_threads) { if (!encodec_eval(ectx, raw_audio, n_samples, n_threads, encodec_run_mode_t::ENCODE)) { fprintf(stderr, "%s: failed to run encodec eval\n", __func__); return false; @@ -888,9 +903,9 @@ bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio return false; } - struct ggml_tensor *codes = ectx->codes; + struct ggml_tensor * codes = ectx->codes; - auto &out_codes = ectx->out_codes; + auto & out_codes = ectx->out_codes; int out_length = codes->ne[0] * codes->ne[1]; out_codes.resize(out_length); @@ -900,8 +915,10 @@ bool encodec_compress_audio(struct encodec_context *ectx, const float *raw_audio return true; } -bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes, - const int n_codes, const int n_threads) { +bool encodec_decompress_audio(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + const int n_threads) { if (!encodec_eval(ectx, codes, n_codes, n_threads, encodec_run_mode_t::DECODE)) { fprintf(stderr, "%s: failed to run encodec eval\n", __func__); return false; @@ -912,9 +929,9 @@ bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes return false; } - struct ggml_tensor *decoded = ectx->decoded; + struct ggml_tensor * decoded = ectx->decoded; - auto &out_audio = ectx->out_audio; + auto & out_audio = ectx->out_audio; int out_length = decoded->ne[0]; out_audio.resize(out_length); @@ -931,7 +948,7 @@ bool encodec_decompress_audio(struct encodec_context *ectx, const int32_t *codes // model, hence the model is loaded from the offset. This is the case for Bark. // Note that we used to have an encodec_load_model taking a reference to a file stream // but it was removed to comply the C-header requirements. 
-struct encodec_context *encodec_load_model(const char* model_path, const int offset, int n_gpu_layers) { +struct encodec_context * encodec_load_model(const char * model_path, const int offset, int n_gpu_layers) { int64_t t_start_load_us = ggml_time_us(); auto infile = std::ifstream(model_path, std::ios::binary); @@ -944,7 +961,7 @@ struct encodec_context *encodec_load_model(const char* model_path, const int off infile.seekg(offset); } - struct encodec_context *ectx = new encodec_context(); + struct encodec_context * ectx = new encodec_context(); ectx->model = encodec_model(); if (!encodec_load_model_weights(infile, ectx->model, n_gpu_layers)) { @@ -970,18 +987,15 @@ struct encodec_context *encodec_load_model(const char* model_path, const int off return ectx; } -void encodec_free(struct encodec_context *ectx) { - if (!ectx) { +void encodec_free(struct encodec_context * ectx) { + if (!ectx) return; - } - if (ectx->model.ctx) { + if (ectx->model.ctx) ggml_free(ectx->model.ctx); - } - if (ectx->buf_compute) { + if (ectx->buf_compute) ggml_backend_buffer_free(ectx->buf_compute); - } ggml_backend_buffer_free(ectx->model.buffer_w); ggml_backend_free(ectx->model.backend); @@ -989,15 +1003,15 @@ void encodec_free(struct encodec_context *ectx) { delete ectx; } -void encodec_set_target_bandwidth(struct encodec_context *ectx, int bandwidth) { +void encodec_set_target_bandwidth(struct encodec_context * ectx, int bandwidth) { ectx->model.hparams.bandwidth = bandwidth; } -void encodec_set_sample_rate(struct encodec_context *ectx, int sample_rate) { +void encodec_set_sample_rate(struct encodec_context * ectx, int sample_rate) { ectx->model.hparams.sr = sample_rate; } -const struct encodec_statistics* encodec_get_statistics(struct encodec_context *ectx) { +const struct encodec_statistics * encodec_get_statistics(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1005,7 +1019,7 @@ const struct encodec_statistics* encodec_get_statistics(struct encodec_context * return &ectx->stats; } -void encodec_reset_statistics(struct encodec_context *ectx) { +void encodec_reset_statistics(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return; @@ -1013,7 +1027,7 @@ void encodec_reset_statistics(struct encodec_context *ectx) { memset(&ectx->stats, 0, sizeof(ectx->stats)); } -float * encodec_get_audio(struct encodec_context *ectx) { +float * encodec_get_audio(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1021,7 +1035,7 @@ float * encodec_get_audio(struct encodec_context *ectx) { return ectx->out_audio.data(); } -int encodec_get_audio_size(struct encodec_context *ectx) { +int encodec_get_audio_size(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return 0; @@ -1029,7 +1043,7 @@ int encodec_get_audio_size(struct encodec_context *ectx) { return ectx->out_audio.size(); } -int32_t * encodec_get_codes(struct encodec_context *ectx) { +int32_t * encodec_get_codes(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return nullptr; @@ -1037,7 +1051,7 @@ int32_t * encodec_get_codes(struct encodec_context *ectx) { return ectx->out_codes.data(); } -int encodec_get_codes_size(struct encodec_context *ectx) { +int encodec_get_codes_size(struct encodec_context * ectx) { if (!ectx) { fprintf(stderr, "%s: null context\n", __func__); return 0; diff --git a/encodec.h b/encodec.h 
index 0a8d8b9..5996018 100644 --- a/encodec.h +++ b/encodec.h @@ -48,10 +48,9 @@ extern "C" { * @param n_gpu_layers The number of GPU layers to use. * @return A pointer to the encodec context struct. */ - struct encodec_context *encodec_load_model( - const char *model_path, - const int offset, - int n_gpu_layers); + struct encodec_context * encodec_load_model(const char * model_path, + const int offset, + int n_gpu_layers); /** * Sets the target bandwidth for the given encodec context. @@ -59,9 +58,8 @@ extern "C" { * @param ectx The encodec context to set the target bandwidth for. * @param bandwidth The target bandwidth to set, in bits per second. */ - void encodec_set_target_bandwidth( - struct encodec_context *ectx, - int bandwidth); + void encodec_set_target_bandwidth(struct encodec_context * ectx, + int bandwidth); /** * Sets the sample rate for the given encodec context. @@ -69,9 +67,8 @@ extern "C" { * @param ectx The encodec context to set the target bandwidth for. * @param sample_rate The sample rate to set. */ - void encodec_set_sample_rate( - struct encodec_context *ectx, - int sample_rate); + void encodec_set_sample_rate(struct encodec_context * ectx, + int sample_rate); /** * Reconstructs audio from raw audio data using the specified encodec context. @@ -82,11 +79,10 @@ extern "C" { * @param n_threads The number of threads to use for reconstruction. * @return True if the reconstruction was successful, false otherwise. */ - bool encodec_reconstruct_audio( - struct encodec_context *ectx, - const float *raw_audio, - const int n_samples, - int n_threads); + bool encodec_reconstruct_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + int n_threads); /** * Compresses audio data using the specified encodec context. @@ -97,11 +93,10 @@ extern "C" { * @param n_threads The number of threads to use for compression. * @return True if the compression was successful, false otherwise. */ - bool encodec_compress_audio( - struct encodec_context *ectx, - const float *raw_audio, - const int n_samples, - int n_threads); + bool encodec_compress_audio(struct encodec_context * ectx, + const float * raw_audio, + const int n_samples, + int n_threads); /** * Decompresses audio data using the specified encodec context. @@ -112,11 +107,10 @@ extern "C" { * @param n_threads The number of threads to use for decompression. * @return True if the audio data was successfully decompressed, false otherwise. */ - bool encodec_decompress_audio( - struct encodec_context *ectx, - const int32_t *codes, - const int n_codes, - int n_threads); + bool encodec_decompress_audio(struct encodec_context * ectx, + const int32_t * codes, + const int n_codes, + int n_threads); /** * Gets the audio data from the given encodec context. @@ -124,8 +118,7 @@ extern "C" { * @param ectx The encodec context to get the audio data from. * @return A pointer to the audio data. */ - float * encodec_get_audio( - struct encodec_context *ectx); + float * encodec_get_audio(struct encodec_context * ectx); /** * Gets the size of the audio data from the given encodec context. @@ -133,8 +126,7 @@ extern "C" { * @param ectx The encodec context to get the audio size from. * @return The size of the audio data. */ - int encodec_get_audio_size( - struct encodec_context *ectx); + int encodec_get_audio_size(struct encodec_context * ectx); /** * Gets the code data from the given encodec context. @@ -142,8 +134,7 @@ extern "C" { * @param ectx The encodec context to get the code data from. 
* @return A pointer to the code data. */ - int32_t * encodec_get_codes( - struct encodec_context *ectx); + int32_t * encodec_get_codes(struct encodec_context * ectx); /** * Gets the size of the code data from the given encodec context. @@ -151,8 +142,7 @@ extern "C" { * @param ectx The encodec context to get the code size from. * @return The size of the code data. */ - int encodec_get_codes_size( - struct encodec_context *ectx); + int encodec_get_codes_size(struct encodec_context * ectx); /** * Gets the statistics for the given encodec context. @@ -160,24 +150,21 @@ extern "C" { * @param ectx The encodec context to get the statistics for. * @return A pointer to the statistics struct. */ - const struct encodec_statistics* encodec_get_statistics( - struct encodec_context *ectx); + const struct encodec_statistics * encodec_get_statistics(struct encodec_context * ectx); /** * Reset the statistics for the given encodec context. * * @param ectx The encodec context to reset the statistics for. */ - void encodec_reset_statistics( - struct encodec_context *ectx); + void encodec_reset_statistics(struct encodec_context * ectx); /** * @brief Frees the memory allocated for an encodec context. * * @param ectx The encodec context to free. */ - void encodec_free( - struct encodec_context *ectx); + void encodec_free(struct encodec_context * ectx); #ifdef __cplusplus } diff --git a/encoder.h b/encoder.h index 15b4e3f..c0fa570 100644 --- a/encoder.h +++ b/encoder.h @@ -8,54 +8,56 @@ // res + downsample block at some ratio struct encodec_encoder_block { // conv1 - struct ggml_tensor *conv_1_w; - struct ggml_tensor *conv_1_b; + struct ggml_tensor * conv_1_w; + struct ggml_tensor * conv_1_b; // conv2 - struct ggml_tensor *conv_2_w; - struct ggml_tensor *conv_2_b; + struct ggml_tensor * conv_2_w; + struct ggml_tensor * conv_2_b; // shortcut - struct ggml_tensor *conv_sc_w; - struct ggml_tensor *conv_sc_b; + struct ggml_tensor * conv_sc_w; + struct ggml_tensor * conv_sc_b; // downsampling layers - struct ggml_tensor *ds_conv_w; - struct ggml_tensor *ds_conv_b; + struct ggml_tensor * ds_conv_w; + struct ggml_tensor * ds_conv_b; }; struct encodec_encoder { - struct ggml_tensor *init_conv_w; - struct ggml_tensor *init_conv_b; + struct ggml_tensor * init_conv_w; + struct ggml_tensor * init_conv_b; encodec_lstm lstm; - struct ggml_tensor *final_conv_w; - struct ggml_tensor *final_conv_b; + struct ggml_tensor * final_conv_w; + struct ggml_tensor * final_conv_b; std::vector blocks; }; -struct ggml_tensor *encodec_forward_encoder( - const struct encodec_encoder *encoder, struct ggml_context *ctx0, - struct ggml_tensor *inp, const int * ratios, const int kernel_size, const int res_kernel_size, - const int stride) { - +struct ggml_tensor *encodec_forward_encoder(const struct encodec_encoder * encoder, + struct ggml_context * ctx0, + struct ggml_tensor * inp, + const int * ratios, + const int kernel_size, + const int res_kernel_size, + const int stride) { if (!inp) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; } - struct ggml_tensor *inpL = strided_conv_1d( + struct ggml_tensor * inpL = strided_conv_1d( ctx0, inp, encoder->init_conv_w, encoder->init_conv_b, stride); for (int layer_ix = 0; layer_ix < 4; layer_ix++) { encodec_encoder_block block = encoder->blocks[layer_ix]; - struct ggml_tensor *current = inpL; + struct ggml_tensor * current = inpL; // shortcut - struct ggml_tensor *shortcut = strided_conv_1d( + struct ggml_tensor * shortcut = strided_conv_1d( ctx0, inpL, block.conv_sc_w, 
block.conv_sc_b, stride); // conv1 @@ -82,18 +84,18 @@ struct ggml_tensor *encodec_forward_encoder( // lstm { - struct ggml_tensor *cur = inpL; + struct ggml_tensor * cur = inpL; const encodec_lstm lstm = encoder->lstm; // first lstm layer char l0_prefix[7] = "enc_l0"; - struct ggml_tensor *hs1 = forward_pass_lstm_unilayer( + struct ggml_tensor * hs1 = forward_pass_lstm_unilayer( ctx0, cur, lstm.l0_ih_w, lstm.l0_hh_w, lstm.l0_ih_b, lstm.l0_hh_b, l0_prefix); // second lstm layer char l1_prefix[7] = "enc_l1"; - struct ggml_tensor *out = forward_pass_lstm_unilayer( + struct ggml_tensor * out = forward_pass_lstm_unilayer( ctx0, hs1, lstm.l1_ih_w, lstm.l1_hh_w, lstm.l1_ih_b, lstm.l1_hh_b, l1_prefix); inpL = ggml_add(ctx0, inpL, out); @@ -102,7 +104,7 @@ struct ggml_tensor *encodec_forward_encoder( // final conv inpL = ggml_elu(ctx0, inpL); - struct ggml_tensor *encoded_inp = strided_conv_1d( + struct ggml_tensor * encoded_inp = strided_conv_1d( ctx0, inpL, encoder->final_conv_w, encoder->final_conv_b, stride); return encoded_inp; diff --git a/lstm.h b/lstm.h index ac23ed1..edec8f0 100644 --- a/lstm.h +++ b/lstm.h @@ -6,26 +6,26 @@ #include "ops.h" struct encodec_lstm { - struct ggml_tensor *l0_ih_w; - struct ggml_tensor *l0_hh_w; + struct ggml_tensor * l0_ih_w; + struct ggml_tensor * l0_hh_w; - struct ggml_tensor *l0_ih_b; - struct ggml_tensor *l0_hh_b; + struct ggml_tensor * l0_ih_b; + struct ggml_tensor * l0_hh_b; - struct ggml_tensor *l1_ih_w; - struct ggml_tensor *l1_hh_w; + struct ggml_tensor * l1_ih_w; + struct ggml_tensor * l1_hh_w; - struct ggml_tensor *l1_ih_b; - struct ggml_tensor *l1_hh_b; + struct ggml_tensor * l1_ih_b; + struct ggml_tensor * l1_hh_b; }; -struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0, - struct ggml_tensor *inp, - struct ggml_tensor *weight_ih, - struct ggml_tensor *weight_hh, - struct ggml_tensor *bias_ih, - struct ggml_tensor *bias_hh, - char *prefix) { +struct ggml_tensor * forward_pass_lstm_unilayer(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * weight_ih, + struct ggml_tensor * weight_hh, + struct ggml_tensor * bias_ih, + struct ggml_tensor * bias_hh, + char * prefix) { const int seq_length = inp->ne[0]; const int input_dim = inp->ne[1]; const int hidden_dim = weight_ih->ne[1] / 4; @@ -36,34 +36,34 @@ struct ggml_tensor *forward_pass_lstm_unilayer(struct ggml_context *ctx0, snprintf(ct_name, 10, "%s_ct", prefix); snprintf(ht_name, 10, "%s_ht", prefix); - struct ggml_tensor *hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); + struct ggml_tensor * hs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); ggml_set_input(hs); - struct ggml_tensor *c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); + struct ggml_tensor * c_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); ggml_set_input(c_t); ggml_set_name(c_t, ct_name); - struct ggml_tensor *h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); + struct ggml_tensor * h_t = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hidden_dim); ggml_set_input(h_t); ggml_set_name(h_t, ht_name); - struct ggml_tensor *current = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); + struct ggml_tensor * current = ggml_cont(ctx0, ggml_transpose(ctx0, inp)); for (int t = 0; t < seq_length; t++) { - struct ggml_tensor *x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]); + struct ggml_tensor * x_t = ggml_view_1d(ctx0, current, input_dim, t * current->nb[1]); - struct ggml_tensor *inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t); + 
struct ggml_tensor * inp_gates = ggml_mul_mat(ctx0, weight_ih, x_t); inp_gates = ggml_add(ctx0, inp_gates, bias_ih); - struct ggml_tensor *hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t); + struct ggml_tensor * hid_gates = ggml_mul_mat(ctx0, weight_hh, h_t); hid_gates = ggml_add(ctx0, hid_gates, bias_hh); - struct ggml_tensor *out_gates = ggml_add(ctx0, inp_gates, hid_gates); + struct ggml_tensor * out_gates = ggml_add(ctx0, inp_gates, hid_gates); - struct ggml_tensor *i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim)); - struct ggml_tensor *f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim)); - struct ggml_tensor *g_t = ggml_tanh(ctx0 , ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim)); - struct ggml_tensor *o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim)); + struct ggml_tensor * i_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 0 * sizeof(float) * hidden_dim)); + struct ggml_tensor * f_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 1 * sizeof(float) * hidden_dim)); + struct ggml_tensor * g_t = ggml_tanh (ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 2 * sizeof(float) * hidden_dim)); + struct ggml_tensor * o_t = ggml_sigmoid(ctx0, ggml_view_1d(ctx0, out_gates, hidden_dim, 3 * sizeof(float) * hidden_dim)); c_t = ggml_add(ctx0, ggml_mul(ctx0, f_t, c_t), ggml_mul(ctx0, i_t, g_t)); diff --git a/ops.cpp b/ops.cpp index b245f42..77a7ed6 100644 --- a/ops.cpp +++ b/ops.cpp @@ -7,16 +7,23 @@ #include "ops.h" -static int get_extra_padding_for_conv_1d(struct ggml_tensor *inp, float kernel_size, - float stride, float padding_total) { +static int get_extra_padding_for_conv_1d(struct ggml_tensor * inp, + float kernel_size, + float stride, + float padding_total) { float length = inp->ne[0]; + float n_frames = (length - kernel_size + padding_total) / stride + 1.0f; + int ideal_length = (ceilf(n_frames) - 1) * stride + (kernel_size - padding_total); + return ideal_length - length; } -struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right) { +struct ggml_tensor * pad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right) { int length = inp->ne[0]; int dim = inp->ne[1]; @@ -27,20 +34,22 @@ struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, extra_pad = max_pad - length + 1; // constant padding - struct ggml_tensor *out = ggml_new_tensor_2d(ctx0, inp->type, length + extra_pad, dim); + struct ggml_tensor * out = ggml_new_tensor_2d(ctx0, inp->type, length + extra_pad, dim); out = ggml_set_2d(ctx0, out, inp, out->nb[1], 0); } - struct ggml_tensor *padded = ggml_pad_reflect_1d(ctx0, inp, padding_left, padding_right); + struct ggml_tensor * padded = ggml_pad_reflect_1d(ctx0, inp, padding_left, padding_right); const int end = padded->ne[0] - extra_pad; - struct ggml_tensor *dest = ggml_view_2d(ctx0, padded, end, dim, padded->nb[1], 0); + struct ggml_tensor * dest = ggml_view_2d(ctx0, padded, end, dim, padded->nb[1], 0); return dest; } -struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right) { +struct ggml_tensor * unpad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right) { int length = inp->ne[0]; int dim = inp->ne[1]; @@ -51,20 +60,22 @@ struct ggml_tensor 
*unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, int end = length - padding_right; int offset = padding_left * inp->nb[1]; - struct ggml_tensor *dst = ggml_view_2d(ctx0, inp, end, dim, inp->nb[1], offset); + struct ggml_tensor * dst = ggml_view_2d(ctx0, inp, end, dim, inp->nb[1], offset); return dst; } -struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride) { - int kernel_size = conv_w->ne[0]; +struct ggml_tensor * strided_conv_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride) { + int kernel_size = conv_w->ne[0]; int padding_total = kernel_size - stride; int extra_padding = get_extra_padding_for_conv_1d(inp, kernel_size, stride, padding_total); - struct ggml_tensor *padded_inp = pad_1d(ctx0, inp, padding_total, extra_padding); - struct ggml_tensor *dst = ggml_conv_1d(ctx0, conv_w, padded_inp, stride, 0, 1); + struct ggml_tensor * padded_inp = pad_1d(ctx0, inp, padding_total, extra_padding); + struct ggml_tensor * dst = ggml_conv_1d(ctx0, conv_w, padded_inp, stride, 0, 1); // add bias dst = ggml_transpose(ctx0, dst); @@ -74,10 +85,12 @@ struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tenso return dst; } -struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride) { - struct ggml_tensor *dst = ggml_conv_transpose_1d( +struct ggml_tensor * strided_conv_transpose_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride) { + struct ggml_tensor * dst = ggml_conv_transpose_1d( ctx0, conv_w, inp, stride, 0 /* p0 */, 1 /* d0 */); // add bias diff --git a/ops.h b/ops.h index e935b91..1795abf 100644 --- a/ops.h +++ b/ops.h @@ -2,16 +2,24 @@ #include "ggml.h" -struct ggml_tensor *pad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right); +struct ggml_tensor * pad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right); -struct ggml_tensor *unpad_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - int padding_left, int padding_right); +struct ggml_tensor * unpad_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + int padding_left, + int padding_right); -struct ggml_tensor *strided_conv_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride); +struct ggml_tensor * strided_conv_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride); -struct ggml_tensor *strided_conv_transpose_1d(struct ggml_context *ctx0, struct ggml_tensor *inp, - struct ggml_tensor *conv_w, struct ggml_tensor *conv_b, - int stride); +struct ggml_tensor * strided_conv_transpose_1d(struct ggml_context * ctx0, + struct ggml_tensor * inp, + struct ggml_tensor * conv_w, + struct ggml_tensor * conv_b, + int stride); diff --git a/quantizer.h b/quantizer.h index 9986561..e050854 100644 --- a/quantizer.h +++ b/quantizer.h @@ -10,18 +10,21 @@ #include "utils.h" struct encodec_quant_block { - struct ggml_tensor *embed; + struct ggml_tensor * embed; }; struct encodec_quantizer { std::vector blocks; }; -struct ggml_tensor *encodec_forward_quantizer_encode( - const struct encodec_quantizer 
*quantizer, struct ggml_context *ctx0, - struct ggml_tensor *encoded_inp, const int n_bins, const int sr, const int bandwidth, - const int hop_length) { - +struct ggml_tensor * encodec_forward_quantizer_encode( + const struct encodec_quantizer * quantizer, + struct ggml_context * ctx0, + struct ggml_tensor * encoded_inp, + const int n_bins, + const int sr, + const int bandwidth, + const int hop_length) { if (!encoded_inp) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; @@ -32,31 +35,31 @@ struct ggml_tensor *encodec_forward_quantizer_encode( const int seq_length = encoded_inp->ne[0]; - struct ggml_tensor *codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q); + struct ggml_tensor * codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q); ggml_set_input(codes); - struct ggml_tensor *inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp)); - struct ggml_tensor *residual = inpL; - struct ggml_tensor *indices; + struct ggml_tensor * inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp)); + struct ggml_tensor * residual = inpL; + struct ggml_tensor * indices; for (int i = 0; i < n_q; i++) { encodec_quant_block block = quantizer->blocks[i]; // compute distance // [seq_length, n_bins] - struct ggml_tensor *dp = ggml_scale( + struct ggml_tensor * dp = ggml_scale( ctx0, ggml_mul_mat(ctx0, block.embed, residual), -2.0f); // [n_bins] - struct ggml_tensor *sqr_embed = ggml_sqr(ctx0, block.embed); - struct ggml_tensor *sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed); + struct ggml_tensor * sqr_embed = ggml_sqr(ctx0, block.embed); + struct ggml_tensor * sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed); // [seq_length] - struct ggml_tensor *sqr_inp = ggml_sqr(ctx0, residual); - struct ggml_tensor *sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp); + struct ggml_tensor * sqr_inp = ggml_sqr(ctx0, residual); + struct ggml_tensor * sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp); // [seq_length, n_bins] - struct ggml_tensor *dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); + struct ggml_tensor * dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp); dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist); dist = ggml_neg(ctx0, dist); @@ -65,7 +68,7 @@ struct ggml_tensor *encodec_forward_quantizer_encode( indices = ggml_argmax(ctx0, dist); // look up in embedding table - struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices); + struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices); residual = ggml_sub(ctx0, residual, quantized); @@ -75,11 +78,15 @@ struct ggml_tensor *encodec_forward_quantizer_encode( return codes; } -struct ggml_tensor *encodec_forward_quantizer_decode( - const struct encodec_quantizer *quantizer, struct ggml_context *ctx0, - struct ggml_tensor *codes, const int hidden_dim, const int n_bins, const int sr, const int bandwidth, - const int hop_length) { - +struct ggml_tensor * encodec_forward_quantizer_decode( + const struct encodec_quantizer * quantizer, + struct ggml_context * ctx0, + struct ggml_tensor * codes, + const int hidden_dim, + const int n_bins, + const int sr, + const int bandwidth, + const int hop_length) { if (!codes) { fprintf(stderr, "%s: null input tensor\n", __func__); return NULL; @@ -92,15 +99,15 @@ struct ggml_tensor *encodec_forward_quantizer_decode( assert(n_q == codes->ne[1]); - struct ggml_tensor *quantized_out = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hidden_dim, seq_length); + struct ggml_tensor * quantized_out = ggml_new_tensor_2d(ctx0, 
GGML_TYPE_F32, hidden_dim, seq_length); ggml_set_input(quantized_out); ggml_set_name(quantized_out, "quantized_out"); for (int i = 0; i < n_q; i++) { encodec_quant_block block = quantizer->blocks[i]; - struct ggml_tensor *indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]); - struct ggml_tensor *quantized = ggml_get_rows(ctx0, block.embed, indices); + struct ggml_tensor * indices = ggml_view_1d(ctx0, codes, seq_length, i * codes->nb[1]); + struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices); quantized_out = ggml_add(ctx0, quantized_out, quantized); }
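To close the series, a minimal usage sketch of the public API declared in encodec.h as it stands after these patches; the model path, the input buffer, and the thread count are placeholders, and error handling is limited to the boolean and pointer returns documented in the header.

```cpp
#include "encodec.h"

#include <cstdio>
#include <vector>

int main() {
    // Placeholder model path; offset 0 because the weights are a standalone file.
    struct encodec_context * ectx = encodec_load_model("model.bin", 0 /* offset */, 0 /* n_gpu_layers */);
    if (!ectx) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // Placeholder input: one second of silence (assuming 24 kHz mono float PCM);
    // in practice this buffer would come from a WAV loader.
    std::vector<float> pcm(24000, 0.0f);

    // Full round trip: encode -> quantize -> decode in one call.
    if (!encodec_reconstruct_audio(ectx, pcm.data(), (int) pcm.size(), 4 /* n_threads */)) {
        fprintf(stderr, "reconstruction failed\n");
        encodec_free(ectx);
        return 1;
    }

    const float * out   = encodec_get_audio(ectx);
    const int     n_out = encodec_get_audio_size(ectx);
    fprintf(stdout, "reconstructed %d samples (first sample: %f)\n", n_out, n_out > 0 ? out[0] : 0.0f);

    encodec_free(ectx);
    return 0;
}
```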