diff --git a/scripts/rebuild-testbench.sh b/scripts/rebuild-testbench.sh
index fee09fd243ae..996d16f45a8c 100755
--- a/scripts/rebuild-testbench.sh
+++ b/scripts/rebuild-testbench.sh
@@ -97,7 +97,7 @@ export_xtensa_setup()
     cat <<EOFSETUP > "$export_script"
 export XTENSA_TOOLS_ROOT=$XTENSA_TOOLS_ROOT
 export XTENSA_CORE=$XTENSA_CORE
-XTENSA_PATH=$tools_bin
+export XTENSA_PATH=$tools_bin
 EOFSETUP
 }
 
diff --git a/src/audio/mfcc/Kconfig b/src/audio/mfcc/Kconfig
index 678331896b5f..f56cadb40de2 100644
--- a/src/audio/mfcc/Kconfig
+++ b/src/audio/mfcc/Kconfig
@@ -4,7 +4,8 @@ config COMP_MFCC
 	tristate "MFCC component"
 	depends on COMP_MODULE_ADAPTER
 	select CORDIC_FIXED
-	select MATH_16BIT_MEL_FILTERBANK
+	select MATH_32BIT_FFT
+	select MATH_32BIT_MEL_FILTERBANK
 	select MATH_AUDITORY
 	select MATH_DCT
 	select MATH_DECIBELS
diff --git a/src/audio/mfcc/mfcc.c b/src/audio/mfcc/mfcc.c
index 656e3d9b7bf7..ea09d919009b 100644
--- a/src/audio/mfcc/mfcc.c
+++ b/src/audio/mfcc/mfcc.c
@@ -160,7 +160,6 @@ static int mfcc_prepare(struct processing_module *mod,
 	enum sof_ipc_frame source_format;
 	enum sof_ipc_frame sink_format;
 	size_t data_size;
-	uint32_t sink_period_bytes;
 	int ret;
 
 	comp_info(dev, "entry");
@@ -178,15 +177,7 @@ static int mfcc_prepare(struct processing_module *mod,
 
-	/* get sink data format and period bytes */
+	/* get sink data format */
 	sink_format = audio_stream_get_frm_fmt(&sinkb->stream);
-	sink_period_bytes = audio_stream_period_bytes(&sinkb->stream, dev->frames);
-	comp_info(dev, "source_format = %d, sink_format = %d",
-		  source_format, sink_format);
-	if (audio_stream_get_size(&sinkb->stream) < sink_period_bytes) {
-		comp_err(dev, "sink buffer size %d is insufficient < %d",
-			 audio_stream_get_size(&sinkb->stream), sink_period_bytes);
-		ret = -ENOMEM;
-		goto err;
-	}
+	comp_info(dev, "source_format = %d, sink_format = %d", source_format, sink_format);
 
 	cd->config = comp_get_data_blob(cd->model_handler, &data_size, NULL);
 
diff --git a/src/audio/mfcc/mfcc_common.c b/src/audio/mfcc/mfcc_common.c
index bba1253f9740..1079864e9259 100644
--- a/src/audio/mfcc/mfcc_common.c
+++ b/src/audio/mfcc/mfcc_common.c
@@ -10,6 +10,7 @@
 #include <sof/audio/audio_stream.h>
 #include <sof/audio/format.h>
 #include <sof/math/auditory.h>
+#include <sof/math/fft.h>
 #include <sof/math/matrix.h>
 #include <sof/math/sqrt.h>
 #include <sof/math/trig.h>
@@ -21,17 +22,6 @@
 #include <stdint.h>
 
 LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL);
-/* MFCC with 16 bit FFT benefits from data normalize, for 32 bits there's no
- * significant impact. The amount of left shifts for FFT input is limited to
- * 10 that equals about 60 dB boost. The boost is compensated in Mel energy
- * calculation.
- */
-#if MFCC_FFT_BITS == 16
-#define MFCC_NORMALIZE_FFT
-#else
-#undef MFCC_NORMALIZE_FFT
-#endif
-#define MFCC_NORMALIZE_MAX_SHIFT	10
 
 /*
  * The main processing function for MFCC
@@ -45,13 +35,13 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 	struct mfcc_fft *fft = &state->fft;
 	int mel_scale_shift;
 	int input_shift;
-	int i;
+	int j;
 	int m;
 	int cc_count = 0;
-	int32_t s;
-	int16_t mel_value;
-	int16_t peak;
-	int16_t clamp_value;
+	int64_t s;
+	int32_t mel_value;
+	int32_t peak;
+	int32_t clamp_value;
 
 	/* Phase 1, wait until whole fft_size is filled with valid data. This way
 	 * first output cepstral coefficients originate from streamed data and not
@@ -73,9 +63,9 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 		state->prev_samples_valid = true;
 	}
 
-	/* Check if enough samples in buffer for FFT hop */
+	/* Check if enough samples in buffer for one FFT hop */
 	m = buf->s_avail / fft->fft_hop_size;
-	for (i = 0; i < m; i++) {
+	if (m > 0) {
 		/* Clear FFT input buffer because it has been used as scratch */
 		bzero(fft->fft_buf, fft->fft_buffer_size);
 
@@ -86,12 +76,7 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 
 		/* TODO: use_energy & raw_energy */
 
-#ifdef MFCC_NORMALIZE_FFT
-		/* Find block scale left shift for FFT input */
-		input_shift = mfcc_normalize_fft_buffer(state);
-#else
 		input_shift = 0;
-#endif
 
 		/* Window function */
 		mfcc_apply_window(state, input_shift);
@@ -104,71 +89,76 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 		bzero(fft->fft_out, fft->fft_buffer_size);
 
 		/* Compute FFT */
-#if MFCC_FFT_BITS == 16
-		fft_execute_16(fft->fft_plan, false);
-#else
 		fft_execute_32(fft->fft_plan, false);
-#endif
 
-		/* Convert powerspectrum to Mel band logarithmic spectrum */
-		mat_init_16b(state->mel_spectra, 1, state->dct.num_in, 7); /* Q8.7 */
+		/* Initialize 16-bit Mel log spectrum buffer in Q9.7. The Mel values
+		 * are converted from Q9.23 to Q9.7 for DCT matrix multiplication.
+		 */
+		mat_init_16b(state->mel_spectra, 1, state->dct.num_in, 7); /* Q9.7 */
 
 		/* Compensate FFT lib scaling to Mel log values, e.g. for 512 long FFT
 		 * the fft_plan->len is 9. The scaling is 1/512. Subtract from input_shift it
 		 * to add the missing "gain".
 		 */
 		mel_scale_shift = input_shift - fft->fft_plan->len;
-#if MFCC_FFT_BITS == 16
-		psy_apply_mel_filterbank_16(&state->melfb, fft->fft_out, state->power_spectra,
-					    state->mel_spectra->data, mel_scale_shift);
-#else
 		psy_apply_mel_filterbank_32(&state->melfb, fft->fft_out, state->power_spectra,
-					    state->mel_spectra->data, mel_scale_shift);
-#endif
+					    state->mel_log_32, mel_scale_shift);
 
 		if (state->mel_only) {
 			/* In Mel-only mode output Mel log spectra directly */
 			cc_count += state->dct.num_in;
 
-			/* Find peak mel value and track state->mmax */
+			/* Find peak mel value and track state->mmax in Q9.23 */
 			if (config->dynamic_mmax) {
-				peak = state->mel_spectra->data[0];
-				for (i = 1; i < state->dct.num_in; i++) {
-					if (state->mel_spectra->data[i] > peak)
-						peak = state->mel_spectra->data[i];
+				peak = state->mel_log_32[0];
+				for (j = 1; j < state->dct.num_in; j++) {
+					if (state->mel_log_32[j] > peak)
+						peak = state->mel_log_32[j];
 				}
 
 				/* Jump to peak immediately if higher, decay otherwise */
 				if (peak > state->mmax) {
 					state->mmax = peak;
 				} else {
-					/* Q8.7 * Q1.15, result Q8.7. The coefficient is small so
-					 * no need for saturation.
+					/* Q9.23 * Q1.15, result Q9.23. The coefficient is small
+					 * so no need for saturation.
 					 */
-					s = (int32_t)peak - state->mmax;
+					s = (int64_t)peak - state->mmax;
 					state->mmax +=
-						Q_MULTSR_32X32(s, config->mmax_coef, 7, 15, 7);
+						Q_MULTSR_32X32(s, config->mmax_coef, 23, 15, 23);
 				}
 			}
 
-			/* Clamp Mel values lower than mmax - top_db, add offset, and scale */
-			clamp_value = state->mmax - config->top_db;
-			for (i = 0; i < state->dct.num_in; i++) {
-				mel_value = state->mel_spectra->data[i];
+			/* Clamp Mel values lower than mmax - top_db, add offset, and scale.
+			 * Config top_db and mel_offset are Q9.7, shift to Q9.23.
+			 */
+			clamp_value = state->mmax - ((int32_t)config->top_db << 16);
+			for (j = 0; j < state->dct.num_in; j++) {
+				mel_value = state->mel_log_32[j];
 				if (mel_value < clamp_value)
 					mel_value = clamp_value;
 
-				/* Q8.7 * Q4.12, result 8.7 */
-				s = (int32_t)mel_value + config->mel_offset;
-				state->mel_spectra->data[i] =
-					sat_int16(Q_MULTSR_32X32(s, config->mel_scale, 7, 12, 7));
+				/* Q9.23 * Q4.12, result Q9.23 */
+				s = (int64_t)mel_value + ((int32_t)config->mel_offset << 16);
+				state->mel_log_32[j] =
+					sat_int32(Q_MULTSR_32X32(s, config->mel_scale, 23, 12, 23));
 			}
 
+			/* Store Q9.7 version in mel_spectra for s16 output mode */
+			for (j = 0; j < state->dct.num_in; j++)
+				state->mel_spectra->data[j] =
+					sat_int16(state->mel_log_32[j] >> 16);
+
 			/* Enable this to check mmax decay */
 			comp_dbg(dev, "state->mmax = %d", state->mmax);
 		} else {
+			/* Convert Q9.23 to Q9.7 for 16-bit DCT */
+			for (j = 0; j < state->dct.num_in; j++)
+				state->mel_spectra->data[j] =
+					sat_int16(state->mel_log_32[j] >> 16);
+
 			/* Multiply Mel spectra with DCT matrix to get cepstral coefficients */
-			mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */
+			mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q9.7 */
 			mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef);
 
 			/* Apply cepstral lifter */
@@ -179,16 +169,58 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 
 			cc_count += state->dct.num_out;
 		}
-
-		/* Output to sink buffer */
 	}
 
-	/* TODO: This version handles only one FFT run per copy(). How to pass multiple
-	 * cepstral coefficients sets return is an open.
-	 */
 	return cc_count;
 }
 
+/* Build one FFT input frame into fft_buf: the overlap history held in
+ * prev_data followed by one hop of new samples from the input buffer.
+ */
+void mfcc_fill_fft_buffer(struct mfcc_state *state)
+{
+	struct mfcc_fft *fft = &state->fft;
+	struct mfcc_buffer *buf = &state->buf;
+	int16_t *src = buf->r_ptr;
+	int idx = fft->fft_fill_start_idx;
+	int copied = 0;
+	int chunk;
+	int k;
+
+	/* The overlap history in prev_data goes first. Only the real parts
+	 * are written; the caller has zeroed fft_buf beforehand so the
+	 * imaginary parts stay zero.
+	 */
+	for (k = 0; k < state->prev_data_size; k++)
+		fft->fft_buf[idx++].real = state->prev_data[k];
+
+	/* Next append fft_hop_size new samples. The source is a circular
+	 * buffer, so read it in linear runs and wrap the pointer in between.
+	 */
+	while (copied < fft->fft_hop_size) {
+		chunk = mfcc_buffer_samples_without_wrap(buf, src);
+		chunk = MIN(chunk, fft->fft_hop_size - copied);
+		for (k = 0; k < chunk; k++)
+			fft->fft_buf[idx++].real = *src++;
+
+		src = mfcc_buffer_wrap(buf, src);
+		copied += chunk;
+	}
+
+	/* Account for the consumed samples in the circular buffer state */
+	buf->r_ptr = src;
+	buf->s_avail -= copied;
+	buf->s_free += copied;
+
+	/* Save the tail of this frame, starting one hop after the fill start,
+	 * as the overlap history for the next call. The 32-bit FFT input
+	 * words are stored back into the 16-bit history buffer.
+	 */
+	idx = fft->fft_fill_start_idx + fft->fft_hop_size;
+	for (k = 0; k < state->prev_data_size; k++)
+		state->prev_data[k] = fft->fft_buf[idx + k].real;
+}
+
 #if CONFIG_FORMAT_S16LE
 static int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, int16_t *w_ptr,
 					int samples)
@@ -338,6 +370,7 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
 	int sink_samples;
 	int remain_s32;
 	int to_copy;
+	int k;
 
 	/* Get samples from source buffer */
 	mfcc_source_copy_s24(bsource, buf, &state->emph, frames, state->source_channel);
@@ -347,10 +380,15 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
 
 	/* If new output produced, set up pointer into scratch data */
 	if (num_ceps > 0) {
-		if (state->mel_only)
-			state->out_data_ptr = state->mel_spectra->data;
-		else
+		if (state->mel_only) {
+			/* Convert mel_log_32 from Q9.23 to Q9.15 in-place */
+			for (k = 0; k < num_ceps; k++)
+				state->mel_log_32[k] >>= 8;
+
+			state->out_data_ptr_32 = state->mel_log_32;
+		} else {
 			state->out_data_ptr = state->cepstral_coef->data;
+		}
 
 		state->out_remain = num_ceps;
 		state->magic_pending = true;
@@ -366,18 +404,30 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
 		state->magic_pending = false;
 	}
 
-	/* Write cepstral/mel data packed as int32_t from scratch buffer */
-	remain_s32 = (state->out_remain + 1) / 2;
-	to_copy = MIN(remain_s32, sink_samples);
-	if (to_copy > 0) {
-		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
-						(int32_t *)state->out_data_ptr);
-		state->out_data_ptr += to_copy * 2;
-		state->out_remain -= to_copy * 2;
-		if (state->out_remain < 0)
-			state->out_remain = 0;
-
-		sink_samples -= to_copy;
+	if (state->mel_only) {
+		/* Write 32-bit mel data Q9.15, one value per int32_t */
+		to_copy = MIN(state->out_remain, sink_samples);
+		if (to_copy > 0) {
+			w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
+							state->out_data_ptr_32);
+			state->out_data_ptr_32 += to_copy;
+			state->out_remain -= to_copy;
+			sink_samples -= to_copy;
+		}
+	} else {
+		/* Write cepstral data packed as int32_t from scratch buffer */
+		remain_s32 = (state->out_remain + 1) / 2;
+		to_copy = MIN(remain_s32, sink_samples);
+		if (to_copy > 0) {
+			w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
+							(int32_t *)state->out_data_ptr);
+			state->out_data_ptr += to_copy * 2;
+			state->out_remain -= to_copy * 2;
+			if (state->out_remain < 0)
+				state->out_remain = 0;
+
+			sink_samples -= to_copy;
+		}
 	}
 
 	/* Zero-fill remaining sink samples */
@@ -409,10 +459,11 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
 
 	/* If new output produced, set up pointer into scratch data */
 	if (num_ceps > 0) {
-		if (state->mel_only)
-			state->out_data_ptr = state->mel_spectra->data;
-		else
+		if (state->mel_only) {
+			state->out_data_ptr_32 = state->mel_log_32;
+		} else {
 			state->out_data_ptr = state->cepstral_coef->data;
+		}
 
 		state->out_remain = num_ceps;
 		state->magic_pending = true;
@@ -428,18 +479,30 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
 		state->magic_pending = false;
 	}
 
-	/* Write cepstral/mel data packed as int32_t from scratch buffer */
-	remain_s32 = (state->out_remain + 1) / 2;
-	to_copy = MIN(remain_s32, sink_samples);
-	if (to_copy > 0) {
-		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
-						(int32_t *)state->out_data_ptr);
-		state->out_data_ptr += to_copy * 2;
-		state->out_remain -= to_copy * 2;
-		if (state->out_remain < 0)
-			state->out_remain = 0;
-
-		sink_samples -= to_copy;
+	if (state->mel_only) {
+		/* Write 32-bit mel data Q9.23, one value per int32_t */
+		to_copy = MIN(state->out_remain, sink_samples);
+		if (to_copy > 0) {
+			w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
+							state->out_data_ptr_32);
+			state->out_data_ptr_32 += to_copy;
+			state->out_remain -= to_copy;
+			sink_samples -= to_copy;
+		}
+	} else {
+		/* Write cepstral data packed as int32_t from scratch buffer */
+		remain_s32 = (state->out_remain + 1) / 2;
+		to_copy = MIN(remain_s32, sink_samples);
+		if (to_copy > 0) {
+			w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy,
+							(int32_t *)state->out_data_ptr);
+			state->out_data_ptr += to_copy * 2;
+			state->out_remain -= to_copy * 2;
+			if (state->out_remain < 0)
+				state->out_remain = 0;
+
+			sink_samples -= to_copy;
+		}
 	}
 
 	/* Zero-fill remaining sink samples */
diff --git a/src/audio/mfcc/mfcc_generic.c b/src/audio/mfcc/mfcc_generic.c
index 48d2b2e88997..73ac49272ed4 100644
--- a/src/audio/mfcc/mfcc_generic.c
+++ b/src/audio/mfcc/mfcc_generic.c
@@ -51,94 +51,17 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data,
 	buf->r_ptr = r;
 }
 
-void mfcc_fill_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_buffer *buf = &state->buf;
-	struct mfcc_fft *fft = &state->fft;
-	int16_t *r = buf->r_ptr;
-	int copied;
-	int nmax;
-	int idx = fft->fft_fill_start_idx;
-	int j;
-	int n;
-
-	/* Copy overlapped samples from state buffer. Imaginary part of input
-	 * remains zero.
-	 */
-	for (j = 0; j < state->prev_data_size; j++)
-		fft->fft_buf[idx + j].real = state->prev_data[j];
-
-	/* Copy hop size of new data from circular buffer */
-	idx += state->prev_data_size;
-	for (copied = 0; copied < fft->fft_hop_size; copied += n) {
-		nmax = fft->fft_hop_size - copied;
-		n = mfcc_buffer_samples_without_wrap(buf, r);
-		n = MIN(n, nmax);
-		for (j = 0; j < n; j++) {
-			fft->fft_buf[idx].real = *r;
-			r++;
-			idx++;
-		}
-		r = mfcc_buffer_wrap(buf, r);
-	}
-
-	buf->s_avail -= copied;
-	buf->s_free += copied;
-	buf->r_ptr = r;
-
-	/* Copy for next time data back to overlap buffer */
-	idx = fft->fft_fill_start_idx + fft->fft_hop_size;
-	for (j = 0; j < state->prev_data_size; j++)
-		state->prev_data[j] = fft->fft_buf[idx + j].real;
-}
-
-#ifdef MFCC_NORMALIZE_FFT
-int mfcc_normalize_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_fft *fft = &state->fft;
-	int32_t absx;
-	int32_t smax = 0;
-	int32_t x;
-	int shift;
-	int j;
-	int i = fft->fft_fill_start_idx;
-
-	for (j = 0; j < fft->fft_size; j++) {
-		x = fft->fft_buf[i + j].real;
-		absx = (x < 0) ? -x : x;
-		if (smax < absx)
-			smax = absx;
-	}
-
-	shift = norm_int32(smax << 15) - 1; /* 16 bit data */
-	shift = MAX(shift, 0);
-	shift = MIN(shift, MFCC_NORMALIZE_MAX_SHIFT);
-	return shift;
-}
-#endif
-
 void mfcc_apply_window(struct mfcc_state *state, int input_shift)
 {
 	struct mfcc_fft *fft = &state->fft;
 	int j;
 	int i = fft->fft_fill_start_idx;
 
-#if MFCC_FFT_BITS == 16
-	/* TODO: Use proper multiply and saturate function to make sure no overflows */
-	int32_t x;
-	int s = 14 - input_shift; /* Q1.15 x Q1.15 -> Q30 -> Q15, shift by 15 - 1 for round */
-
-	for (j = 0; j < fft->fft_size; j++) {
-		x = (int32_t)fft->fft_buf[i + j].real * state->window[j];
-		fft->fft_buf[i + j].real = ((x >> s) + 1) >> 1;
-	}
-#else
 	/* TODO: Use proper multiply and saturate function to make sure no overflows */
 	int s = input_shift + 1; /* To convert 16 -> 32 with Q1.15 x Q1.15 -> Q30 -> Q31 */
 
 	for (j = 0; j < fft->fft_size; j++)
 		fft->fft_buf[i + j].real = (fft->fft_buf[i + j].real * state->window[j]) << s;
-#endif
 }
 
 #if CONFIG_FORMAT_S16LE
diff --git a/src/audio/mfcc/mfcc_hifi3.c b/src/audio/mfcc/mfcc_hifi3.c
index 153048d67bf7..80c384ad6c64 100644
--- a/src/audio/mfcc/mfcc_hifi3.c
+++ b/src/audio/mfcc/mfcc_hifi3.c
@@ -128,73 +128,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data,
 	buf->r_ptr = (void *)in; /* int16_t pointer but direct cast is not possible */
 }
 
-void mfcc_fill_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_buffer *buf = &state->buf;
-	struct mfcc_fft *fft = &state->fft;
-	int idx = fft->fft_fill_start_idx;
-	ae_int16 *out = (ae_int16 *)&fft->fft_buf[idx].real;
-	ae_int16 *in = (ae_int16 *)state->prev_data;
-	ae_int16x4 sample;
-	const int buf_inc = sizeof(ae_int16);
-	const int fft_inc = sizeof(fft->fft_buf[0]);
-	int j;
-
-	/* Copy overlapped samples from state buffer. Imaginary part of input
-	 * remains zero.
-	 */
-	for (j = 0; j < state->prev_data_size; j++) {
-		AE_L16_XP(sample, in, buf_inc);
-		AE_S16_0_XP(sample, out, fft_inc);
-	}
-
-	/* Copy hop size of new data from circular buffer */
-	idx += state->prev_data_size;
-	in = (ae_int16 *)buf->r_ptr;
-	out = (ae_int16 *)&fft->fft_buf[idx].real;
-	set_circular_buf0(buf->addr, buf->end_addr);
-	for (j = 0; j < fft->fft_hop_size; j++) {
-		AE_L16_XC(sample, in, buf_inc);
-		AE_S16_0_XP(sample, out, fft_inc);
-	}
-
-	buf->s_avail -= fft->fft_hop_size;
-	buf->s_free += fft->fft_hop_size;
-	buf->r_ptr = (int16_t *)in;
-
-	/* Copy for next time data back to overlap buffer */
-	idx = fft->fft_fill_start_idx + fft->fft_hop_size;
-	in = (ae_int16 *)&fft->fft_buf[idx].real;
-	out = (ae_int16 *)state->prev_data;
-	for (j = 0; j < state->prev_data_size; j++) {
-		AE_L16_XP(sample, in, fft_inc);
-		AE_S16_0_XP(sample, out, buf_inc);
-	}
-}
-
-#ifdef MFCC_NORMALIZE_FFT
-int mfcc_normalize_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_fft *fft = &state->fft;
-	ae_p16s *in = (ae_p16s *)&fft->fft_buf[fft->fft_fill_start_idx].real;
-	ae_int32x2 sample;
-	ae_int32x2 max = AE_ZERO32();
-	const int fft_inc = sizeof(fft->fft_buf[0]);
-	int shift;
-	int j;
-
-	for (j = 0; j < fft->fft_size; j++) {
-		/* load 16-bit data to middle of 32-bit container*/
-		AE_L16M_XU(sample, in, fft_inc);
-		max = AE_MAXABS32S(max, sample);
-	}
-	shift = AE_NSAZ32_L(max) - 8;/* 16 bit data */
-	shift = MAX(shift, 0);
-	shift = MIN(shift, MFCC_NORMALIZE_MAX_SHIFT);
-	return shift;
-}
-#endif
-
 void mfcc_apply_window(struct mfcc_state *state, int input_shift)
 {
 	struct mfcc_fft *fft = &state->fft;
@@ -205,31 +138,18 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift)
 	ae_int16x4 win;
 	int j;
 
-#if MFCC_FFT_BITS == 16
-	ae_int16 *fft_in = (ae_int16 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
-	ae_int16x4 sample;
-
-	for (j = 0; j < fft->fft_size; j++) {
-		AE_L16_IP(sample, fft_in, 0);
-		AE_L16_XP(win, win_in, win_inc);
-		temp = AE_MULF16SS_00(sample, win);
-		temp = AE_SLAA32S(temp, input_shift);
-		sample = AE_ROUND16X4F32SASYM(temp, temp);
-		AE_S16_0_XP(sample, fft_in, fft_inc);
-	}
-#else
 	ae_int32 *fft_in = (ae_int32 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
 	ae_int32x2 sample;
 
 	for (j = 0; j < fft->fft_size; j++) {
 		AE_L32_IP(sample, fft_in, 0);
 		AE_L16_XP(win, win_in, win_inc);
-		temp = AE_MULFP32X16X2RS_H(sample, win);
+		/* Data is 16-bit in 32-bit container, shift to Q1.31 for fractional multiply */
+		sample = AE_SLAI32S(sample, 16);
 		temp = AE_MULFP32X16X2RS_L(sample, win);
 		temp = AE_SLAA32S(temp, input_shift);
 		AE_S32_L_XP(temp, fft_in, fft_inc);
 	}
-#endif
 }
 
 #if CONFIG_FORMAT_S24LE
@@ -263,8 +183,10 @@ void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffe
 			delay = emph->delay;
 			for (i = 0; i < n; i++) {
 				AE_L32_XP(sample32, in, in_inc);
-				/* S24_4LE: shift right by 8 to get 16-bit, then convert */
-				sample32 = AE_SRAI32(sample32, 8);
+				/* Shift left by 8 to sign-extend to Q1.31 */
+				sample32 = AE_SLAI32(sample32, 8);
+				/* Then shift right by 16 to get 16-bit */
+				sample32 = AE_SRAI32(sample32, 16);
 				sample = AE_SAT16X4(sample32, sample32);
 				/* Q1.15 -> Q1.31 */
 				temp = AE_CVT32X2F16_10(sample);
@@ -277,7 +199,10 @@ void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffe
 		} else {
 			for (i = 0; i < n; i++) {
 				AE_L32_XP(sample32, in, in_inc);
-				sample32 = AE_SRAI32(sample32, 8);
+				/* Shift left by 8 to sign-extend to Q1.31 */
+				sample32 = AE_SLAI32(sample32, 8);
+				/* Then shift right by 16 to get 16-bit */
+				sample32 = AE_SRAI32(sample32, 16);
 				sample = AE_SAT16X4(sample32, sample32);
 				AE_S16_0_IP(sample, out, 2);
 			}
diff --git a/src/audio/mfcc/mfcc_hifi4.c b/src/audio/mfcc/mfcc_hifi4.c
index c9bd59ada18b..63986870793b 100644
--- a/src/audio/mfcc/mfcc_hifi4.c
+++ b/src/audio/mfcc/mfcc_hifi4.c
@@ -124,73 +124,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data,
 	buf->r_ptr = (int16_t *)in;
 }
 
-void mfcc_fill_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_buffer *buf = &state->buf;
-	struct mfcc_fft *fft = &state->fft;
-	int idx = fft->fft_fill_start_idx;
-	ae_int16 *out = (ae_int16 *)&fft->fft_buf[idx].real;
-	ae_int16 *in = (ae_int16 *)state->prev_data;
-	ae_int16x4 sample;
-	const int buf_inc = sizeof(ae_int16);
-	const int fft_inc = sizeof(fft->fft_buf[0]);
-	int j;
-
-	/* Copy overlapped samples from state buffer. Imaginary part of input
-	 * remains zero.
-	 */
-	for (j = 0; j < state->prev_data_size; j++) {
-		AE_L16_XP(sample, in, buf_inc);
-		AE_S16_0_XP(sample, out, fft_inc);
-	}
-
-	/* Copy hop size of new data from circular buffer */
-	idx += state->prev_data_size;
-	in = (ae_int16 *)buf->r_ptr;
-	out = (ae_int16 *)&fft->fft_buf[idx].real;
-	set_circular_buf0(buf->addr, buf->end_addr);
-	for (j = 0; j < fft->fft_hop_size; j++) {
-		AE_L16_XC(sample, in, buf_inc);
-		AE_S16_0_XP(sample, out, fft_inc);
-	}
-
-	buf->s_avail -= fft->fft_hop_size;
-	buf->s_free += fft->fft_hop_size;
-	buf->r_ptr = (int16_t *)in;
-
-	/* Copy for next time data back to overlap buffer */
-	idx = fft->fft_fill_start_idx + fft->fft_hop_size;
-	in = (ae_int16 *)&fft->fft_buf[idx].real;
-	out = (ae_int16 *)state->prev_data;
-	for (j = 0; j < state->prev_data_size; j++) {
-		AE_L16_XP(sample, in, fft_inc);
-		AE_S16_0_XP(sample, out, buf_inc);
-	}
-}
-
-#ifdef MFCC_NORMALIZE_FFT
-int mfcc_normalize_fft_buffer(struct mfcc_state *state)
-{
-	struct mfcc_fft *fft = &state->fft;
-	ae_p16s *in = (ae_p16s *)&fft->fft_buf[fft->fft_fill_start_idx].real;
-	ae_int32x2 sample;
-	ae_int32x2 max = AE_ZERO32();
-	const int fft_inc = sizeof(fft->fft_buf[0]);
-	int shift;
-	int j;
-
-	for (j = 0; j < fft->fft_size; j++) {
-		/* load 16-bit data to middle of 32-bit container*/
-		AE_L16M_XU(sample, in, fft_inc);
-		max = AE_MAXABS32S(max, sample);
-	}
-	shift = AE_NSAZ32_L(max) - 8;/* 16 bit data */
-	shift = MAX(shift, 0);
-	shift = MIN(shift, MFCC_NORMALIZE_MAX_SHIFT);
-	return shift;
-}
-#endif
-
 void mfcc_apply_window(struct mfcc_state *state, int input_shift)
 {
 	struct mfcc_fft *fft = &state->fft;
@@ -201,31 +134,18 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift)
 	ae_int16x4 win;
 	int j;
 
-#if MFCC_FFT_BITS == 16
-	ae_int16 *fft_in = (ae_int16 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
-	ae_int16x4 sample;
-
-	for (j = 0; j < fft->fft_size; j++) {
-		AE_L16_IP(sample, fft_in, 0);
-		AE_L16_XP(win, win_in, win_inc);
-		temp = AE_MULF16SS_00(sample, win);
-		temp = AE_SLAA32S(temp, input_shift);
-		sample = AE_ROUND16X4F32SASYM(temp, temp);
-		AE_S16_0_XP(sample, fft_in, fft_inc);
-	}
-#else
 	ae_int32 *fft_in = (ae_int32 *)&fft->fft_buf[fft->fft_fill_start_idx].real;
 	ae_int32x2 sample;
 
 	for (j = 0; j < fft->fft_size; j++) {
 		AE_L32_IP(sample, fft_in, 0);
 		AE_L16_XP(win, win_in, win_inc);
-		temp = AE_MULFP32X16X2RS_H(sample, win);
+		/* Data is 16-bit in 32-bit container, shift to Q1.31 for fractional multiply */
+		sample = AE_SLAI32S(sample, 16);
 		temp = AE_MULFP32X16X2RS_L(sample, win);
 		temp = AE_SLAA32S(temp, input_shift);
 		AE_S32_L_XP(temp, fft_in, fft_inc);
 	}
-#endif
 }
 
 #if CONFIG_FORMAT_S24LE
@@ -253,8 +173,10 @@ void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffe
 		coef = emph->coef;
 		for (i = 0; i < frames; i++) {
 			AE_L32_XC(sample32, in, in_inc);
-			/* S24_4LE: shift right by 8 to get 16-bit */
-			sample32 = AE_SRAI32(sample32, 8);
+			/* Shift left by 8 to sign-extend to Q1.31 */
+			sample32 = AE_SLAI32(sample32, 8);
+			/* Then shift right by 16 to get 16-bit */
+			sample32 = AE_SRAI32(sample32, 16);
 			sample = AE_SAT16X4(sample32, sample32);
 			/* Q1.15 -> Q1.31 */
 			temp = AE_CVT32X2F16_10(sample);
@@ -267,7 +189,10 @@ void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffe
 	} else {
 		for (i = 0; i < frames; i++) {
 			AE_L32_XC(sample32, in, in_inc);
-			sample32 = AE_SRAI32(sample32, 8);
+			/* Shift left by 8 to sign-extend to Q1.31 */
+			sample32 = AE_SLAI32(sample32, 8);
+			/* Then shift right by 16 to get 16-bit */
+			sample32 = AE_SRAI32(sample32, 16);
 			sample = AE_SAT16X4(sample32, sample32);
 			AE_S16_0_XC1(sample, out, out_inc);
 		}
diff --git a/src/audio/mfcc/mfcc_setup.c b/src/audio/mfcc/mfcc_setup.c
index 0a9fc19f0f53..1cad4b2b984e 100644
--- a/src/audio/mfcc/mfcc_setup.c
+++ b/src/audio/mfcc/mfcc_setup.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: BSD-3-Clause
 //
-// Copyright(c) 2022 Intel Corporation. All rights reserved.
+// Copyright(c) 2022-2026 Intel Corporation.
 //
 // Author: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
 
@@ -152,7 +152,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	else
 		state->source_channel = config->channel;
 
-	state->mmax = config->mmax_init;
+	state->mmax = (int32_t)config->mmax_init << 16; /* Q9.7 -> Q9.23 */
 	state->emph.enable = config->preemphasis_coefficient > 0;
 	state->emph.coef = -config->preemphasis_coefficient; /* Negate config parameter */
 	fft->fft_size = config->frame_length;
@@ -187,11 +187,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	state->window = state->prev_data + state->prev_data_size;
 
 	/* Allocate buffers for FFT input and output data */
-#if MFCC_FFT_BITS == 16
-	fft->fft_buffer_size = fft->fft_padded_size * sizeof(struct icomplex16);
-#else
 	fft->fft_buffer_size = fft->fft_padded_size * sizeof(struct icomplex32);
-#endif
 	fft->fft_buf = mod_zalloc(mod, fft->fft_buffer_size);
 	if (!fft->fft_buf) {
 		comp_err(dev, "Failed FFT buffer allocate");
@@ -226,7 +222,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	ret = mfcc_get_window(state, config->window);
 	if (ret < 0) {
 		comp_err(dev, "Failed Window function");
-		goto free_fft_out;
+		goto free_fft_plan;
 	}
 
 	/* Setup Mel auditory filterbank. FFT input and output buffers are used
@@ -248,7 +244,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	ret = mod_psy_get_mel_filterbank(mod, fb);
 	if (ret < 0) {
 		comp_err(dev, "Failed Mel filterbank");
-		goto free_fft_out;
+		goto free_fft_plan;
 	}
 
 	/* Setup DCT and cepstral lifter only when num_ceps > 0.
@@ -286,15 +282,16 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 
 	/* Scratch overlay during runtime
 	 *
-	 *  +--------------------------------------------------------+
-	 *  | 1. fft_buf[], 16 bits,size x 4, e.g. 512 -> 2048 bytes |
-	 *  +-------------------------------------+------------------+
-	 *  | 3. power_spectra[],                 |
-	 *  |    32 bits, e.g. x257 -> 1028 bytes |
-	 *  +-------------------------------------+
+	 *  +------------------------------------------------------------+
+	 *  | 1. fft_buf[], 32 bits, size x 8, e.g. 512 -> 4096 bytes    |
+	 *  +-------------------------------------+----------------------+
+	 *  | 3. power_spectra[],                 | 6. mel_log_32[],     |
+	 *  |    32 bits, e.g. x257 -> 1028 bytes |    32 bits, e.g. x80 |
+	 *  |                                     |    320 bytes         |
+	 *  +-------------------------------------+----------------------+
 	 *
 	 *  +---------------------------------------------------------------------------------+
-	 *  | 2. fft_out[], 16 bits,size x 4, e.g. 512 -> 2048 bytes                          |
+	 *  | 2. fft_out[], 32 bits, size x 8, e.g. 512 -> 4096 bytes                         |
 	 *  +----------------------------------+----------------------------------+-----------+
 	 *  | 4. mel_spectra[],                | 5. cepstral_coef[],              |
 	 *  |    16 bits, e.g. x23 -> 46 bytes |    16 bits, e.g. 13x -> 26 bytes |
@@ -304,6 +301,18 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 
 	/* Use FFT buffer as scratch for later computed data */
 	state->power_spectra = (int32_t *)&fft->fft_buf[0];
+	state->mel_log_32 = &state->power_spectra[fft->half_fft_size];
+
+	/* Check that mel_log_32 fits in the remaining fft_buf scratch space */
+	int mel_log_32_space = (int)(fft->fft_buffer_size / sizeof(int32_t)) - fft->half_fft_size;
+
+	if (config->num_mel_bins > mel_log_32_space) {
+		comp_err(dev, "num_mel_bins %d exceeds mel_log_32 scratch space %d",
+			 config->num_mel_bins, mel_log_32_space);
+		ret = -EINVAL;
+		goto free_lifter;
+	}
+
 	state->mel_spectra = (struct mat_matrix_16b *)&fft->fft_out[0];
 	if (!state->mel_only) {
 		state->cepstral_coef =
@@ -330,7 +339,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 		comp_err(dev, "Output %d int16 per hop exceeds sink capacity %d (hop %d x ch %d)",
 			 out_per_hop, sink_per_hop, fft->fft_hop_size, channels);
 		ret = -EINVAL;
-		goto free_dct_matrix;
+		goto free_lifter;
 	}
 
 	/* Set initial state for STFT */
@@ -338,25 +347,32 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	state->prev_samples_valid = false;
 	state->magic_pending = false;
 	state->out_data_ptr = NULL;
+	state->out_data_ptr_32 = NULL;
 	state->out_remain = 0;
 
 	comp_dbg(dev, "done");
 	return 0;
 
+free_lifter:
+	mod_free(mod, state->lifter.matrix);
+
 free_dct_matrix:
-	rfree(state->dct.matrix);
+	mod_free(mod, state->dct.matrix);
 
 free_melfb_data:
-	rfree(fb->data);
+	mod_free(mod, fb->data);
+
+free_fft_plan:
+	mod_fft_plan_free(mod, fft->fft_plan);
 
 free_fft_out:
-	rfree(fft->fft_out);
+	mod_free(mod, fft->fft_out);
 
 free_fft_buf:
-	rfree(fft->fft_buf);
+	mod_free(mod, fft->fft_buf);
 
 free_buffers:
-	rfree(state->buffers);
+	mod_free(mod, state->buffers);
 
 exit:
 	return ret;
diff --git a/src/audio/mfcc/tune/README.txt b/src/audio/mfcc/tune/README.txt
index 7ea6618896b9..a0c3189e81a3 100644
--- a/src/audio/mfcc/tune/README.txt
+++ b/src/audio/mfcc/tune/README.txt
@@ -8,13 +8,32 @@ need to be created with "scripts/build-tools.sh -t". Next the testbench
 is build with "scripts/rebuild-testbench.sh".
 
 Once the previous steps are done, a sample wav file can be processed
-into stream of cepstral coefficients with script run_mfcc.sh. E.g.
-next command processes an ALSA test file with speech clip "front center".
-The output file is hard-coded to mfcc.raw.
+with script run_mfcc.sh. The script converts the input to raw 16 kHz
+stereo format and runs the testbench for S16, S24, and S32 bit depths,
+producing both cepstral coefficient (MFCC) and Mel spectrogram outputs.
 
 ./run_mfcc.sh /usr/share/sounds/alsa/Front_Center.wav
 
-The output can be plotted and retrieved with Matlab or Octave command:
+Output files from host testbench:
+  mfcc_s16.raw, mfcc_s24.raw, mfcc_s32.raw   - cepstral coefficients
+  mel_s16.raw, mel_s24.raw, mel_s32.raw       - Mel spectrogram
+
+If the XTENSA_PATH environment variable is set, the script also runs
+the Xtensa build of the testbench (via xt-run) and produces additional
+output files prefixed with "xt_":
+  xt_mfcc_s16.raw, xt_mfcc_s24.raw, xt_mfcc_s32.raw
+  xt_mel_s16.raw, xt_mel_s24.raw, xt_mel_s32.raw
+
+All output files can be decoded and plotted at once in Matlab or Octave
+with the decode_all.m script:
+
+decode_all
+
+This calls decode_ceps for each MFCC file (13 cepstral coefficients) and
+decode_mel for each Mel file (80 Mel bins), plotting spectrograms for all
+files that exist including the Xtensa variants.
+
+Individual files can also be decoded manually:
 
 [ceps, t, n] = decode_ceps('mfcc_s16.raw', 13);
 
@@ -22,14 +41,12 @@ In the above it's known from configuration script that MFCC was set up to
 output 13 cepstral coefficients from each FFT -> Mel -> DCT -> Cepstral
 coefficients computation run.
 
+The 80-band Mel output can be visualized with the command:
+
+[mel, t, n] = decode_mel('mel_s16.raw', 80);
+
 Other kind of signals have quite big visual difference in audio features. Try
 e.g. other sound files found in computer.
 
 ./run_mfcc.sh /usr/share/sounds/gnome/default/alerts/bark.ogg
 ./run_mfcc.sh /usr/share/sounds/gnome/default/alerts/sonar.ogg
-
-The script runs the same input sample with s16/24/32 formats for
-cepstral coefficients data output and Mel frequency spectrogram
-output. The 80 bands Mel output can be visualized with command:
-
-[ceps, t, n] = decode_mel('mel_s16.raw', 80);
diff --git a/src/audio/mfcc/tune/decode_all.m b/src/audio/mfcc/tune/decode_all.m
new file mode 100644
index 000000000000..d5b60289b4cf
--- /dev/null
+++ b/src/audio/mfcc/tune/decode_all.m
@@ -0,0 +1,39 @@
+% decode_all.m - Decode all MFCC and Mel raw output files from run_mfcc.sh
+%
+% SPDX-License-Identifier: BSD-3-Clause
+% Copyright(c) 2026 Intel Corporation.
+
+num_ceps = 13;
+num_mel = 80;
+
+% MFCC cepstral output files
+ceps_files = {'mfcc_s16.raw', 'mfcc_s24.raw', 'mfcc_s32.raw'};
+
+% Mel output files with corresponding format
+mel_files = {'mel_s16.raw', 'mel_s24.raw', 'mel_s32.raw'};
+mel_fmts  = {'s16',         's24',          's32'};
+
+% Xtensa prefixed variants
+xt_ceps_files = {'xt_mfcc_s16.raw', 'xt_mfcc_s24.raw', 'xt_mfcc_s32.raw'};
+xt_mel_files  = {'xt_mel_s16.raw',  'xt_mel_s24.raw',  'xt_mel_s32.raw'};
+
+all_ceps_files = [ceps_files, xt_ceps_files];
+all_mel_files  = [mel_files, xt_mel_files];
+all_mel_fmts   = [mel_fmts, mel_fmts];
+
+for i = 1:length(all_ceps_files)
+	fn = all_ceps_files{i};
+	if exist(fn, 'file')
+		fprintf('Decoding MFCC ceps: %s\n', fn);
+		[ceps, t, n] = decode_ceps(fn, num_ceps);
+	end
+end
+
+for i = 1:length(all_mel_files)
+	fn = all_mel_files{i};
+	fmt = all_mel_fmts{i};
+	if exist(fn, 'file')
+		fprintf('Decoding Mel: %s\n', fn);
+		[mel, t, n] = decode_mel(fn, num_mel, fmt);
+	end
+end
diff --git a/src/audio/mfcc/tune/decode_mel.m b/src/audio/mfcc/tune/decode_mel.m
index c52ad4b9f6d9..f6a723aa2040 100644
--- a/src/audio/mfcc/tune/decode_mel.m
+++ b/src/audio/mfcc/tune/decode_mel.m
@@ -1,8 +1,9 @@
-% [mel, t, n] = decode_mel(fn, num_mel, num_channels)
+% [mel, t, n] = decode_mel(fn, num_mel, fmt, num_channels)
 %
 % Input
-%   fn - File with MFCC data in .raw or .wav format
+%   fn - File with Mel data in .raw or .wav format
 %   num_mel - number of Mel coefficients per frame
+%   fmt - format of the Mel data ('s16', 's24', 's32')
 %   num_channels - needed for .raw format, omit for .wav
 %
 % Outputs
@@ -13,26 +14,51 @@
 % SPDX-License-Identifier: BSD-3-Clause
 % Copyright(c) 2026 Intel Corporation.
 
-function [mel, t, n] = decode_mel(fn, num_mel, num_channels)
+function [mel, t, n] = decode_mel(fn, num_mel, fmt, num_channels)
 
 if nargin < 3
+	fmt = 's16';
+end
+if nargin < 4
 	num_channels = 1;
 end
 
 % MFCC stream
 fs = 16e3;
-qformat = 7;
-magic = [25443 28006]; % ASCII 'mfcc' as int16
 
-% Load output data
-[data, num_channels] = get_file(fn, num_channels);
+switch fmt
+  case 's16'
+    qformat = 7;
+    magic = [25443 28006]; % ASCII 'mfcc' as two int16
+    num_magic = 2;
+  case 's24'
+    qformat = 15;
+    magic = int32(1835426659); % 0x6D666363 as int32
+    num_magic = 1;
+  case 's32'
+    qformat = 23;
+    magic = int32(1835426659); % 0x6D666363 as int32
+    num_magic = 1;
+    otherwise
+    error("Use 's16', 's24', or 's32' as format.");
+end
 
-idx1 = find(data == magic(1));
-idx = [];
-for i = 1:length(idx1)
-	if data(idx1(i) + 1) == magic(2)
-		idx = [idx idx1(i)];
+% Load output data
+[data, num_channels] = get_file(fn, num_channels, fmt);
+
+if strcmp(fmt, 's16')
+	idx1 = find(data == magic(1));
+	idx = [];
+	for i = 1:length(idx1)
+		next_word = idx1(i) + 1;
+		if next_word <= length(data)
+			if data(next_word) == magic(2)
+				idx = [idx idx1(i)];
+			end
+		end
 	end
+else
+	idx = find(data == magic);
 end
 
 if isempty(idx)
@@ -54,9 +80,9 @@
 
 mel = zeros(num_mel, num_frames);
 for i = 1:num_frames
-	i1 = idx(i) + 2;
+	i1 = idx(i) + num_magic;
 	i2 = i1 + num_mel - 1;
-	mel(:,i) = data(i1:i2) / 2^qformat;
+	mel(:,i) = double(data(i1:i2)) / 2^qformat;
 end
 
 figure;
@@ -71,28 +97,46 @@
 
 end
 
-function [data, num_channels] = get_file(fn, num_channels)
+function [data, num_channels] = get_file(fn, num_channels, fmt)
 
 [~, ~, ext] = fileparts(fn);
 
+switch fmt
+	case 's16'
+		read_fmt = 'int16';
+	case {'s24', 's32'}
+		read_fmt = 'int32';
+	otherwise
+		error("Use 's16', 's24', or 's32' as format.");
+end
+
 switch lower(ext)
 	case '.raw'
 		fh = fopen(fn, 'r');
-		data = fread(fh, 'int16');
+		data = fread(fh, read_fmt);
 		fclose(fh);
 	case '.wav'
 		tmp = audioread(fn, 'native');
 		t = whos('tmp');
-		if ~strcmp(t.class, 'int16')
-			error('Only 16-bit wav file format is supported');
+		switch fmt
+			case 's16'
+				if ~strcmp(t.class, 'int16')
+					error('Expected 16-bit wav for s16 format');
+				end
+			case {'s24', 's32'}
+				if ~strcmp(t.class, 'int32')
+					error('Expected 32-bit wav for %s format', fmt);
+				end
 		end
 		s = size(tmp);
 		num_channels = s(2);
 		if num_channels > 1
-			data = int16(zeros(prod(s), 1));
+			data = zeros(prod(s), 1, t.class);
 			for i = 1:num_channels
 				data(i:num_channels:end) = tmp(:, i);
 			end
+		else
+			data = tmp;
 		end
 	otherwise
 		error('Unknown audio format');
diff --git a/src/audio/mfcc/tune/run_mfcc.sh b/src/audio/mfcc/tune/run_mfcc.sh
index a1b8030a6063..e3c309fbc03e 100755
--- a/src/audio/mfcc/tune/run_mfcc.sh
+++ b/src/audio/mfcc/tune/run_mfcc.sh
@@ -7,50 +7,49 @@ set -e
 RAW_INPUT_S16=in_s16.raw
 RAW_INPUT_S24=in_s24.raw
 RAW_INPUT_S32=in_s32.raw
-RAW_OUTPUT_S16=mfcc_s16.raw
-RAW_OUTPUT_S24=mfcc_s24.raw
-RAW_OUTPUT_S32=mfcc_s32.raw
 
 VALGRIND="valgrind --leak-check=full"
+#VALGRIND=""
 TESTBENCH=$SOF_WORKSPACE/sof/tools/testbench/build_testbench/install/bin/sof-testbench4
-TOPOLOGY_S16=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc16.tplg
-TOPOLOGY_S24=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc24.tplg
-TOPOLOGY_S32=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc32.tplg
-OPT_S16="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY_S16"
-OPT_S24="-r 16000 -c 2 -b S24_LE -p 3,4 -t $TOPOLOGY_S24"
-OPT_S32="-r 16000 -c 2 -b S32_LE -p 3,4 -t $TOPOLOGY_S32"
-
-# Convert input audio file raw 16 kHz 2 channel 16 bit
-sox -R --encoding signed-integer "$1" -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16"
-sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S32"
-sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S24" vol 0.003906250000
-
-# Run testbench
-$VALGRIND $TESTBENCH $OPT_S16 -i "$RAW_INPUT_S16" -o "$RAW_OUTPUT_S16"
-$VALGRIND $TESTBENCH $OPT_S24 -i "$RAW_INPUT_S24" -o "$RAW_OUTPUT_S24"
-$VALGRIND $TESTBENCH $OPT_S32 -i "$RAW_INPUT_S32" -o "$RAW_OUTPUT_S32"
-
-echo ----------------------------------------------------------------------------------
-echo The MFCC data was output to file $RAW_OUTPUT_S16, $RAW_OUTPUT_S24, $RAW_OUTPUT_S32
-echo ----------------------------------------------------------------------------------
-
-RAW_OUTPUT_S16=mel_s16.raw
-RAW_OUTPUT_S24=mel_s24.raw
-RAW_OUTPUT_S32=mel_s32.raw
-
-TESTBENCH=$SOF_WORKSPACE/sof/tools/testbench/build_testbench/install/bin/sof-testbench4
-TOPOLOGY_S16=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel16.tplg
-TOPOLOGY_S24=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel24.tplg
-TOPOLOGY_S32=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel32.tplg
-OPT_S16="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY_S16"
-OPT_S24="-r 16000 -c 2 -b S24_LE -p 3,4 -t $TOPOLOGY_S24"
-OPT_S32="-r 16000 -c 2 -b S32_LE -p 3,4 -t $TOPOLOGY_S32"
-
-# Run testbench
-$VALGRIND $TESTBENCH $OPT_S16 -i "$RAW_INPUT_S16" -o "$RAW_OUTPUT_S16"
-$VALGRIND $TESTBENCH $OPT_S24 -i "$RAW_INPUT_S24" -o "$RAW_OUTPUT_S24"
-$VALGRIND $TESTBENCH $OPT_S32 -i "$RAW_INPUT_S32" -o "$RAW_OUTPUT_S32"
-
-echo ----------------------------------------------------------------------------------
-echo The MFCC Mel data was output to file $RAW_OUTPUT_S16, $RAW_OUTPUT_S24, $RAW_OUTPUT_S32
-echo ----------------------------------------------------------------------------------
+TESTBENCH_RUN="$VALGRIND $TESTBENCH"
+
+convert_input() {
+	sox -R --encoding signed-integer "$1" -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16"
+	sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 \
+		"$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S32"
+	sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 \
+		"$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S24" vol 0.003906250000
+}
+
+run_testbench() {
+	local tplg_base="$1"
+	local out_s16="$2"
+	local out_s24="$3"
+	local out_s32="$4"
+	local label="$5"
+	local tplg_s16="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}16.tplg"
+	local tplg_s24="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}24.tplg"
+	local tplg_s32="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}32.tplg"
+
+	$TESTBENCH_RUN -r 16000 -c 2 -b S16_LE -p 3,4 -t "$tplg_s16" -i "$RAW_INPUT_S16" -o "$out_s16"
+	$TESTBENCH_RUN -r 16000 -c 2 -b S24_LE -p 3,4 -t "$tplg_s24" -i "$RAW_INPUT_S24" -o "$out_s24"
+	$TESTBENCH_RUN -r 16000 -c 2 -b S32_LE -p 3,4 -t "$tplg_s32" -i "$RAW_INPUT_S32" -o "$out_s32"
+
+	echo ----------------------------------------------------------------------------------
+	echo "The ${label} data was output to file ${out_s16}, ${out_s24}, ${out_s32}"
+	echo ----------------------------------------------------------------------------------
+}
+
+main() {
+	convert_input "${1:?usage: run_mfcc.sh <input audio file>}" # abort with usage text when no input file is given
+	run_testbench "sof-hda-benchmark-mfcc" mfcc_s16.raw mfcc_s24.raw mfcc_s32.raw "MFCC"
+	run_testbench "sof-hda-benchmark-mfccmel" mel_s16.raw mel_s24.raw mel_s32.raw "MFCC Mel"
+
+	if [ -n "$XTENSA_PATH" ]; then # optional second pass: same topologies through xt-run, outputs prefixed xt_
+		TESTBENCH_RUN="$XTENSA_PATH/xt-run $SOF_WORKSPACE/sof/tools/testbench/build_xt_testbench/sof-testbench4"
+		run_testbench "sof-hda-benchmark-mfcc" xt_mfcc_s16.raw xt_mfcc_s24.raw xt_mfcc_s32.raw "Xtensa MFCC"
+		run_testbench "sof-hda-benchmark-mfccmel" xt_mel_s16.raw xt_mel_s24.raw xt_mel_s32.raw "Xtensa MFCC Mel"
+	fi
+}
+
+main "$@"
diff --git a/src/include/sof/audio/mfcc/mfcc_comp.h b/src/include/sof/audio/mfcc/mfcc_comp.h
index accf45868cbd..025eef116752 100644
--- a/src/include/sof/audio/mfcc/mfcc_comp.h
+++ b/src/include/sof/audio/mfcc/mfcc_comp.h
@@ -30,25 +30,7 @@
 #endif
 
 #define MFCC_MAGIC 0x6d666363 /* ASCII for "mfcc" */
-
-/* Set to 16 for lower RAM and MCPS with slightly lower quality. Set to 32 for best
- * quality but higher MCPS and RAM. The MFCC input is currently 16 bits. With this option
- * set to 32 the FFT and Mel filterbank are computed with better 32 bit precision. There
- * is also need to enable 32 bit FFT from Kconfig if set.
- */
-#define MFCC_FFT_BITS	16
-
-/* MFCC with 16 bit FFT benefits from data normalize, for 32 bits there's no
- * significant impact. The amount of left shifts for FFT input is limited to
- * 10 that equals about 60 dB boost. The boost is compensated in Mel energy
- * calculation.
- */
-#if MFCC_FFT_BITS == 16
-#define MFCC_NORMALIZE_FFT
-#else
-#undef MFCC_NORMALIZE_FFT
-#endif
-#define MFCC_NORMALIZE_MAX_SHIFT	10
+#define MFCC_FFT_BITS	32
 
 /** \brief Type definition for processing function select return value. */
 typedef void (*mfcc_func)(struct processing_module *mod,
@@ -79,15 +61,8 @@ struct mfcc_pre_emph {
 };
 
 struct mfcc_fft {
-#if MFCC_FFT_BITS == 16
-	struct icomplex16 *fft_buf; /**< fft_padded_size */
-	struct icomplex16 *fft_out; /**< fft_padded_size */
-#elif MFCC_FFT_BITS == 32
 	struct icomplex32 *fft_buf; /**< fft_padded_size */
 	struct icomplex32 *fft_out; /**< fft_padded_size */
-#else
-#error "MFCC_FFT_BITS needs to be 16 or 32"
-#endif
 	struct fft_plan *fft_plan;
 	int fft_fill_start_idx; /**< Set to 0 for pad left, etc. */
 	int fft_size;
@@ -114,7 +89,8 @@ struct mfcc_state {
 	struct mat_matrix_16b *mel_spectra; /**< Pointer to scratch */
 	struct mat_matrix_16b *cepstral_coef; /**< Pointer to scratch */
 	int32_t *power_spectra; /**< Pointer to scratch */
-	int16_t mmax; /**< Maximum Mel value in Q9.7 */
+	int32_t *mel_log_32; /**< Pointer to scratch for 32-bit Mel output Q9.23 */
+	int32_t mmax; /**< Maximum Mel value in Q9.23 */
 	int16_t buf_avail;
 	int16_t *buffers;
 	int16_t *prev_data; /**< prev_data_size */
@@ -132,6 +108,7 @@ struct mfcc_state {
 	bool magic_pending; /**< True when magic word not yet written for current output */
 	size_t sample_buffers_size; /**< bytes */
 	int16_t *out_data_ptr; /**< Read pointer into scratch data for multi-period output */
+	int32_t *out_data_ptr_32; /**< Read pointer for 32-bit mel-only output */
 	int out_remain; /**< Remaining int16_t samples to write to sink from scratch */
 };
 
@@ -166,10 +143,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data,
 
 void mfcc_fill_fft_buffer(struct mfcc_state *state);
 
-#ifdef MFCC_NORMALIZE_FFT
-int mfcc_normalize_fft_buffer(struct mfcc_state *state);
-#endif
-
 void mfcc_apply_window(struct mfcc_state *state, int input_shift);
 
 #if CONFIG_FORMAT_S16LE
diff --git a/src/include/sof/math/auditory.h b/src/include/sof/math/auditory.h
index b09017786e36..bd707dc5079a 100644
--- a/src/include/sof/math/auditory.h
+++ b/src/include/sof/math/auditory.h
@@ -103,11 +103,11 @@ void psy_apply_mel_filterbank_16(struct psy_mel_filterbank *mel_fb, struct icomp
  * \param[in]  fft_out       Array of complex numbers from FFT in Q1.31 format.
  * \param[out] power_spectra Array of linear power spectra, needed scratch are that is half + 1
  *                           side of fft_out. The data can be discarded after if no use.
- * \param[out] mel_log       Array of Q9.7 log/log10/10log10 format Mel band energies.
+ * \param[out] mel_log       Array of Q9.23 log/log10/10log10 format Mel band energies.
  * \param[in]  bitshift      A shift left scale that has been possibly applied to FFT. This will
  *                           be subtracted from the log or decibels notation.
  */
 void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *mel_fb, struct icomplex32 *fft_out,
-				 int32_t *power_spectra, int16_t *mel_log, int bitshift);
+				 int32_t *power_spectra, int32_t *mel_log, int bitshift);
 
 #endif /* __SOF_MATH_AUDITORY_H__ */
diff --git a/src/math/auditory/mel_filterbank_32.c b/src/math/auditory/mel_filterbank_32.c
index a80d09ad624a..414ddf482f93 100644
--- a/src/math/auditory/mel_filterbank_32.c
+++ b/src/math/auditory/mel_filterbank_32.c
@@ -12,7 +12,7 @@
 #include <stdint.h>
 
 void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *fb, struct icomplex32 *fft_out,
-				 int32_t *power_spectra, int16_t *mel_log, int bitshift)
+				 int32_t *power_spectra, int32_t *mel_log, int bitshift)
 {
 	int64_t pmax;
 	int64_t p;
@@ -79,8 +79,8 @@ void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *fb, struct icomplex3
 		 */
 		log -= ((int32_t)lshift + 2 * bitshift) << 16;
 
-		/* Scale for desired log  */
-		log = Q_MULTSR_32X32((int64_t)log, fb->log_mult, 16, 29, 7);
-		mel_log[i] = sat_int16(log); /* Q8.7 */
+		/* Scale for desired log, output as Q9.23 */
+		log = Q_MULTSR_32X32((int64_t)log, fb->log_mult, 16, 29, 23);
+		mel_log[i] = log; /* Q9.23 */
 	}
 }
diff --git a/test/cmocka/src/math/auditory/auditory.c b/test/cmocka/src/math/auditory/auditory.c
index dc05c387cfae..ff222e52fadd 100644
--- a/test/cmocka/src/math/auditory/auditory.c
+++ b/test/cmocka/src/math/auditory/auditory.c
@@ -163,7 +163,8 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag,
 	float error_rms;
 	float delta_max = 0;
 	int32_t *power_spectra;
-	int16_t *mel_log;
+	int32_t *mel_log;
+	int16_t mel_log_16;
 	int i;
 	const int half_fft = num_fft_bins / 2 + 1;
 	const int fft_size = num_fft_bins * sizeof(struct icomplex32);
@@ -181,7 +182,7 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag,
 		goto err_out_alloc;
 	}
 
-	mel_log = malloc(MEL_FILTERBANK_32_TEST1_NUM_MEL_BINS * sizeof(int16_t));
+	mel_log = malloc(num_mel_bins * sizeof(int32_t));
 	if (!mel_log) {
 		fprintf(stderr, "Failed to allocate output vector\n");
 		goto err_mel_alloc;
@@ -215,9 +216,10 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag,
 	power_spectra = (int32_t *)&fft_buf[0];
 	psy_apply_mel_filterbank_32(&fb, fft_out, power_spectra, mel_log, shift);
 
-	/* Check */
+	/* Check: convert Q9.23 output to Q9.7 for comparison with reference */
 	for (i = 0; i < num_mel_bins; i++) {
-		delta = (float)ref_mel_log[i] - (float)mel_log[i];
+		mel_log_16 = (int16_t)(mel_log[i] >> 16);
+		delta = (float)ref_mel_log[i] - (float)mel_log_16;
 		sum_squares += delta * delta;
 		if (delta > delta_max)
 			delta_max = delta;
@@ -233,7 +235,7 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag,
 	FILE *fh = fopen("mel_filterbank_32.txt", "w");
 
 	for (i = 0; i < num_mel_bins; i++)
-		fprintf(fh, "%d %d\n", ref_mel_log[i], mel_log[i]);
+		fprintf(fh, "%d %d\n", ref_mel_log[i], (int16_t)(mel_log[i] >> 16));
 
 	fclose(fh);
 #endif
diff --git a/tools/topology/topology2/cavs-sdw.conf b/tools/topology/topology2/cavs-sdw.conf
index d0e426e1e143..6932543c06e5 100644
--- a/tools/topology/topology2/cavs-sdw.conf
+++ b/tools/topology/topology2/cavs-sdw.conf
@@ -27,6 +27,7 @@
 <host-gateway-playback.conf>
 <host-gateway-capture.conf>
 <host-gateway-tdfb-drc-capture.conf>
+<host-gateway-src-mfcc-capture.conf>
 <src.conf>
 <io-gateway.conf>
 <io-gateway-capture.conf>
@@ -248,3 +249,11 @@ IncludeByKey.SDW_JACK_ECHO_REF {
 IncludeByKey.SDW_SPK_ECHO_REF {
 	"true" "platform/intel/sdw-amp-echo-ref.conf"
 }
+
+IncludeByKey.SDW_JACK_AUDIO_FEATURE_CAPTURE {
+	"true" "platform/intel/sdw-jack-audio-feature.conf"
+}
+
+IncludeByKey.SDW_DMIC_AUDIO_FEATURE_CAPTURE {
+	"true" "platform/intel/sdw-dmic-audio-feature.conf"
+}
diff --git a/tools/topology/topology2/development/tplg-targets.cmake b/tools/topology/topology2/development/tplg-targets.cmake
index 27094322c7f3..a906852d04f0 100644
--- a/tools/topology/topology2/development/tplg-targets.cmake
+++ b/tools/topology/topology2/development/tplg-targets.cmake
@@ -477,4 +477,13 @@ SDW_JACK_OUT_STREAM=Playback-SimpleJack,SDW_JACK_IN_STREAM=Capture-SimpleJack,CO
 "cavs-sdw\;sof-ptl-rt722-compr\;PLATFORM=ptl,SDW_DMIC=1,NUM_SDW_AMP_LINKS=1,\
 SDW_AMP_FEEDBACK=false,SDW_SPK_STREAM=Playback-SmartAmp,SDW_DMIC_STREAM=Capture-SmartMic,\
 SDW_JACK_OUT_STREAM=Playback-SimpleJack,SDW_JACK_IN_STREAM=Capture-SimpleJack,COMPRESSED=true"
+
+# Soundwire topologies with MFCC audio features capture
+"cavs-sdw\;sof-mtl-rt713-l0-rt1316-l12-mfcc\;PLATFORM=mtl,NUM_SDW_AMP_LINKS=2,\
+HDMI1_ID=4,HDMI2_ID=5,HDMI3_ID=6,SDW_JACK_AUDIO_FEATURE_CAPTURE=true"
+
+"cavs-sdw\;sof-arl-cs42l43-l0-cs35l56-l23-mfcc\;PLATFORM=mtl,NUM_SDW_AMP_LINKS=2,SDW_DMIC=1,\
+SDW_AMP_FEEDBACK=false,SDW_SPK_STREAM=Playback-SmartAmp,SDW_DMIC_STREAM=Capture-SmartMic,\
+SDW_JACK_OUT_STREAM=Playback-SimpleJack,SDW_JACK_IN_STREAM=Capture-SimpleJack,\
+SDW_JACK_AUDIO_FEATURE_CAPTURE=true,SDW_DMIC_AUDIO_FEATURE_CAPTURE=true"
 )
diff --git a/tools/topology/topology2/include/common/common_definitions.conf b/tools/topology/topology2/include/common/common_definitions.conf
index 28821efe3ed1..87c69dd41e41 100644
--- a/tools/topology/topology2/include/common/common_definitions.conf
+++ b/tools/topology/topology2/include/common/common_definitions.conf
@@ -71,4 +71,6 @@ Define {
 	PCM_FORMAT_ALL				false # Basic s16/s24/s32, no float, 8-bit etc.
 	SDW_JACK_ECHO_REF			false # No echo reference for 3.5mm jack
 	SDW_SPK_ECHO_REF			false # No echo reference for speaker
+	SDW_JACK_AUDIO_FEATURE_CAPTURE		false # No audio features capture for jack
+	SDW_DMIC_AUDIO_FEATURE_CAPTURE		false # No audio features capture for microphone
 }
diff --git a/tools/topology/topology2/include/pipelines/cavs/host-gateway-src-mfcc-capture.conf b/tools/topology/topology2/include/pipelines/cavs/host-gateway-src-mfcc-capture.conf
new file mode 100644
index 000000000000..793f71b883ab
--- /dev/null
+++ b/tools/topology/topology2/include/pipelines/cavs/host-gateway-src-mfcc-capture.conf
@@ -0,0 +1,134 @@
+#
+# SRC-MFCC capture pipeline
+#
+# This class provides a host capture pipeline that delivers MFCC audio features to the host.
+# All attributes defined herein are namespaced by alsatplg to
+# "Object.Pipeline.host-gateway-src-mfcc-capture.N.attribute_name".
+#
+# Usage: host-gateway-src-mfcc-capture pipeline object can be instantiated as:
+#
+# Object.Pipeline.host-gateway-src-mfcc-capture."N" {
+#	period		1000
+#	time_domain	"timer"
+# }
+#
+# Where N is the unique pipeline ID within the same alsaconf node.
+#
+
+<include/common/input_audio_format.conf>
+<include/common/output_audio_format.conf>
+<include/components/pipeline.conf>
+<include/components/host-copier.conf>
+<include/components/src.conf>
+<include/components/mfcc.conf>
+
+Class.Pipeline."host-gateway-src-mfcc-capture" {
+
+	<include/pipelines/pipeline-common.conf>
+
+	attributes {
+		!constructor [
+			"index"
+		]
+
+		#
+		# host-gateway-src-mfcc-capture objects instantiated within the same alsaconf
+		# node must have unique pipeline_id attribute
+		#
+		unique	"instance"
+	}
+
+	Object.Widget {
+		src."1" {
+			num_input_pins 1
+			num_output_pins 1
+			num_input_audio_formats 3
+			num_output_audio_formats 1
+			Object.Base.input_audio_format [
+				{
+					in_bit_depth		32
+					in_valid_bit_depth	32
+					in_rate			48000
+				}
+				{
+					in_bit_depth		32
+					in_valid_bit_depth	32
+					in_rate			96000
+				}
+				{
+					in_bit_depth		32
+					in_valid_bit_depth	32
+					in_rate			192000
+				}
+			]
+			Object.Base.output_audio_format [
+				{
+					out_bit_depth		32
+					out_valid_bit_depth	32
+					out_rate		16000
+				}
+			]
+		}
+
+		mfcc."1" {
+			num_input_audio_formats 1
+			num_output_audio_formats 1
+			Object.Base.input_audio_format [
+				{
+					in_bit_depth		32
+					in_valid_bit_depth	32
+					in_rate			16000
+				}
+			]
+			Object.Base.output_audio_format [
+				{
+					out_bit_depth		32
+					out_valid_bit_depth	32
+					out_rate		16000
+				}
+			]
+		}
+
+		host-copier."1" {
+			type	"aif_out"
+			node_type $HDA_HOST_INPUT_CLASS
+			num_input_pins 1
+			num_output_pins 1
+			num_input_audio_formats 1
+			num_output_audio_formats 1
+			Object.Base.input_audio_format [
+				{
+					in_bit_depth		32
+					in_valid_bit_depth	32
+					in_rate			16000
+				}
+			]
+			Object.Base.output_audio_format [
+				{
+					out_bit_depth		32
+					out_valid_bit_depth	32
+					out_rate		16000
+				}
+			]
+		}
+
+		pipeline."1" {
+			priority	0
+			lp_mode		0
+		}
+	}
+
+
+	Object.Base {
+		!route [
+			{
+				source	src.$index.1
+				sink	mfcc.$index.1
+			}
+		]
+	}
+
+	direction	"capture"
+	dynamic_pipeline 1
+	time_domain	"timer"
+}
diff --git a/tools/topology/topology2/platform/intel/sdw-dmic-audio-feature.conf b/tools/topology/topology2/platform/intel/sdw-dmic-audio-feature.conf
new file mode 100644
index 000000000000..87039b261597
--- /dev/null
+++ b/tools/topology/topology2/platform/intel/sdw-dmic-audio-feature.conf
@@ -0,0 +1,56 @@
+Define {
+	SDW_DMIC_MODULE_COPIER_ID 41
+	SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_NAME "Microphone Audio Features"
+	SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_ID 48
+	SDW_DMIC_AUDIO_FEATURE_CAPTURE_STREAM_NAME "Microphone Audio Features Stream"
+	SDW_DMIC_AUDIO_FEATURE_CAPTURE_PIPELINE_ID 131
+}
+
+Object.Pipeline.host-gateway-src-mfcc-capture [
+	{
+		index $SDW_DMIC_AUDIO_FEATURE_CAPTURE_PIPELINE_ID
+
+		Object.Widget.host-copier.1 {
+			stream_name "$SDW_DMIC_AUDIO_FEATURE_CAPTURE_STREAM_NAME"
+			pcm_id $SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_ID
+		}
+
+		Object.Widget.mfcc.1 {
+			Object.Control {
+				bytes."1" {
+					name "$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_NAME MFCC bytes"
+					<include/components/mfcc/mel80.conf>
+				}
+			}
+		}
+	}
+]
+Object.Base.route [
+	{
+		source "module-copier.$SDW_DMIC_MODULE_COPIER_ID.0"
+		sink "src.$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PIPELINE_ID.1"
+	}
+	{
+		source "mfcc.$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PIPELINE_ID.1"
+		sink "host-copier.$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_ID.capture"
+	}
+]
+
+Object.PCM.pcm [
+	{
+		name "$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_NAME"
+		id $SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_ID
+		direction "capture"
+		Object.Base.fe_dai.1 {
+			name "$SDW_DMIC_AUDIO_FEATURE_CAPTURE_PCM_NAME"
+		}
+
+		Object.PCM.pcm_caps.1 {
+			name "$SDW_DMIC_AUDIO_FEATURE_CAPTURE_STREAM_NAME"
+			formats 'S32_LE'
+			rates '16000'
+			channels_min 2
+			channels_max 2
+		}
+	}
+]
diff --git a/tools/topology/topology2/platform/intel/sdw-jack-audio-feature.conf b/tools/topology/topology2/platform/intel/sdw-jack-audio-feature.conf
new file mode 100644
index 000000000000..9645199d6907
--- /dev/null
+++ b/tools/topology/topology2/platform/intel/sdw-jack-audio-feature.conf
@@ -0,0 +1,56 @@
+Define {
+	SDW_JACK_MODULE_COPIER_ID 11
+	SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_NAME "Jack In Audio Features"
+	SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_ID 47
+	SDW_JACK_AUDIO_FEATURE_CAPTURE_STREAM_NAME "Jack In Audio Features Stream"
+	SDW_JACK_AUDIO_FEATURE_CAPTURE_PIPELINE_ID 130
+}
+
+Object.Pipeline.host-gateway-src-mfcc-capture [
+	{
+		index $SDW_JACK_AUDIO_FEATURE_CAPTURE_PIPELINE_ID
+
+		Object.Widget.host-copier.1 {
+			stream_name "$SDW_JACK_AUDIO_FEATURE_CAPTURE_STREAM_NAME"
+			pcm_id $SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_ID
+		}
+
+		Object.Widget.mfcc.1 {
+			Object.Control {
+				bytes."1" {
+					name "$SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_NAME MFCC bytes"
+					<include/components/mfcc/mel80.conf>
+				}
+			}
+		}
+	}
+]
+Object.Base.route [
+	{
+		source "module-copier.$SDW_JACK_MODULE_COPIER_ID.0"
+		sink "src.$SDW_JACK_AUDIO_FEATURE_CAPTURE_PIPELINE_ID.1"
+	}
+	{
+		source "mfcc.$SDW_JACK_AUDIO_FEATURE_CAPTURE_PIPELINE_ID.1"
+		sink "host-copier.$SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_ID.capture"
+	}
+]
+
+Object.PCM.pcm [
+	{
+		name "$SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_NAME"
+		id $SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_ID
+		direction "capture"
+		Object.Base.fe_dai.1 {
+			name "$SDW_JACK_AUDIO_FEATURE_CAPTURE_PCM_NAME"
+		}
+
+		Object.PCM.pcm_caps.1 {
+			name "$SDW_JACK_AUDIO_FEATURE_CAPTURE_STREAM_NAME"
+			formats 'S32_LE'
+			rates '16000'
+			channels_min $SDW_JACK_CAPTURE_CH
+			channels_max $SDW_JACK_CAPTURE_CH
+		}
+	}
+]
diff --git a/tools/topology/topology2/platform/intel/sdw-jack-generic.conf b/tools/topology/topology2/platform/intel/sdw-jack-generic.conf
index 0dd663e6b39d..b2e1a259d9a0 100644
--- a/tools/topology/topology2/platform/intel/sdw-jack-generic.conf
+++ b/tools/topology/topology2/platform/intel/sdw-jack-generic.conf
@@ -496,6 +496,28 @@ Object.Widget {
 							}
 						}
 					]
+					Object.Widget.module-copier [
+						{
+							num_input_audio_formats 1
+							num_output_audio_formats 1
+							# index 11 is inherited from the pipeline definition
+							# the instance number is automatically generated as '0'
+							Object.Base.input_audio_format [
+								{
+									in_rate			$JACK_RATE
+									in_bit_depth		32
+									in_valid_bit_depth	32
+								}
+							]
+							Object.Base.output_audio_format [
+								{
+									out_rate		$JACK_RATE
+									out_bit_depth		32
+									out_valid_bit_depth	32
+								}
+							]
+						}
+					]
 				}
 			}
 		}
@@ -590,6 +612,10 @@ IncludeByKey.PASSTHROUGH {
 		}
 		{
 			source	"eqiir.11.0"
+			sink	"module-copier.11.0"
+		}
+		{
+			source	"module-copier.11.0"
 			sink	"host-copier.1.capture"
 		}
 		{