@@ -114,10 +114,13 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
114114 /* cir_buf_wrap() is required and is done below in a loop */
115115 ae_int16 * dst = (ae_int16 * )sink -> ptr + start_sample ;
116116 ae_int16 * src = source -> ptr ;
117- ae_int16x4 gain_vec ;
118- ae_int32x2 tmpl , tmph ;
117+ ae_f16x4 gain_vec ;
118+
119+ /* this func does not support unity gain as 1 cannot be represented as Q1.15 value */
120+ assert (gain < IPC4_MIXIN_UNITY_GAIN );
119121
120122 gain_vec = AE_L16_I ((ae_int16 * )& gain , 0 );
123+ gain_vec = AE_SLAI16S (gain_vec , 5 ); /* convert to Q1.15 */
121124
122125 assert (mixed_samples >= start_sample );
123126 samples_to_mix = AE_MIN_32_signed (mixed_samples - start_sample , sample_count );
@@ -141,13 +144,8 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
141144 /* process 4 frames per loop */
142145 for (i = 0 ; i < m ; i ++ ) {
143146 AE_LA16X4_IP (in_sample , inu , in );
144-
145147 /* apply gain to in_sample */
146- AE_MUL16X4 (tmph , tmpl , in_sample , gain_vec );
147- tmpl = AE_SRAI32 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
148- tmph = AE_SRAI32 (tmph , IPC4_MIXIN_GAIN_SHIFT );
149- in_sample = AE_CVT16X4 (tmph , tmpl );
150-
148+ in_sample = AE_MULFP16X4S (in_sample , gain_vec );
151149 AE_LA16X4_IP (out_sample , outu1 , out );
152150 out -- ;
153151 out_sample = AE_ADD16S (in_sample , out_sample );
@@ -160,11 +158,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
160158 */
161159 for (i = 0 ; i < left ; i ++ ) {
162160 AE_L16_IP (in_sample , (ae_int16 * )in , sizeof (ae_int16 ));
163-
164- AE_MUL16X4 (tmph , tmpl , in_sample , gain_vec );
165- tmpl = AE_SRAI32 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
166- in_sample = AE_CVT16X4 (tmpl , tmpl );
167-
161+ in_sample = AE_MULFP16X4S (in_sample , gain_vec );
168162 AE_L16_IP (out_sample , (ae_int16 * )out , 0 );
169163 out_sample = AE_ADD16S (in_sample , out_sample );
170164 AE_S16_0_IP (out_sample , (ae_int16 * )out , sizeof (ae_int16 ));
@@ -187,12 +181,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
187181 /* process 4 frames per loop */
188182 for (i = 0 ; i < m ; i ++ ) {
189183 AE_LA16X4_IP (in_sample , inu , in );
190-
191- AE_MUL16X4 (tmph , tmpl , in_sample , gain_vec );
192- tmpl = AE_SRAI32 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
193- tmph = AE_SRAI32 (tmph , IPC4_MIXIN_GAIN_SHIFT );
194- in_sample = AE_CVT16X4 (tmph , tmpl );
195-
184+ in_sample = AE_MULFP16X4S (in_sample , gain_vec );
196185 AE_SA16X4_IP (in_sample , outu2 , out );
197186 }
198187 AE_SA64POS_FP (outu2 , out );
@@ -202,11 +191,7 @@ static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
202191 */
203192 for (i = 0 ; i < left ; i ++ ) {
204193 AE_L16_IP (in_sample , (ae_int16 * )in , sizeof (ae_int16 ));
205-
206- AE_MUL16X4 (tmph , tmpl , in_sample , gain_vec );
207- tmpl = AE_SRAI32 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
208- in_sample = AE_CVT16X4 (tmpl , tmpl );
209-
194+ in_sample = AE_MULFP16X4S (in_sample , gain_vec );
210195 AE_S16_0_IP (in_sample , (ae_int16 * )out , sizeof (ae_int16 ));
211196 }
212197 }
@@ -309,7 +294,7 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
309294{
310295 int samples_to_mix , samples_to_copy , left_samples ;
311296 int n , nmax , i , m , left ;
312- ae_int32x2 in_sample , in_sample32 ;
297+ ae_int32x2 in_sample ;
313298 ae_int32x2 out_sample ;
314299 ae_int32x2 * in ;
315300 ae_int32x2 * out ;
@@ -319,10 +304,14 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
319304 /* cir_buf_wrap() is required and is done below in a loop */
320305 int32_t * dst = (int32_t * )sink -> ptr + start_sample ;
321306 int32_t * src = source -> ptr ;
322- ae_int16x4 gain_vec ;
323- ae_int64 tmph , tmpl ;
307+ ae_f24x2 gain_vec ;
308+ ae_int32 gain32 = ( ae_int32 ) gain ;
324309
325- gain_vec = AE_L16_I ((ae_int16 * )& gain , 0 );
310+ /* this func does not support unity gain as 1 cannot be represented as Q1.23 value */
311+ assert (gain < IPC4_MIXIN_UNITY_GAIN );
312+
313+ gain_vec = AE_MOVF24X2_FROMINT32X2 (AE_L32_I (& gain32 , 0 ));
314+ gain_vec = AE_SLAI24S (gain_vec , 13 ); /* convert to Q1.23 */
326315
327316 assert (mixed_samples >= start_sample );
328317 samples_to_mix = AE_MIN_32_signed (mixed_samples - start_sample , sample_count );
@@ -346,18 +335,10 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
346335 /* process 2 samples per time */
347336 for (i = 0 ; i < m ; i ++ ) {
348337 AE_LA32X2_IP (in_sample , inu , in );
349-
350- /* apply gain to in_sample */
351- in_sample32 = AE_SLAI32 (in_sample , 8 ); /* sign extension */
352- tmpl = AE_MUL32X16_L0 (in_sample32 , gain_vec );
353- tmph = AE_MUL32X16_H0 (in_sample32 , gain_vec );
354- tmpl = AE_SRAI64 (tmpl , 8 + IPC4_MIXIN_GAIN_SHIFT );
355- tmph = AE_SRAI64 (tmph , 8 + IPC4_MIXIN_GAIN_SHIFT );
356- in_sample = AE_SEL32_LL (AE_MOVINT32X2_FROMINT64 (tmph ),
357- AE_MOVINT32X2_FROMINT64 (tmpl ));
358-
338+ in_sample = AE_MULFP24X2R (AE_MOVF24X2_FROMINT32X2 (in_sample ), gain_vec );
359339 AE_LA32X2_IP (out_sample , outu1 , out );
360340 out -- ;
341+ /* out samples are already sign extended by other mixin in a loop below */
361342 out_sample = AE_ADD24S (in_sample , out_sample );
362343 AE_SA32X2_IP (out_sample , outu2 , out );
363344 }
@@ -366,13 +347,9 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
366347 /* process the left sample to avoid memory access overrun */
367348 if (left ) {
368349 AE_L32_IP (in_sample , (ae_int32 * )in , sizeof (ae_int32 ));
369-
370- in_sample32 = AE_SLAI32 (in_sample , 8 ); /* sign extension */
371- tmpl = AE_MUL32X16_L0 (in_sample32 , gain_vec );
372- tmpl = AE_SRAI64 (tmpl , 8 + IPC4_MIXIN_GAIN_SHIFT );
373- in_sample = AE_MOVINT32X2_FROMINT64 (tmpl );
374-
350+ in_sample = AE_MULFP24X2R (AE_MOVF24X2_FROMINT32X2 (in_sample ), gain_vec );
375351 AE_L32_IP (out_sample , (ae_int32 * )out , 0 );
352+ /* out samples are already sign extended by other mixin in a loop below */
376353 out_sample = AE_ADD24S (in_sample , out_sample );
377354 AE_S32_L_IP (out_sample , (ae_int32 * )out , sizeof (ae_int32 ));
378355 }
@@ -392,27 +369,14 @@ static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
392369 left = n & 1 ;
393370 for (i = 0 ; i < m ; i ++ ) {
394371 AE_LA32X2_IP (in_sample , inu , in );
395-
396- in_sample32 = AE_SLAI32 (in_sample , 8 ); /* sign extension */
397- tmpl = AE_MUL32X16_L0 (in_sample32 , gain_vec );
398- tmph = AE_MUL32X16_H0 (in_sample32 , gain_vec );
399- tmpl = AE_SRAI64 (tmpl , 8 + IPC4_MIXIN_GAIN_SHIFT );
400- tmph = AE_SRAI64 (tmph , 8 + IPC4_MIXIN_GAIN_SHIFT );
401- in_sample = AE_SEL32_LL (AE_MOVINT32X2_FROMINT64 (tmph ),
402- AE_MOVINT32X2_FROMINT64 (tmpl ));
403-
372+ in_sample = AE_MULFP24X2R (AE_MOVF24X2_FROMINT32X2 (in_sample ), gain_vec );
404373 AE_SA32X2_IP (in_sample , outu2 , out );
405374 }
406375 AE_SA64POS_FP (outu2 , out );
407376 /* process the left sample to avoid memory access overrun */
408377 if (left ) {
409378 AE_L32_IP (in_sample , (ae_int32 * )in , sizeof (ae_int32 ));
410-
411- in_sample32 = AE_SLAI32 (in_sample , 8 ); /* sign extension */
412- tmpl = AE_MUL32X16_L0 (in_sample32 , gain_vec );
413- tmpl = AE_SRAI64 (tmpl , 8 + IPC4_MIXIN_GAIN_SHIFT );
414- in_sample = AE_MOVINT32X2_FROMINT64 (tmpl );
415-
379+ in_sample = AE_MULFP24X2R (AE_MOVF24X2_FROMINT32X2 (in_sample ), gain_vec );
416380 AE_S32_L_IP (in_sample , (ae_int32 * )out , sizeof (ae_int32 ));
417381 }
418382 }
@@ -518,10 +482,13 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
518482 /* cir_buf_wrap() is required and is done below in a loop */
519483 int32_t * dst = (int32_t * )sink -> ptr + start_sample ;
520484 int32_t * src = source -> ptr ;
521- ae_int16x4 gain_vec ;
522- ae_int64 tmpl , tmph ;
485+ ae_f16x4 gain_vec ;
486+
487+ /* this func does not support unity gain as 1 cannot be represented as Q1.15 value */
488+ assert (gain < IPC4_MIXIN_UNITY_GAIN );
523489
524490 gain_vec = AE_L16_I ((ae_int16 * )& gain , 0 );
491+ gain_vec = AE_SLAI16S (gain_vec , 5 ); /* convert to Q1.15 */
525492
526493 assert (mixed_samples >= start_sample );
527494 samples_to_mix = AE_MIN_32_signed (mixed_samples - start_sample , sample_count );
@@ -544,32 +511,18 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
544511 left = n & 1 ;
545512 for (i = 0 ; i < m ; i ++ ) {
546513 AE_LA32X2_IP (in_sample , inu , in );
547-
548- /* apply gain to in_sample */
549- tmpl = AE_MUL32X16_L0 (in_sample , gain_vec );
550- tmph = AE_MUL32X16_H0 (in_sample , gain_vec );
551- tmpl = AE_SRAI64 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
552- tmph = AE_SRAI64 (tmph , IPC4_MIXIN_GAIN_SHIFT );
553- in_sample = AE_SEL32_LL (AE_MOVINT32X2_FROMINT64 (tmph ),
554- AE_MOVINT32X2_FROMINT64 (tmpl ));
555-
556514 AE_LA32X2_IP (out_sample , outu1 , out );
557515 out -- ;
558- out_sample = AE_ADD32S ( in_sample , out_sample );
516+ AE_MULAFP32X16X2RS_L ( out_sample , in_sample , gain_vec );
559517 AE_SA32X2_IP (out_sample , outu2 , out );
560518 }
561519 AE_SA64POS_FP (outu2 , out );
562520
563521 /* process the left sample to avoid memory access overrun */
564522 if (left ) {
565523 AE_L32_IP (in_sample , (ae_int32 * )in , sizeof (ae_int32 ));
566-
567- tmpl = AE_MUL32X16_L0 (in_sample , gain_vec );
568- tmpl = AE_SRAI64 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
569- in_sample = AE_MOVINT32X2_FROMINT64 (tmpl );
570-
571524 AE_L32_IP (out_sample , (ae_int32 * )out , 0 );
572- out_sample = AE_ADD32S ( in_sample , out_sample );
525+ AE_MULAFP32X16X2RS_L ( out_sample , in_sample , gain_vec );
573526 AE_S32_L_IP (out_sample , (ae_int32 * )out , sizeof (ae_int32 ));
574527 }
575528 }
@@ -589,26 +542,15 @@ static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t
589542 left = n & 1 ;
590543 for (i = 0 ; i < m ; i ++ ) {
591544 AE_LA32X2_IP (in_sample , inu , in );
592-
593- tmpl = AE_MUL32X16_L0 (in_sample , gain_vec );
594- tmph = AE_MUL32X16_H0 (in_sample , gain_vec );
595- tmpl = AE_SRAI64 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
596- tmph = AE_SRAI64 (tmph , IPC4_MIXIN_GAIN_SHIFT );
597- in_sample = AE_SEL32_LL (AE_MOVINT32X2_FROMINT64 (tmph ),
598- AE_MOVINT32X2_FROMINT64 (tmpl ));
599-
545+ in_sample = AE_MULFP32X16X2RS_L (in_sample , gain_vec );
600546 AE_SA32X2_IP (in_sample , outu2 , out );
601547 }
602548 AE_SA64POS_FP (outu2 , out );
603549
604550 /* process the left sample to avoid memory access overrun */
605551 if (left ) {
606552 AE_L32_IP (in_sample , (ae_int32 * )in , sizeof (ae_int32 ));
607-
608- tmpl = AE_MUL32X16_L0 (in_sample , gain_vec );
609- tmpl = AE_SRAI64 (tmpl , IPC4_MIXIN_GAIN_SHIFT );
610- in_sample = AE_MOVINT32X2_FROMINT64 (tmpl );
611-
553+ in_sample = AE_MULFP32X16X2RS_L (in_sample , gain_vec );
612554 AE_S32_L_IP (in_sample , (ae_int32 * )out , sizeof (ae_int32 ));
613555 }
614556 }
0 commit comments