diff --git a/scripts/rebuild-testbench.sh b/scripts/rebuild-testbench.sh index fee09fd243ae..996d16f45a8c 100755 --- a/scripts/rebuild-testbench.sh +++ b/scripts/rebuild-testbench.sh @@ -97,7 +97,7 @@ export_xtensa_setup() cat < "$export_script" export XTENSA_TOOLS_ROOT=$XTENSA_TOOLS_ROOT export XTENSA_CORE=$XTENSA_CORE -XTENSA_PATH=$tools_bin +export XTENSA_PATH=$tools_bin EOFSETUP } diff --git a/src/audio/mfcc/Kconfig b/src/audio/mfcc/Kconfig index 678331896b5f..6bbf8fc486b9 100644 --- a/src/audio/mfcc/Kconfig +++ b/src/audio/mfcc/Kconfig @@ -4,7 +4,7 @@ config COMP_MFCC tristate "MFCC component" depends on COMP_MODULE_ADAPTER select CORDIC_FIXED - select MATH_16BIT_MEL_FILTERBANK + select MATH_32BIT_MEL_FILTERBANK select MATH_AUDITORY select MATH_DCT select MATH_DECIBELS diff --git a/src/audio/mfcc/mfcc.c b/src/audio/mfcc/mfcc.c index 9874edea4be5..656e3d9b7bf7 100644 --- a/src/audio/mfcc/mfcc.c +++ b/src/audio/mfcc/mfcc.c @@ -38,13 +38,13 @@ SOF_DEFINE_REG_UUID(mfcc); __cold_rodata const struct mfcc_func_map mfcc_fm[] = { #if CONFIG_FORMAT_S16LE - {SOF_IPC_FRAME_S16_LE, mfcc_s16_default}, + {SOF_IPC_FRAME_S16_LE, mfcc_s16_default}, #endif /* CONFIG_FORMAT_S16LE */ #if CONFIG_FORMAT_S24LE - {SOF_IPC_FRAME_S24_4LE, NULL}, + {SOF_IPC_FRAME_S24_4LE, mfcc_s24_default}, #endif /* CONFIG_FORMAT_S24LE */ #if CONFIG_FORMAT_S32LE - {SOF_IPC_FRAME_S32_LE, NULL}, + {SOF_IPC_FRAME_S32_LE, mfcc_s32_default}, #endif /* CONFIG_FORMAT_S32LE */ }; diff --git a/src/audio/mfcc/mfcc_common.c b/src/audio/mfcc/mfcc_common.c index 688c7afac9b2..76eb7dd04a74 100644 --- a/src/audio/mfcc/mfcc_common.c +++ b/src/audio/mfcc/mfcc_common.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. 
// // Author: Andrula Song @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -36,15 +37,21 @@ LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL); * The main processing function for MFCC */ -static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *state) +static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *cd) { + struct sof_mfcc_config *config = cd->config; + struct mfcc_state *state = &cd->state; struct mfcc_buffer *buf = &state->buf; struct mfcc_fft *fft = &state->fft; int mel_scale_shift; int input_shift; - int i; + int i, j; int m; int cc_count = 0; + int64_t s; + int32_t mel_value; + int32_t peak; + int32_t clamp_value; /* Phase 1, wait until whole fft_size is filled with valid data. This way * first output cepstral coefficients originate from streamed data and not @@ -103,8 +110,8 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat fft_execute_32(fft->fft_plan, false); #endif - /* Convert powerspectrum to Mel band logarithmic spectrum */ - mat_init_16b(state->mel_spectra, 1, state->dct.num_in, 7); /* Q8.7 */ + /* Convert powerspectrum to Mel band logarithmic spectrum Q9.23 */ + mat_init_16b(state->mel_spectra, 1, state->dct.num_in, 7); /* Q9.7 */ /* Compensate FFT lib scaling to Mel log values, e.g. for 512 long FFT * the fft_plan->len is 9. The scaling is 1/512. 
Subtract from input_shift it @@ -114,21 +121,79 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat #if MFCC_FFT_BITS == 16 psy_apply_mel_filterbank_16(&state->melfb, fft->fft_out, state->power_spectra, state->mel_spectra->data, mel_scale_shift); + /* Convert Q9.7 int16_t mel log to Q9.23 int32_t for downstream processing */ + for (j = 0; j < state->dct.num_in; j++) + state->mel_log_32[j] = (int32_t)state->mel_spectra->data[j] << 16; #else psy_apply_mel_filterbank_32(&state->melfb, fft->fft_out, state->power_spectra, - state->mel_spectra->data, mel_scale_shift); + state->mel_log_32, mel_scale_shift); #endif - /* Multiply Mel spectra with DCT matrix to get cepstral coefficients */ - mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */ - mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef); + if (state->mel_only) { + /* In Mel-only mode output Mel log spectra directly */ + cc_count += state->dct.num_in; + + /* Find peak mel value and track state->mmax in Q9.23 */ + if (config->dynamic_mmax) { + peak = state->mel_log_32[0]; + for (j = 1; j < state->dct.num_in; j++) { + if (state->mel_log_32[j] > peak) + peak = state->mel_log_32[j]; + } + + /* Jump to peak immediately if higher, decay otherwise */ + if (peak > state->mmax) { + state->mmax = peak; + } else { + /* Q9.23 * Q1.15, result Q9.23. The coefficient is small + * so no need for saturation. + */ + s = (int64_t)peak - state->mmax; + state->mmax += + Q_MULTSR_32X32(s, config->mmax_coef, 23, 15, 23); + } + } + + /* Clamp Mel values lower than mmax - top_db, add offset, and scale. + * Config top_db and mel_offset are Q9.7, shift to Q9.23. 
+ */ + clamp_value = state->mmax - ((int32_t)config->top_db << 16); + for (j = 0; j < state->dct.num_in; j++) { + mel_value = state->mel_log_32[j]; + if (mel_value < clamp_value) + mel_value = clamp_value; + + /* Q9.23 * Q4.12, result Q9.23 */ + s = (int64_t)mel_value + ((int32_t)config->mel_offset << 16); + state->mel_log_32[j] = + sat_int32(Q_MULTSR_32X32(s, config->mel_scale, 23, 12, 23)); + } + + /* Store Q9.7 version in mel_spectra for s16 output mode */ + for (j = 0; j < state->dct.num_in; j++) + state->mel_spectra->data[j] = + sat_int16(state->mel_log_32[j] >> 16); - /* Apply cepstral lifter */ - if (state->lifter.cepstral_lifter != 0) - mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix, - state->cepstral_coef); + /* Enable this to check mmax decay */ + comp_dbg(dev, "state->mmax = %d", state->mmax); + } else { + /* Convert Q9.23 to Q9.7 for 16-bit DCT */ + for (j = 0; j < state->dct.num_in; j++) + state->mel_spectra->data[j] = + sat_int16(state->mel_log_32[j] >> 16); - cc_count += state->dct.num_out; + /* Multiply Mel spectra with DCT matrix to get cepstral coefficients */ + mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q9.7 */ + mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef); + + /* Apply cepstral lifter */ + if (state->lifter.cepstral_lifter != 0) { + mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix, + state->cepstral_coef); + } + + cc_count += state->dct.num_out; + } /* Output to sink buffer */ } @@ -139,7 +204,101 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat return cc_count; } +void mfcc_fill_fft_buffer(struct mfcc_state *state) +{ + struct mfcc_buffer *buf = &state->buf; + struct mfcc_fft *fft = &state->fft; +#if MFCC_FFT_BITS == 16 + int16_t *d = &fft->fft_buf[fft->fft_fill_start_idx].real; + const int fft_elem_inc = sizeof(fft->fft_buf[0]) / sizeof(int16_t); +#else + int32_t *d = &fft->fft_buf[fft->fft_fill_start_idx].real; + 
const int fft_elem_inc = sizeof(fft->fft_buf[0]) / sizeof(int32_t); +#endif + int16_t *prev = state->prev_data; + int16_t *prev_end = prev + state->prev_data_size; + int16_t *r = buf->r_ptr; + int copied; + int nmax; + int n; + int j; + + /* Copy overlapped samples from state buffer. The fft_buf has been + * cleared by caller so imaginary part remains zero. + */ + while (prev < prev_end) { + *d = *prev++; + d += fft_elem_inc; + } + + /* Copy hop size of new data from circular buffer */ + for (copied = 0; copied < fft->fft_hop_size; copied += n) { + nmax = fft->fft_hop_size - copied; + n = mfcc_buffer_samples_without_wrap(buf, r); + n = MIN(n, nmax); + for (j = 0; j < n; j++) { + *d = *r++; + d += fft_elem_inc; + } + r = mfcc_buffer_wrap(buf, r); + } + + buf->s_avail -= copied; + buf->s_free += copied; + buf->r_ptr = r; + + /* Copy for next time data back to overlap buffer */ +#if MFCC_FFT_BITS == 16 + d = (int16_t *)&fft->fft_buf[fft->fft_fill_start_idx + fft->fft_hop_size].real; +#else + d = (int32_t *)&fft->fft_buf[fft->fft_fill_start_idx + fft->fft_hop_size].real; +#endif + prev = state->prev_data; + while (prev < prev_end) { + *prev++ = *d; + d += fft_elem_inc; + } +} + #if CONFIG_FORMAT_S16LE +static int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, int16_t *w_ptr, + int samples) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + n = MIN(n, nmax); + memset(w_ptr, 0, n * sizeof(int16_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + } + + return w_ptr; +} + +static int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, + int samples, int16_t *r_ptr) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + n = MIN(n, nmax); + /* Not using memcpy_s() due to speed need */ + 
memcpy(w_ptr, r_ptr, n * sizeof(int16_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + r_ptr += n; + } + + return w_ptr; +} + void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, struct output_stream_buffer *bsink, int frames) { @@ -149,35 +308,243 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer struct mfcc_buffer *buf = &cd->state.buf; uint32_t magic = MFCC_MAGIC; int16_t *w_ptr = audio_stream_get_wptr(sink); - // int num_magic = sizeof(magic) / sizeof(int16_t); const int num_magic = 2; int num_ceps; - int zero_samples; + int sink_samples; + int to_copy; /* Get samples from source buffer */ mfcc_source_copy_s16(bsource, buf, &state->emph, frames, state->source_channel); - /* Run STFT and processing after FFT: Mel auditory filter and DCT. The sink - * buffer is updated during STDF processing. - */ - num_ceps = mfcc_stft_process(mod->dev, state); + /* Run STFT and processing after FFT: Mel auditory filter and DCT. */ + num_ceps = mfcc_stft_process(mod->dev, cd); - /* Done, copy data to sink. This works only if the period has room for magic (2) - * plus num_ceps int16_t samples. TODO: split ceps over multiple periods. 
- */ - zero_samples = frames * audio_stream_get_channels(sink); + /* If new output produced, set up pointer into scratch data and mark magic pending */ if (num_ceps > 0) { - zero_samples -= num_ceps + num_magic; + if (state->mel_only) + state->out_data_ptr = state->mel_spectra->data; + else + state->out_data_ptr = state->cepstral_coef->data; + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic); - w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_ceps, state->cepstral_coef->data); + sink_samples -= num_magic; + state->magic_pending = false; + } + + /* Write cepstral/mel data from scratch buffer */ + to_copy = MIN(state->out_remain, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, to_copy, state->out_data_ptr); + state->out_data_ptr += to_copy; + state->out_remain -= to_copy; + sink_samples -= to_copy; } - w_ptr = mfcc_sink_copy_zero_s16(sink, w_ptr, zero_samples); + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s16(sink, w_ptr, sink_samples); } #endif /* CONFIG_FORMAT_S16LE */ +#if CONFIG_FORMAT_S24LE || CONFIG_FORMAT_S32LE +static int32_t *mfcc_sink_copy_zero_s32(const struct audio_stream *sink, int32_t *w_ptr, + int samples) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s32(sink, w_ptr); + n = MIN(n, nmax); + memset(w_ptr, 0, n * sizeof(int32_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + } + + return w_ptr; +} + +static int32_t *mfcc_sink_copy_data_s32(const struct audio_stream *sink, int32_t *w_ptr, + int samples, int32_t *r_ptr) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied 
< samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s32(sink, w_ptr); + n = MIN(n, nmax); + /* Not using memcpy_s() due to speed need */ + memcpy(w_ptr, r_ptr, n * sizeof(int32_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + r_ptr += n; + } + + return w_ptr; +} +#endif /* CONFIG_FORMAT_S24LE || CONFIG_FORMAT_S32LE */ + #if CONFIG_FORMAT_S24LE +void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames) +{ + struct audio_stream *sink = bsink->data; + struct mfcc_comp_data *cd = module_get_private_data(mod); + struct mfcc_state *state = &cd->state; + struct mfcc_buffer *buf = &cd->state.buf; + uint32_t magic = MFCC_MAGIC; + int32_t *w_ptr = audio_stream_get_wptr(sink); + const int num_magic = 1; /* one int32_t word for magic */ + int num_ceps; + int sink_samples; + int remain_s32; + int to_copy; + int k; + + /* Get samples from source buffer */ + mfcc_source_copy_s24(bsource, buf, &state->emph, frames, state->source_channel); + + /* Run STFT and processing after FFT */ + num_ceps = mfcc_stft_process(mod->dev, cd); + + /* If new output produced, set up pointer into scratch data */ + if (num_ceps > 0) { + if (state->mel_only) { + /* Convert mel_log_32 from Q9.23 to Q9.15 in-place */ + for (k = 0; k < num_ceps; k++) + state->mel_log_32[k] >>= 8; + + state->out_data_ptr_32 = state->mel_log_32; + } else { + state->out_data_ptr = state->cepstral_coef->data; + } + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); + sink_samples -= num_magic; + state->magic_pending = false; + } + + if (state->mel_only) { + /* Write 32-bit mel data Q9.15, one value per 
int32_t */ + to_copy = MIN(state->out_remain, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + state->out_data_ptr_32); + state->out_data_ptr_32 += to_copy; + state->out_remain -= to_copy; + sink_samples -= to_copy; + } + } else { + /* Write cepstral data packed as int32_t from scratch buffer */ + remain_s32 = (state->out_remain + 1) / 2; + to_copy = MIN(remain_s32, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + (int32_t *)state->out_data_ptr); + state->out_data_ptr += to_copy * 2; + state->out_remain -= to_copy * 2; + if (state->out_remain < 0) + state->out_remain = 0; + + sink_samples -= to_copy; + } + } + + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); +} #endif /* CONFIG_FORMAT_S24LE */ #if CONFIG_FORMAT_S32LE +void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames) +{ + struct audio_stream *sink = bsink->data; + struct mfcc_comp_data *cd = module_get_private_data(mod); + struct mfcc_state *state = &cd->state; + struct mfcc_buffer *buf = &cd->state.buf; + uint32_t magic = MFCC_MAGIC; + int32_t *w_ptr = audio_stream_get_wptr(sink); + const int num_magic = 1; /* one int32_t word for magic */ + int num_ceps; + int sink_samples; + int remain_s32; + int to_copy; + + /* Get samples from source buffer */ + mfcc_source_copy_s32(bsource, buf, &state->emph, frames, state->source_channel); + + /* Run STFT and processing after FFT */ + num_ceps = mfcc_stft_process(mod->dev, cd); + + /* If new output produced, set up pointer into scratch data */ + if (num_ceps > 0) { + if (state->mel_only) { + state->out_data_ptr_32 = state->mel_log_32; + } else { + state->out_data_ptr = state->cepstral_coef->data; + } + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * 
audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); + sink_samples -= num_magic; + state->magic_pending = false; + } + + if (state->mel_only) { + /* Write 32-bit mel data Q9.23, one value per int32_t */ + to_copy = MIN(state->out_remain, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + state->out_data_ptr_32); + state->out_data_ptr_32 += to_copy; + state->out_remain -= to_copy; + sink_samples -= to_copy; + } + } else { + /* Write cepstral data packed as int32_t from scratch buffer */ + remain_s32 = (state->out_remain + 1) / 2; + to_copy = MIN(remain_s32, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + (int32_t *)state->out_data_ptr); + state->out_data_ptr += to_copy * 2; + state->out_remain -= to_copy * 2; + if (state->out_remain < 0) + state->out_remain = 0; + + sink_samples -= to_copy; + } + } + + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); +} #endif /* CONFIG_FORMAT_S32LE */ diff --git a/src/audio/mfcc/mfcc_generic.c b/src/audio/mfcc/mfcc_generic.c index ecc95474326b..c6c699a2b724 100644 --- a/src/audio/mfcc/mfcc_generic.c +++ b/src/audio/mfcc/mfcc_generic.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2022 Intel Corporation. All rights reserved. +// Copyright(c) 2022-2026 Intel Corporation. 
// // Author: Seppo Ingalsuo @@ -26,53 +26,6 @@ * MFCC algorithm code */ -void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, - struct mfcc_pre_emph *emph, int frames, int source_channel) -{ - struct audio_stream *source = bsource->data; - int32_t s; - int16_t *x0; - int16_t *x = audio_stream_get_rptr(source); - int16_t *w = buf->w_ptr; - int copied; - int nmax; - int n1; - int n2; - int n; - int i; - int num_channels = audio_stream_get_channels(source); - - /* Copy from source to pre-buffer for FFT. - * The pre-emphasis filter is done in this step. - */ - for (copied = 0; copied < frames; copied += n) { - nmax = frames - copied; - n1 = audio_stream_frames_without_wrap(source, x); - n2 = mfcc_buffer_samples_without_wrap(buf, w); - n = MIN(n1, n2); - n = MIN(n, nmax); - x0 = x + source_channel; - for (i = 0; i < n; i++) { - if (emph->enable) { - /* Q1.15 x Q1.15 -> Q2.30 */ - s = (int32_t)emph->delay * emph->coef + Q_SHIFT_LEFT(*x0, 15, 30); - *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); - emph->delay = *x0; - } else { - *w = *x0; - } - x0 += num_channels; - w++; - } - - x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); - w = mfcc_buffer_wrap(buf, w); - } - buf->s_avail += copied; - buf->s_free -= copied; - buf->w_ptr = w; -} - void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) { @@ -98,47 +51,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, buf->r_ptr = r; } -void mfcc_fill_fft_buffer(struct mfcc_state *state) -{ - struct mfcc_buffer *buf = &state->buf; - struct mfcc_fft *fft = &state->fft; - int16_t *r = buf->r_ptr; - int copied; - int nmax; - int idx = fft->fft_fill_start_idx; - int j; - int n; - - /* Copy overlapped samples from state buffer. Imaginary part of input - * remains zero. 
- */ - for (j = 0; j < state->prev_data_size; j++) - fft->fft_buf[idx + j].real = state->prev_data[j]; - - /* Copy hop size of new data from circular buffer */ - idx += state->prev_data_size; - for (copied = 0; copied < fft->fft_hop_size; copied += n) { - nmax = fft->fft_hop_size - copied; - n = mfcc_buffer_samples_without_wrap(buf, r); - n = MIN(n, nmax); - for (j = 0; j < n; j++) { - fft->fft_buf[idx].real = *r; - r++; - idx++; - } - r = mfcc_buffer_wrap(buf, r); - } - - buf->s_avail -= copied; - buf->s_free += copied; - buf->r_ptr = r; - - /* Copy for next time data back to overlap buffer */ - idx = fft->fft_fill_start_idx + fft->fft_hop_size; - for (j = 0; j < state->prev_data_size; j++) - state->prev_data[j] = fft->fft_buf[idx + j].real; -} - #ifdef MFCC_NORMALIZE_FFT int mfcc_normalize_fft_buffer(struct mfcc_state *state) { @@ -189,53 +101,160 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) } #if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int32_t s; + int16_t *x0; + int16_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; int copied; int nmax; - int i; + int n1; + int n2; int n; + int i; + int num_channels = audio_stream_get_channels(source); - for (copied = 0; copied < samples; copied += n) { - nmax = samples - copied; - n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. 
+ */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); n = MIN(n, nmax); + x0 = x + source_channel; for (i = 0; i < n; i++) { - *w_ptr = 0; - w_ptr++; + if (emph->enable) { + /* Q1.15 x Q1.15 -> Q2.30 */ + s = (int32_t)emph->delay * emph->coef + Q_SHIFT_LEFT(*x0, 15, 30); + *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); + emph->delay = *x0; + } else { + *w = *x0; + } + x0 += num_channels; + w++; } - w_ptr = audio_stream_wrap(sink, w_ptr); + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); } - - return w_ptr; + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; } +#endif /* CONFIG_FORMAT_S16LE */ + +#if CONFIG_FORMAT_S24LE -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr) +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int32_t tmp, s; + int32_t *x0; + int32_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; int copied; int nmax; - int i; + int n1; + int n2; int n; + int i; + int num_channels = audio_stream_get_channels(source); - for (copied = 0; copied < samples; copied += n) { - nmax = samples - copied; - n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. + * S24_4LE data is in 32-bit container, shift left by 8 to Q1.31, + * then convert to Q1.15 with rounding. 
+ */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); n = MIN(n, nmax); + x0 = x + source_channel; for (i = 0; i < n; i++) { - *w_ptr = *r_ptr; - r_ptr++; - w_ptr++; + if (emph->enable) { + /* Convert to Q1.31, ignore highest byte */ + s = (int32_t)((uint32_t)*x0 << 8); + /* Q1.15 x Q1.15 -> Q2.30 */ + tmp = (int32_t)emph->delay * emph->coef + Q_SHIFT(s, 31, 30); + *w = sat_int16(Q_SHIFT_RND(tmp, 30, 15)); + emph->delay = sat_int16(Q_SHIFT_RND(s, 31, 15)); + } else { + /* Convert to Q1.31, ignore highest byte */ + s = (int32_t)((uint32_t)*x0 << 8); + *w = sat_int16(Q_SHIFT_RND(s, 31, 15)); + } + x0 += num_channels; + w++; } - w_ptr = audio_stream_wrap(sink, w_ptr); + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; +} + +#endif /* CONFIG_FORMAT_S24LE */ - return w_ptr; +#if CONFIG_FORMAT_S32LE + +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) +{ + struct audio_stream *source = bsource->data; + int32_t s; + int32_t *x0; + int32_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; + int copied; + int nmax; + int n1; + int n2; + int n; + int i; + int num_channels = audio_stream_get_channels(source); + + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. + * S32 data is in 32-bit container, shift right by 16 to get 16-bit. 
+ */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); + n = MIN(n, nmax); + x0 = x + source_channel; + for (i = 0; i < n; i++) { + if (emph->enable) { + /* Q1.15 x Q1.15 -> Q2.30 */ + s = (int32_t)emph->delay * emph->coef + Q_SHIFT(*x0, 31, 30); + *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); + emph->delay = sat_int16(Q_SHIFT_RND(*x0, 31, 15)); + } else { + *w = sat_int16(Q_SHIFT_RND(*x0, 31, 15)); + } + x0 += num_channels; + w++; + } + + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); + } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_GENERIC */ diff --git a/src/audio/mfcc/mfcc_hifi3.c b/src/audio/mfcc/mfcc_hifi3.c index b3b5d99967db..b9ed6c7f8380 100644 --- a/src/audio/mfcc/mfcc_hifi3.c +++ b/src/audio/mfcc/mfcc_hifi3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. 
// // Author: Andrula Song @@ -35,6 +35,7 @@ static inline void set_circular_buf0(const void *start, const void *end) * MFCC algorithm code */ +#if CONFIG_FORMAT_S16LE void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, struct mfcc_pre_emph *emph, int frames, int source_channel) { @@ -92,6 +93,7 @@ void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffe buf->s_free -= copied; buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S16LE */ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) @@ -126,50 +128,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, buf->r_ptr = (void *)in; /* int16_t pointer but direct cast is not possible */ } -void mfcc_fill_fft_buffer(struct mfcc_state *state) -{ - struct mfcc_buffer *buf = &state->buf; - struct mfcc_fft *fft = &state->fft; - int idx = fft->fft_fill_start_idx; - ae_int16 *out = (ae_int16 *)&fft->fft_buf[idx].real; - ae_int16 *in = (ae_int16 *)state->prev_data; - ae_int16x4 sample; - const int buf_inc = sizeof(ae_int16); - const int fft_inc = sizeof(fft->fft_buf[0]); - int j; - - /* Copy overlapped samples from state buffer. Imaginary part of input - * remains zero. 
- */ - for (j = 0; j < state->prev_data_size; j++) { - AE_L16_XP(sample, in, buf_inc); - AE_S16_0_XP(sample, out, fft_inc); - } - - /* Copy hop size of new data from circular buffer */ - idx += state->prev_data_size; - in = (ae_int16 *)buf->r_ptr; - out = (ae_int16 *)&fft->fft_buf[idx].real; - set_circular_buf0(buf->addr, buf->end_addr); - for (j = 0; j < fft->fft_hop_size; j++) { - AE_L16_XC(sample, in, buf_inc); - AE_S16_0_XP(sample, out, fft_inc); - } - - buf->s_avail -= fft->fft_hop_size; - buf->s_free += fft->fft_hop_size; - buf->r_ptr = (int16_t *)in; - - /* Copy for next time data back to overlap buffer */ - idx = fft->fft_fill_start_idx + fft->fft_hop_size; - in = (ae_int16 *)&fft->fft_buf[idx].real; - out = (ae_int16 *)state->prev_data; - for (j = 0; j < state->prev_data_size; j++) { - AE_L16_XP(sample, in, fft_inc); - AE_S16_0_XP(sample, out, buf_inc); - } -} - #ifdef MFCC_NORMALIZE_FFT int mfcc_normalize_fft_buffer(struct mfcc_state *state) { @@ -192,6 +150,7 @@ int mfcc_normalize_fft_buffer(struct mfcc_state *state) return shift; } #endif + void mfcc_apply_window(struct mfcc_state *state, int input_shift) { struct mfcc_fft *fft = &state->fft; @@ -221,7 +180,8 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) for (j = 0; j < fft->fft_size; j++) { AE_L32_IP(sample, fft_in, 0); AE_L16_XP(win, win_in, win_inc); - temp = AE_MULFP32X16X2RS_H(sample, win); + /* Data is 16-bit in 32-bit container, shift to Q1.31 for fractional multiply */ + sample = AE_SLAI32S(sample, 16); temp = AE_MULFP32X16X2RS_L(sample, win); temp = AE_SLAA32S(temp, input_shift); AE_S32_L_XP(temp, fft_in, fft_inc); @@ -229,65 +189,129 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) #endif } -#if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +#if CONFIG_FORMAT_S24LE +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph 
*emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int copied; + int nmax; + int n; int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - const int inc = sizeof(ae_int16); - ae_valign outu = AE_ZALIGN64(); - ae_int16x4 zero = AE_ZERO16(); - - set_circular_buf0(sink->addr, sink->end_addr); - - for (i = 0; i < n; i++) - AE_SA16X4_IC(zero, outu, out); + int num_channels = audio_stream_get_channels(source); + ae_int32 *in; + ae_int32 *x = (ae_int32 *)audio_stream_get_rptr(source); + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef = emph->coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) - AE_S16_0_XC(zero, (ae_int16 *)out, inc); + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n = audio_stream_frames_without_wrap(source, x); + n = MIN(n, nmax); + nmax = mfcc_buffer_samples_without_wrap(buf, (int16_t *)out); + n = MIN(n, nmax); + in = x + source_channel; + if (emph->enable) { + delay = emph->delay; + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + /* Shift left by 8 to sign-extend to Q1.31 */ + sample32 = AE_SLAI32(sample32, 8); + /* Then shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_IP(sample, out, 2); + } + emph->delay = delay; + } else { + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + /* Shift left by 8 to sign-extend to Q1.31 */ + sample32 = AE_SLAI32(sample32, 8); + /* Then shift right by 16 to get 16-bit */ + sample32 = 
AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_IP(sample, out, 2); + } + } - return (int16_t *)out; + x = audio_stream_wrap(source, x + n * num_channels); + out = (ae_int16 *)mfcc_buffer_wrap(buf, (int16_t *)out); + } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S24LE */ -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr) +#if CONFIG_FORMAT_S32LE +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int copied; + int nmax; + int n; int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - ae_int16x4 *in = (ae_int16x4 *)r_ptr; - ae_valign outu = AE_ZALIGN64(); - ae_valign inu = AE_ZALIGN64(); - const int inc = sizeof(ae_int16); - ae_int16x4 in_sample; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in; + ae_int32 *x = (ae_int32 *)audio_stream_get_rptr(source); + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef = emph->coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; - set_circular_buf0(sink->addr, sink->end_addr); + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n = audio_stream_frames_without_wrap(source, x); + n = MIN(n, nmax); + nmax = mfcc_buffer_samples_without_wrap(buf, (int16_t *)out); + n = MIN(n, nmax); + in = x + source_channel; + if (emph->enable) { + delay = emph->delay; + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + /* S32: shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = 
sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_IP(sample, out, 2); + } + emph->delay = delay; + } else { + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_IP(sample, out, 2); + } + } - inu = AE_LA64_PP(in); - for (i = 0; i < n; i++) { - AE_LA16X4_IP(in_sample, inu, in); - AE_SA16X4_IC(in_sample, outu, out); - } - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) { - AE_L16_XP(in_sample, (ae_int16 *)in, inc); - AE_S16_0_XC(in_sample, (ae_int16 *)out, inc); + x = audio_stream_wrap(source, x + n * num_channels); + out = (ae_int16 *)mfcc_buffer_wrap(buf, (int16_t *)out); } - - return (int16_t *)out; + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_HIFI3 */ diff --git a/src/audio/mfcc/mfcc_hifi4.c b/src/audio/mfcc/mfcc_hifi4.c index 60a4de62ec23..511a9bbf1dca 100644 --- a/src/audio/mfcc/mfcc_hifi4.c +++ b/src/audio/mfcc/mfcc_hifi4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. 
// // Author: Andrula Song @@ -41,6 +41,8 @@ static inline void set_circular_buf1(const void *start, const void *end) /* * MFCC algorithm code */ + +#if CONFIG_FORMAT_S16LE void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, struct mfcc_pre_emph *emph, int frames, int source_channel) { @@ -87,6 +89,7 @@ void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffe buf->s_free -= frames; buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S16LE */ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) @@ -121,50 +124,6 @@ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, buf->r_ptr = (int16_t *)in; } -void mfcc_fill_fft_buffer(struct mfcc_state *state) -{ - struct mfcc_buffer *buf = &state->buf; - struct mfcc_fft *fft = &state->fft; - int idx = fft->fft_fill_start_idx; - ae_int16 *out = (ae_int16 *)&fft->fft_buf[idx].real; - ae_int16 *in = (ae_int16 *)state->prev_data; - ae_int16x4 sample; - const int buf_inc = sizeof(ae_int16); - const int fft_inc = sizeof(fft->fft_buf[0]); - int j; - - /* Copy overlapped samples from state buffer. Imaginary part of input - * remains zero. 
- */ - for (j = 0; j < state->prev_data_size; j++) { - AE_L16_XP(sample, in, buf_inc); - AE_S16_0_XP(sample, out, fft_inc); - } - - /* Copy hop size of new data from circular buffer */ - idx += state->prev_data_size; - in = (ae_int16 *)buf->r_ptr; - out = (ae_int16 *)&fft->fft_buf[idx].real; - set_circular_buf0(buf->addr, buf->end_addr); - for (j = 0; j < fft->fft_hop_size; j++) { - AE_L16_XC(sample, in, buf_inc); - AE_S16_0_XP(sample, out, fft_inc); - } - - buf->s_avail -= fft->fft_hop_size; - buf->s_free += fft->fft_hop_size; - buf->r_ptr = (int16_t *)in; - - /* Copy for next time data back to overlap buffer */ - idx = fft->fft_fill_start_idx + fft->fft_hop_size; - in = (ae_int16 *)&fft->fft_buf[idx].real; - out = (ae_int16 *)state->prev_data; - for (j = 0; j < state->prev_data_size; j++) { - AE_L16_XP(sample, in, fft_inc); - AE_S16_0_XP(sample, out, buf_inc); - } -} - #ifdef MFCC_NORMALIZE_FFT int mfcc_normalize_fft_buffer(struct mfcc_state *state) { @@ -217,7 +176,8 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) for (j = 0; j < fft->fft_size; j++) { AE_L32_IP(sample, fft_in, 0); AE_L16_XP(win, win_in, win_inc); - temp = AE_MULFP32X16X2RS_H(sample, win); + /* Data is 16-bit in 32-bit container, shift to Q1.31 for fractional multiply */ + sample = AE_SLAI32S(sample, 16); temp = AE_MULFP32X16X2RS_L(sample, win); temp = AE_SLAA32S(temp, input_shift); AE_S32_L_XP(temp, fft_in, fft_inc); @@ -225,65 +185,111 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) #endif } -#if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +#if CONFIG_FORMAT_S24LE +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in = (ae_int32 *)source->r_ptr + source_channel; + 
ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; + const int out_inc = sizeof(ae_int16); int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - const int inc = sizeof(ae_int16); - ae_valign outu = AE_ZALIGN64(); - ae_int16x4 zero = AE_ZERO16(); - - set_circular_buf0(sink->addr, sink->end_addr); - for (i = 0; i < n; i++) - AE_SA16X4_IC(zero, outu, out); + set_circular_buf1(buf->addr, buf->end_addr); + set_circular_buf0(source->addr, source->end_addr); - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) - AE_S16_0_XC(zero, (ae_int16 *)out, inc); + if (emph->enable) { + delay = emph->delay; + coef = emph->coef; + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + /* Shift left by 8 to sign-extend to Q1.31 */ + sample32 = AE_SLAI32(sample32, 8); + /* Then shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_XC1(sample, out, out_inc); + } + emph->delay = delay; + } else { + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + /* Shift left by 8 to sign-extend to Q1.31 */ + sample32 = AE_SLAI32(sample32, 8); + /* Then shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_XC1(sample, out, out_inc); + } + } - return (int16_t *)out; + buf->s_avail += frames; + buf->s_free -= frames; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S24LE */ -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t 
*r_ptr) +#if CONFIG_FORMAT_S32LE +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in = (ae_int32 *)source->r_ptr + source_channel; + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; + const int out_inc = sizeof(ae_int16); int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - ae_int16x4 *in = (ae_int16x4 *)r_ptr; - ae_valign outu = AE_ZALIGN64(); - ae_valign inu = AE_ZALIGN64(); - const int inc = sizeof(ae_int16); - ae_int16x4 in_sample; - set_circular_buf0(sink->addr, sink->end_addr); + set_circular_buf1(buf->addr, buf->end_addr); + set_circular_buf0(source->addr, source->end_addr); - inu = AE_LA64_PP(in); - for (i = 0; i < n; i++) { - AE_LA16X4_IP(in_sample, inu, in); - AE_SA16X4_IC(in_sample, outu, out); - } - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) { - AE_L16_XP(in_sample, (ae_int16 *)in, inc); - AE_S16_0_XC(in_sample, (ae_int16 *)out, inc); + if (emph->enable) { + delay = emph->delay; + coef = emph->coef; + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + /* S32: shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_XC1(sample, out, out_inc); + } + emph->delay = delay; + } else { + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 16); + sample = 
AE_SAT16X4(sample32, sample32); + AE_S16_0_XC1(sample, out, out_inc); + } } - return (int16_t *)out; + buf->s_avail += frames; + buf->s_free -= frames; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_HIFI4 */ diff --git a/src/audio/mfcc/mfcc_setup.c b/src/audio/mfcc/mfcc_setup.c index dded450673ad..a936371ccd1c 100644 --- a/src/audio/mfcc/mfcc_setup.c +++ b/src/audio/mfcc/mfcc_setup.c @@ -50,10 +50,12 @@ static int mfcc_get_window(struct mfcc_state *state, enum sof_mfcc_fft_window_ty case MFCC_HAMMING_WINDOW: win_hamming_16b(state->window, fft->fft_size); return 0; + case MFCC_HANN_WINDOW: + win_hann_16b(state->window, fft->fft_size); + return 0; case MFCC_POVEY_WINDOW: win_povey_16b(state->window, fft->fft_size); return 0; - default: return -EINVAL; } @@ -139,10 +141,9 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i return -EINVAL; } - comp_info(dev, "source_channel = %d, stream_channels = %d", - config->channel, channels); - if (config->channel >= channels) { - comp_err(dev, "Illegal channel"); + if (config->channel >= channels || (config->channel < 0 && channels != 1)) { + comp_err(dev, "Illegal source_channel %d for stream channels %d", config->channel, + channels); return -EINVAL; } @@ -151,6 +152,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i else state->source_channel = config->channel; + state->mmax = (int32_t)config->mmax_init << 16; /* Q9.7 -> Q9.23 */ state->emph.enable = config->preemphasis_coefficient > 0; state->emph.coef = -config->preemphasis_coefficient; /* Negate config parameter */ fft->fft_size = config->frame_length; @@ -224,7 +226,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i ret = mfcc_get_window(state, config->window); if (ret < 0) { comp_err(dev, "Failed Window function"); - goto free_fft_out; + goto free_fft_plan; } /* Setup Mel auditory filterbank. 
FFT input and output buffers are used @@ -246,39 +248,53 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i ret = mod_psy_get_mel_filterbank(mod, fb); if (ret < 0) { comp_err(dev, "Failed Mel filterbank"); - goto free_fft_out; - } - - /* Setup DCT */ - dct->num_in = config->num_mel_bins; - dct->num_out = config->num_ceps; - dct->type = (enum dct_type)config->dct; - dct->ortho = true; - ret = mod_dct_initialize_16(mod, dct); - if (ret < 0) { - comp_err(dev, "Failed DCT init"); - goto free_melfb_data; + goto free_fft_plan; } - state->lifter.num_ceps = config->num_ceps; - state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/ - ret = mfcc_get_cepstral_lifter(mod, &state->lifter); - if (ret < 0) { - comp_err(dev, "Failed cepstral lifter"); - goto free_dct_matrix; + /* Setup DCT and cepstral lifter only when num_ceps > 0. + * When num_ceps is zero, skip DCT/lifter and output Mel + * log spectra directly. + */ + if (config->num_ceps > 0) { + dct->num_in = config->num_mel_bins; + dct->num_out = config->num_ceps; + dct->type = (enum dct_type)config->dct; + dct->ortho = true; + ret = mod_dct_initialize_16(mod, dct); + if (ret < 0) { + comp_err(dev, "Failed DCT init"); + goto free_melfb_data; + } + + state->lifter.num_ceps = config->num_ceps; + state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/ + ret = mfcc_get_cepstral_lifter(mod, &state->lifter); + if (ret < 0) { + comp_err(dev, "Failed cepstral lifter"); + goto free_dct_matrix; + } + + state->mel_only = false; + } else { + comp_info(dev, "num_ceps is 0, Mel log spectra output mode"); + dct->num_in = config->num_mel_bins; + dct->num_out = 0; + dct->matrix = NULL; + state->lifter.matrix = NULL; + state->mel_only = true; } /* Scratch overlay during runtime * * +--------------------------------------------------------+ - * | 1. fft_buf[], 16 bits,size x 4, e.g. 512 -> 2048 bytes | + * | 1. fft_buf[], 32 bits, size x 8, e.g. 
512 -> 4096 bytes| + * +-------------------------------------+------------------+ + * | 3. power_spectra[], | 6. mel_log_32[], | + * | 32 bits, e.g. x257 -> 1028 bytes | 32b, 92 bytes | * +-------------------------------------+------------------+ - * | 3. power_spectra[], | - * | 32 bits, e.g. x257 -> 1028 bytes | - * +-------------------------------------+ * * +---------------------------------------------------------------------------------+ - * | 2. fft_out[], 16 bits,size x 4, e.g. 512 -> 2048 bytes | + * | 2. fft_out[], 32 bits, size x 8, e.g. 512 -> 4096 bytes | * +----------------------------------+----------------------------------+-----------+ * | 4. mel_spectra[], | 5. cepstral_coef[], | * | 16 bits, e.g. x23 -> 46 bytes | 16 bits, e.g. 13x -> 26 bytes | @@ -288,31 +304,78 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i /* Use FFT buffer as scratch for later computed data */ state->power_spectra = (int32_t *)&fft->fft_buf[0]; + state->mel_log_32 = &state->power_spectra[fft->half_fft_size]; + + /* Check that mel_log_32 fits in the remaining fft_buf scratch space */ + int mel_log_32_space = (int)(fft->fft_buffer_size / sizeof(int32_t)) - fft->half_fft_size; + + if (config->num_mel_bins > mel_log_32_space) { + comp_err(dev, "num_mel_bins %d exceeds mel_log_32 scratch space %d", + config->num_mel_bins, mel_log_32_space); + ret = -EINVAL; + goto free_lifter; + } + state->mel_spectra = (struct mat_matrix_16b *)&fft->fft_out[0]; - state->cepstral_coef = (struct mat_matrix_16b *) - &state->mel_spectra->data[state->dct.num_in]; + if (!state->mel_only) { + state->cepstral_coef = + (struct mat_matrix_16b *)&state->mel_spectra->data[state->dct.num_in]; + } else { + state->cepstral_coef = NULL; + } + + /* Allocate output buffer for multi-period output. Size allows for + * current output data plus leftover from previous period. + */ + int max_out_per_hop = state->mel_only ? 
dct->num_in : dct->num_out; + + /* Check that output data can be drained within the periods spanned by one + * FFT hop. Each hop consumes fft_hop_size input samples and produces + * max_out_per_hop + 2 (magic) int16_t output values. The sink provides at + * least fft_hop_size * channels int16_t samples per hop (worst case s16). + * If output exceeds this, data accumulates and will eventually overflow. + */ + int out_per_hop = max_out_per_hop + 2; + int sink_per_hop = fft->fft_hop_size * channels; + + if (out_per_hop > sink_per_hop) { + comp_err(dev, "Output %d int16 per hop exceeds sink capacity %d (hop %d x ch %d)", + out_per_hop, sink_per_hop, fft->fft_hop_size, channels); + ret = -EINVAL; + goto free_lifter; + } /* Set initial state for STFT */ state->waiting_fill = true; state->prev_samples_valid = false; + state->magic_pending = false; + state->out_data_ptr = NULL; + state->out_data_ptr_32 = NULL; + state->out_remain = 0; comp_dbg(dev, "done"); return 0; +free_lifter: + mod_free(mod, state->lifter.matrix); + free_dct_matrix: - rfree(state->dct.matrix); + mod_free(mod, state->dct.matrix); free_melfb_data: - rfree(fb->data); + mod_free(mod, fb->data); + +free_fft_plan: + mod_fft_plan_free(mod, fft->fft_plan); free_fft_out: - rfree(fft->fft_out); + mod_free(mod, fft->fft_out); free_fft_buf: - rfree(fft->fft_buf); + mod_free(mod, fft->fft_buf); free_buffers: - rfree(state->buffers); + mod_free(mod, state->buffers); exit: return ret; diff --git a/src/audio/mfcc/tune/README.txt b/src/audio/mfcc/tune/README.txt index fb8208992ed4..a0c3189e81a3 100644 --- a/src/audio/mfcc/tune/README.txt +++ b/src/audio/mfcc/tune/README.txt @@ -8,20 +8,43 @@ need to be created with "scripts/build-tools.sh -t". Next the testbench is build with "scripts/rebuild-testbench.sh". Once the previous steps are done, a sample wav file can be processed -into stream of cepstral coefficients with script run_mfcc.sh. E.g. -next command processes an ALSA test file with speech clip "front center". 
-The output file is hard-coded to mfcc.raw. +with the script run_mfcc.sh. The script converts the input to raw 16 kHz +stereo format and runs the testbench for S16, S24, and S32 bit depths, +producing both cepstral coefficient (MFCC) and Mel spectrogram outputs. ./run_mfcc.sh /usr/share/sounds/alsa/Front_Center.wav -The output can be plotted and retrieved with Matlab or Octave command: +Output files from the host testbench: + mfcc_s16.raw, mfcc_s24.raw, mfcc_s32.raw - cepstral coefficients + mel_s16.raw, mel_s24.raw, mel_s32.raw - Mel spectrogram -[ceps, t, n] = decode_ceps('mfcc.raw', 13); +If the XTENSA_PATH environment variable is set, the script also runs +the Xtensa build of the testbench (via xt-run) and produces additional +output files prefixed with "xt_": + xt_mfcc_s16.raw, xt_mfcc_s24.raw, xt_mfcc_s32.raw + xt_mel_s16.raw, xt_mel_s24.raw, xt_mel_s32.raw + +All output files can be decoded and plotted at once in Matlab or Octave +with the decode_all.m script: + +decode_all + +This calls decode_ceps for each MFCC file (13 cepstral coefficients) and +decode_mel for each Mel file (80 Mel bins), plotting spectrograms for all +files that exist, including the Xtensa variants. + +Individual files can also be decoded manually: + +[ceps, t, n] = decode_ceps('mfcc_s16.raw', 13); In the above it's known from configuration script that MFCC was set up to output 13 cepstral coefficients from each FFT -> Mel -> DCT -> Cepstral coefficients computation run. +The 80-band Mel output can be visualized with the command: + +[mel, t, n] = decode_mel('mel_s16.raw', 80); + Other kind of signals have quite big visual difference in audio features. Try e.g. other sound files found in computer. 
diff --git a/src/audio/mfcc/tune/decode_all.m b/src/audio/mfcc/tune/decode_all.m new file mode 100644 index 000000000000..d5b60289b4cf --- /dev/null +++ b/src/audio/mfcc/tune/decode_all.m @@ -0,0 +1,39 @@ +% decode_all.m - Decode all MFCC and Mel raw output files from run_mfcc.sh +% +% SPDX-License-Identifier: BSD-3-Clause +% Copyright(c) 2026 Intel Corporation. + +num_ceps = 13; +num_mel = 80; + +% MFCC cepstral output files +ceps_files = {'mfcc_s16.raw', 'mfcc_s24.raw', 'mfcc_s32.raw'}; + +% Mel output files with corresponding format +mel_files = {'mel_s16.raw', 'mel_s24.raw', 'mel_s32.raw'}; +mel_fmts = {'s16', 's24', 's32'}; + +% Xtensa prefixed variants +xt_ceps_files = {'xt_mfcc_s16.raw', 'xt_mfcc_s24.raw', 'xt_mfcc_s32.raw'}; +xt_mel_files = {'xt_mel_s16.raw', 'xt_mel_s24.raw', 'xt_mel_s32.raw'}; + +all_ceps_files = [ceps_files, xt_ceps_files]; +all_mel_files = [mel_files, xt_mel_files]; +all_mel_fmts = [mel_fmts, mel_fmts]; + +for i = 1:length(all_ceps_files) + fn = all_ceps_files{i}; + if exist(fn, 'file') + fprintf('Decoding MFCC ceps: %s\n', fn); + [ceps, t, n] = decode_ceps(fn, num_ceps); + end +end + +for i = 1:length(all_mel_files) + fn = all_mel_files{i}; + fmt = all_mel_fmts{i}; + if exist(fn, 'file') + fprintf('Decoding Mel: %s\n', fn); + [mel, t, n] = decode_mel(fn, num_mel, fmt); + end +end diff --git a/src/audio/mfcc/tune/decode_mel.m b/src/audio/mfcc/tune/decode_mel.m new file mode 100644 index 000000000000..899d17ac72bd --- /dev/null +++ b/src/audio/mfcc/tune/decode_mel.m @@ -0,0 +1,138 @@ +% [mel, t, n] = decode_mel(fn, num_mel, fmt, num_channels) +% +% Input +% fn - File with MFCC data in .raw or .wav format +% num_mel - number of Mel coefficients per frame +% fmt - format of the MFCC data ('s16', 's24', 's32') +% num_channels - needed for .raw format, omit for .wav +% +% Outputs +% mel - Mel coefficients +% t - time vector for plotting +% n - mel 1..num_mel vector for plotting + +% SPDX-License-Identifier: BSD-3-Clause +% Copyright(c) 2026 
Intel Corporation. + +function [mel, t, n] = decode_mel(fn, num_mel, fmt, num_channels) + +if nargin < 3 + fmt = 's16'; +end +if nargin < 4 + num_channels = 1; +end + +% MFCC stream +fs = 16e3; + +switch fmt + case 's16' + qformat = 7; + magic = [25443 28006]; % ASCII 'mfcc' as two int16 + num_magic = 2; + case 's24' + qformat = 15; + magic = int32(1835426659); % 0x6D666363 as int32 + num_magic = 1; + case 's32' + qformat = 23; + magic = int32(1835426659); % 0x6D666363 as int32 + num_magic = 1; +end + +% Load output data +[data, num_channels] = get_file(fn, num_channels, fmt); + +if strcmp(fmt, 's16') + idx1 = find(data == magic(1)); + idx = []; + for i = 1:length(idx1) + if data(idx1(i) + 1) == magic(2) + idx = [idx idx1(i)]; + end + end +else + idx = find(data == magic); +end + +if isempty(idx) + error('No magic value markers found from stream'); +end + +period_mel = idx(2)-idx(1); +num_frames = length(idx); + +% Last frame can be incomplete due to span over multiple periods +last = idx(end) + num_mel - 1; +if (last > length(data)) + num_frames = num_frames - 1; +end + +t_mel = period_mel / num_channels / fs; +t = (0:num_frames -1) * t_mel; +n = 1:num_mel; + +mel = zeros(num_mel, num_frames); +for i = 1:num_frames + i1 = idx(i) + num_magic; + i2 = i1 + num_mel - 1; + mel(:,i) = double(data(i1:i2)) / 2^qformat; +end + +figure; +imagesc(t, n, mel); +axis xy; +colormap(jet); +colorbar; +tstr = sprintf('SOF MFCC Mel coefficients (%s)', fn); +title(tstr, 'Interpreter', 'None'); +xlabel('Time (s)'); +ylabel('Mel coef #'); + +end + +function [data, num_channels] = get_file(fn, num_channels, fmt) + +[~, ~, ext] = fileparts(fn); + +switch fmt + case 's16' + read_fmt = 'int16'; + case {'s24', 's32'} + read_fmt = 'int32'; +end + +switch lower(ext) + case '.raw' + fh = fopen(fn, 'r'); + data = fread(fh, read_fmt); + fclose(fh); + case '.wav' + tmp = audioread(fn, 'native'); + t = whos('tmp'); + switch fmt + case 's16' + if ~strcmp(t.class, 'int16') + error('Expected 16-bit 
wav for s16 format'); + end + case {'s24', 's32'} + if ~strcmp(t.class, 'int32') + error('Expected 32-bit wav for %s format', fmt); + end + end + s = size(tmp); + num_channels = s(2); + if num_channels > 1 + data = zeros(prod(s), 1, t.class); + for i = 1:num_channels + data(i:num_channels:end) = tmp(:, i); + end + else + data = tmp; + end + otherwise + error('Unknown audio format'); +end + +end diff --git a/src/audio/mfcc/tune/run_mfcc.sh b/src/audio/mfcc/tune/run_mfcc.sh index d531e4519755..e3c309fbc03e 100755 --- a/src/audio/mfcc/tune/run_mfcc.sh +++ b/src/audio/mfcc/tune/run_mfcc.sh @@ -4,19 +4,52 @@ set -e -RAW_INPUT=in.raw -RAW_OUTPUT=mfcc.raw +RAW_INPUT_S16=in_s16.raw +RAW_INPUT_S24=in_s24.raw +RAW_INPUT_S32=in_s32.raw +VALGRIND="valgrind --leak-check=full" +#VALGRIND="" TESTBENCH=$SOF_WORKSPACE/sof/tools/testbench/build_testbench/install/bin/sof-testbench4 -TOPOLOGY=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc16.tplg -OPT="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY -i $RAW_INPUT -o $RAW_OUTPUT" +TESTBENCH_RUN="$VALGRIND $TESTBENCH" -# Convert input audio file raw 16 kHz 1 channel 16 bit -sox --encoding signed-integer "$1" -L -r 16000 -c 1 -b 16 "$RAW_INPUT" +convert_input() { + sox -R --encoding signed-integer "$1" -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" + sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 \ + "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S32" + sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 \ + "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S24" vol 0.003906250000 +} -# Run testbench -$TESTBENCH $OPT -i "$RAW_INPUT" -o "$RAW_OUTPUT" +run_testbench() { + local tplg_base="$1" + local out_s16="$2" + local out_s24="$3" + local out_s32="$4" + local label="$5" + local tplg_s16="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}16.tplg" + local tplg_s24="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}24.tplg" + local 
tplg_s32="${SOF_WORKSPACE}/sof/tools/build_tools/topology/topology2/development/${tplg_base}32.tplg" -echo ----------------------------------------------- -echo The MFCC data was output to file $RAW_OUTPUT -echo ----------------------------------------------- + $TESTBENCH_RUN -r 16000 -c 2 -b S16_LE -p 3,4 -t "$tplg_s16" -i "$RAW_INPUT_S16" -o "$out_s16" + $TESTBENCH_RUN -r 16000 -c 2 -b S24_LE -p 3,4 -t "$tplg_s24" -i "$RAW_INPUT_S24" -o "$out_s24" + $TESTBENCH_RUN -r 16000 -c 2 -b S32_LE -p 3,4 -t "$tplg_s32" -i "$RAW_INPUT_S32" -o "$out_s32" + + echo ---------------------------------------------------------------------------------- + echo "The ${label} data was output to file ${out_s16}, ${out_s24}, ${out_s32}" + echo ---------------------------------------------------------------------------------- +} + +main() { + convert_input "$1" + run_testbench "sof-hda-benchmark-mfcc" mfcc_s16.raw mfcc_s24.raw mfcc_s32.raw "MFCC" + run_testbench "sof-hda-benchmark-mfccmel" mel_s16.raw mel_s24.raw mel_s32.raw "MFCC Mel" + + if [ -n "$XTENSA_PATH" ]; then + TESTBENCH_RUN="$XTENSA_PATH/xt-run $SOF_WORKSPACE/sof/tools/testbench/build_xt_testbench/sof-testbench4" + run_testbench "sof-hda-benchmark-mfcc" xt_mfcc_s16.raw xt_mfcc_s24.raw xt_mfcc_s32.raw "Xtensa MFCC" + run_testbench "sof-hda-benchmark-mfccmel" xt_mel_s16.raw xt_mel_s24.raw xt_mel_s32.raw "Xtensa MFCC Mel" + fi +} + +main "$@" diff --git a/src/audio/mfcc/tune/setup_mfcc.m b/src/audio/mfcc/tune/setup_mfcc.m index e0d42e1e034d..bd2b3f11e60b 100644 --- a/src/audio/mfcc/tune/setup_mfcc.m +++ b/src/audio/mfcc/tune/setup_mfcc.m @@ -1,23 +1,36 @@ -% setup_mfcc(cfg) +% setup_mfcc() % -% Input -% cfg - optional MFCC configuration parameters struct, see -% below from code -% -% Create binary configuration blob for MFCC component. The hex data -% is written to tools/topology/topology2/include/components/mfcc and -% tools/topology/topology1/m4/mfcc. +% Create binary configuration blobs for the MFCC component. 
+% The hex data is written to files in directory +% tools/topology/topology2/include/components/mfcc. % SPDX-License-Identifier: BSD-3-Clause % -% Copyright (c) 2018-2026, Intel Corporation. All rights reserved. +% Copyright (c) 2018-2026, Intel Corporation. + +function setup_mfcc() + + gen_cfg.tplg_ver = 2; + gen_cfg.ipc_ver = 4; + gen_cfg.tools_path = '../../../../tools/'; + gen_cfg.mfcc_conf_path = [gen_cfg.tools_path 'topology/topology2/include/components/mfcc/']; + + % Default blob + setup = get_mfcc_default_config(); + setup.tplg_fn = 'default.conf'; + export_mfcc_setup(gen_cfg, setup); -function setup_mfcc(cfg) + % Blob for mel spectrogram data + setup = get_mel_spectrogram_config(); + setup.tplg_fn = 'mel80.conf'; + export_mfcc_setup(gen_cfg, setup); -if nargin < 1 +end + +function cfg = get_mfcc_default_config() cfg.blackman_coef = 0.42; cfg.cepstral_lifter = 22.0; - cfg.channel = -1; % -1 expect mono, 0 left, 1 right ... + cfg.channel = 0; % -1 expect mono, 0 left, 1 right ... 
cfg.dither = 0.0; % no support cfg.energy_floor = 1.0; cfg.frame_length = 25.0; % ms @@ -44,26 +57,54 @@ function setup_mfcc(cfg) cfg.mel_log = 'log'; % Set to 'db' for librosa, set to 'log10' for matlab cfg.pmin = 5e-10; % Set to 1e-10 for librosa cfg.top_db = 200; % Set to 80 for librosa + cfg.mel_offset = 0; % For mel_only mode, no impact with num_ceps > 0 + cfg.mel_scale = 0; % same + cfg.mmax_init = 0; % same + cfg.mmax_coef = 0; % same + cfg.dynamic_mmax = false; % same end -cfg.tools = '../../../../tools/'; - -cfg.tplg_fn = [cfg.tools 'topology/topology1/m4/mfcc/mfcc_config.m4']; -cfg.tplg_ver = 1; -cfg.ipc_ver = 3; -export_mfcc_setup(cfg); - -cfg.tplg_fn = [cfg.tools 'topology/topology2/include/components/mfcc/default.conf']; -cfg.tplg_ver = 2; -cfg.ipc_ver = 4; -export_mfcc_setup(cfg); - +function cfg = get_mel_spectrogram_config() + cfg.blackman_coef = 0; + cfg.cepstral_lifter = 0; + cfg.channel = 0; + cfg.dither = 0; + cfg.energy_floor = 1.0; + cfg.frame_length = 25.0; % 400 samples at 16 kHz + cfg.frame_shift = 10.0; % 160 samples at 16 kHz + cfg.high_freq = 8000; + cfg.htk_compat = false; + cfg.low_freq = 0; + cfg.num_ceps = 0; % Mel-only mode, no DCT + cfg.min_duration = 0; + cfg.norm = 'slaney'; + cfg.num_mel_bins = 80; + cfg.preemphasis_coefficient = 0; + cfg.raw_energy = false; + cfg.remove_dc_offset = false; + cfg.round_to_power_of_two = true; + cfg.sample_frequency = 16000; + cfg.snip_edges = true; + cfg.subtract_mean = false; + cfg.use_energy = false; + cfg.vtln_high = 0; + cfg.vtln_low = 0; + cfg.vtln_warp = 1.0; + cfg.window_type = 'hann'; + cfg.mel_log = 'log10'; + cfg.pmin = 1e-10; + cfg.top_db = 8; % applied for log10, would be 80 dB clamp for decibels as 10*log10() + cfg.mel_offset = 4.0; % For whisper like Mel scale and normalize + cfg.mel_scale = 0.25; % For whisper like Mel scale and normalize + cfg.mmax_init = 0; % Initial value max Mel value, data clamp is mmax - top_db + cfg.mmax_coef = 0; % Dynamic max Mel value decay coefficient 
(zero lock to found max) + cfg.dynamic_mmax = true; end -function export_mfcc_setup(cfg) +function export_mfcc_setup(gen_cfg, cfg) %% Use blob tool from EQ -addpath([cfg.tools 'tune/common']); +addpath([gen_cfg.tools_path 'tune/common']); %% Blob size, size plus reserved(8) + current parameters nbytes_data = 104; @@ -73,7 +114,7 @@ function export_mfcc_setup(cfg) sh16 = [0 -8]; %% Get ABI information -[abi_bytes, nbytes_abi] = sof_get_abi(nbytes_data, cfg.ipc_ver); +[abi_bytes, nbytes_abi] = sof_get_abi(nbytes_data, gen_cfg.ipc_ver); %% Initialize correct size uint8 array nbytes = nbytes_abi + nbytes_data; @@ -86,14 +127,21 @@ function export_mfcc_setup(cfg) %% Apply default MFCC configuration, first struct header and reserved, then data [b8, j] = add_w32b(nbytes_data, b8, j); -for i = 1:8 + +v = q_convert(cfg.mel_offset, 7); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mel_scale, 12); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mmax_init, 7); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mmax_coef, 15); [b8, j] = add_w16b(v, b8, j); + +% Reserved +for i = 1:6 [b8, j] = add_w32b(0, b8, j); end v = q_convert(cfg.sample_frequency, 0); [b8, j] = add_w32b(v, b8, j); v = q_convert(cfg.pmin, 31); [b8, j] = add_w32b(v, b8, j); -v = 0; [b8, j] = add_w32b(v, b8, j); % enum mel_log -v = 0; [b8, j] = add_w32b(v, b8, j); % enum norm +v = get_mel_log_value(cfg.mel_log); [b8, j] = add_w32b(v, b8, j); % enum mel_log +v = get_norm_value(cfg.norm); [b8, j] = add_w32b(v, b8, j); % enum norm v = 0; [b8, j] = add_w32b(v, b8, j); % enum pad v = get_window(cfg); [b8, j] = add_w32b(v, b8, j); % enum window v = 1; [b8, j] = add_w32b(v, b8, j); % enum dct type @@ -119,22 +167,24 @@ function export_mfcc_setup(cfg) v = cfg.snip_edges; [b8, j] = add_w8b(v, b8, j); % bool v = cfg.subtract_mean; [b8, j] = add_w8b(v, b8, j); % bool v = cfg.use_energy; [b8, j] = add_w8b(v, b8, j); % bool +v = cfg.dynamic_mmax; [b8, j] = add_w8b(v, b8, j); % bool %% Export -switch cfg.tplg_ver +tplg_fn 
= [gen_cfg.mfcc_conf_path cfg.tplg_fn]; +switch gen_cfg.tplg_ver case 1 - sof_tplg_write(cfg.tplg_fn, b8, "DEF_MFCC_PRIV", ... + sof_tplg_write(tplg_fn, b8, "DEF_MFCC_PRIV", ... "Exported with script setup_mfcc.m", ... "cd src/audio/mfcc/tune; octave setup_mfcc.m"); case 2 - sof_tplg2_write(cfg.tplg_fn, b8, "mfcc_config", ... + sof_tplg2_write(tplg_fn, b8, "mfcc_config", ... "Exported MFCC configuration", ... "cd src/audio/mfcc/tune; octave setup_mfcc.m"); otherwise - error("Illegal cfg.tplg_ver, use 1 for topology v1 or 2 topology v2."); + error("Illegal tplg_ver, use 1 for topology v1 or 2 topology v2."); end -rmpath([cfg.tools 'tune/common']); +rmpath([gen_cfg.tools_path 'tune/common']); end @@ -157,6 +207,30 @@ function export_mfcc_setup(cfg) end end +function n = get_mel_log_value(mel_log) + switch lower(mel_log) + case 'log' + n = 0; + case 'log10' + n = 1; + case 'db' + n = 2; + otherwise + error('Unknown mel_log type'); + end +end + +function n = get_norm_value(norm) + switch lower(norm) + case 'none' + n = 0; + case 'slaney' + n = 1; + otherwise + error('Unknown norm type'); + end +end + function bytes = w8b(word) bytes = uint8(zeros(1,1)); bytes(1) = bitand(word, 255); diff --git a/src/include/sof/audio/mfcc/mfcc_comp.h b/src/include/sof/audio/mfcc/mfcc_comp.h index 7323428ec37d..abee71faf947 100644 --- a/src/include/sof/audio/mfcc/mfcc_comp.h +++ b/src/include/sof/audio/mfcc/mfcc_comp.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * - * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2022-2026 Intel Corporation. * * Author: Seppo Ingalsuo */ @@ -36,7 +36,7 @@ * set to 32 the FFT and Mel filterbank are computed with better 32 bit precision. There * is also need to enable 32 bit FFT from Kconfig if set. */ -#define MFCC_FFT_BITS 16 +#define MFCC_FFT_BITS 32 /* MFCC with 16 bit FFT benefits from data normalize, for 32 bits there's no * significant impact. 
The amount of left shifts for FFT input is limited to @@ -114,6 +114,8 @@ struct mfcc_state { struct mat_matrix_16b *mel_spectra; /**< Pointer to scratch */ struct mat_matrix_16b *cepstral_coef; /**< Pointer to scratch */ int32_t *power_spectra; /**< Pointer to scratch */ + int32_t *mel_log_32; /**< Pointer to scratch for 32-bit Mel output Q9.23 */ + int32_t mmax; /**< Maximum Mel value in Q9.23 */ int16_t buf_avail; int16_t *buffers; int16_t *prev_data; /**< prev_data_size */ @@ -125,9 +127,14 @@ struct mfcc_state { int low_freq; int high_freq; int sample_rate; + bool mel_only; /**< When true, output Mel spectra instead of cepstral coefficients */ bool waiting_fill; /**< booleans */ bool prev_samples_valid; + bool magic_pending; /**< True when magic word not yet written for current output */ size_t sample_buffers_size; /**< bytes */ + int16_t *out_data_ptr; /**< Read pointer into scratch data for multi-period output */ + int32_t *out_data_ptr_32; /**< Read pointer for 32-bit mel-only output */ + int out_remain; /**< Remaining int16_t samples to write to sink from scratch */ }; /* MFCC component private data */ @@ -156,12 +163,6 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int rate, int chan void mfcc_free_buffers(struct processing_module *mod); -void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, - struct output_stream_buffer *bsink, int frames); - -void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, - struct mfcc_pre_emph *emph, int frames, int source_channel); - void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length); @@ -175,16 +176,31 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift); #if CONFIG_FORMAT_S16LE -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples); - -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t 
*r_ptr); +void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, struct output_stream_buffer *bsink, int frames); #endif +#if CONFIG_FORMAT_S24LE + +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); + +void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames); +#endif + +#if CONFIG_FORMAT_S32LE + +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); + +void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames); +#endif + #ifdef UNIT_TEST void sys_comp_module_mfcc_interface_init(void); #endif diff --git a/src/include/sof/math/auditory.h b/src/include/sof/math/auditory.h index b09017786e36..b3fd46dcf26f 100644 --- a/src/include/sof/math/auditory.h +++ b/src/include/sof/math/auditory.h @@ -108,6 +108,6 @@ void psy_apply_mel_filterbank_16(struct psy_mel_filterbank *mel_fb, struct icomp * be subtracted from the log or decibels notation. 
*/ void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *mel_fb, struct icomplex32 *fft_out, - int32_t *power_spectra, int16_t *mel_log, int bitshift); + int32_t *power_spectra, int32_t *mel_log, int bitshift); #endif /* __SOF_MATH_AUDITORY_H__ */ diff --git a/src/include/sof/math/fft.h b/src/include/sof/math/fft.h index df06baf47c81..f98cb724506a 100644 --- a/src/include/sof/math/fft.h +++ b/src/include/sof/math/fft.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/src/include/user/mfcc.h b/src/include/user/mfcc.h index 7a5b7fcca98e..8a0defcd9883 100644 --- a/src/include/user/mfcc.h +++ b/src/include/user/mfcc.h @@ -50,7 +50,11 @@ enum sof_mfcc_dct_type { */ struct sof_mfcc_config { uint32_t size; /**< Size of this struct in bytes */ - uint32_t reserved[8]; + int16_t mel_offset; /**< Q8.7 default 0, use 4.0 for Whisper */ + int16_t mel_scale; /**< Q4.12 default 1.0, use 0.25 for Whisper */ + int16_t mmax_init; /**< Q8.7 default 0, with dynamic_mmax false, can sim. Whisper mmax */ + int16_t mmax_coef; /**< Q1.15 decay coefficient for dynamic mmax, a small value for slow */ + uint32_t reserved[6]; int32_t sample_frequency; /**< Hz. e.g. 16000 */ int32_t pmin; /**< Q1.31 linear power, limit minimum Mel energy, e.g. 1e-9 */ enum sof_mfcc_mel_log_type mel_log; /**< Use MEL_LOG_IS_LOG, LOG10 or DB*/ @@ -69,7 +73,7 @@ struct sof_mfcc_config { int16_t num_ceps; /**< Number of cepstral coefficients, e.g. 13 */ int16_t num_mel_bins; /**< Number of internal Mel bands, e.g. 23 */ int16_t preemphasis_coefficient; /**< Q1.15, e.g. 0.97, or 0 for disable */ - int16_t top_db; /**< Q8.7 dB, limit Mel energies to this value e.g. 200 */ + int16_t top_db; /**< Q8.7 dB, limit min. Mel energies to chunk max - top_dB, e.g. 
80 */ int16_t vtln_high; /**< Reserved, no support */ int16_t vtln_low; /**< Reserved, no support */ int16_t vtln_warp; /**< Reserved, no support */ @@ -80,7 +84,7 @@ struct sof_mfcc_config { bool snip_edges; /**< Must be true (1) */ bool subtract_mean; /**< Must be false (0) */ bool use_energy; /**< Must be false (0) */ - bool reserved_bool1; + bool dynamic_mmax; /**< Track max Mel value for clamp with top_db value */ bool reserved_bool2; bool reserved_bool3; } __attribute__((packed)); diff --git a/src/math/auditory/mel_filterbank_32.c b/src/math/auditory/mel_filterbank_32.c index a80d09ad624a..414ddf482f93 100644 --- a/src/math/auditory/mel_filterbank_32.c +++ b/src/math/auditory/mel_filterbank_32.c @@ -12,7 +12,7 @@ #include void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *fb, struct icomplex32 *fft_out, - int32_t *power_spectra, int16_t *mel_log, int bitshift) + int32_t *power_spectra, int32_t *mel_log, int bitshift) { int64_t pmax; int64_t p; @@ -79,8 +79,8 @@ void psy_apply_mel_filterbank_32(struct psy_mel_filterbank *fb, struct icomplex3 */ log -= ((int32_t)lshift + 2 * bitshift) << 16; - /* Scale for desired log */ - log = Q_MULTSR_32X32((int64_t)log, fb->log_mult, 16, 29, 7); - mel_log[i] = sat_int16(log); /* Q8.7 */ + /* Scale for desired log, output as Q9.23 */ + log = Q_MULTSR_32X32((int64_t)log, fb->log_mult, 16, 29, 23); + mel_log[i] = log; /* Q9.23 */ } } diff --git a/test/cmocka/src/math/auditory/auditory.c b/test/cmocka/src/math/auditory/auditory.c index dc05c387cfae..ff222e52fadd 100644 --- a/test/cmocka/src/math/auditory/auditory.c +++ b/test/cmocka/src/math/auditory/auditory.c @@ -163,7 +163,8 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag, float error_rms; float delta_max = 0; int32_t *power_spectra; - int16_t *mel_log; + int32_t *mel_log; + int16_t mel_log_16; int i; const int half_fft = num_fft_bins / 2 + 1; const int fft_size = num_fft_bins * sizeof(struct icomplex32); @@ -181,7 +182,7 @@ static 
void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag, goto err_out_alloc; } - mel_log = malloc(MEL_FILTERBANK_32_TEST1_NUM_MEL_BINS * sizeof(int16_t)); + mel_log = malloc(num_mel_bins * sizeof(int32_t)); if (!mel_log) { fprintf(stderr, "Failed to allocate output vector\n"); goto err_mel_alloc; @@ -215,9 +216,10 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag, power_spectra = (int32_t *)&fft_buf[0]; psy_apply_mel_filterbank_32(&fb, fft_out, power_spectra, mel_log, shift); - /* Check */ + /* Check: convert Q9.23 output to Q9.7 for comparison with reference */ for (i = 0; i < num_mel_bins; i++) { - delta = (float)ref_mel_log[i] - (float)mel_log[i]; + mel_log_16 = (int16_t)(mel_log[i] >> 16); + delta = (float)ref_mel_log[i] - (float)mel_log_16; sum_squares += delta * delta; if (delta > delta_max) delta_max = delta; @@ -233,7 +235,7 @@ static void filterbank_32_test(const int32_t *fft_real, const int32_t *fft_imag, FILE *fh = fopen("mel_filterbank_32.txt", "w"); for (i = 0; i < num_mel_bins; i++) - fprintf(fh, "%d %d\n", ref_mel_log[i], mel_log[i]); + fprintf(fh, "%d %d\n", ref_mel_log[i], (int16_t)(mel_log[i] >> 16)); fclose(fh); #endif diff --git a/tools/topology/topology2/cavs-benchmark-hda.conf b/tools/topology/topology2/cavs-benchmark-hda.conf index 62c0ad4f4fbc..95ab67431812 100644 --- a/tools/topology/topology2/cavs-benchmark-hda.conf +++ b/tools/topology/topology2/cavs-benchmark-hda.conf @@ -834,6 +834,16 @@ IncludeByKey.BENCH_CONFIG { } + "mfccmel16" { + + } + "mfccmel24" { + + } + "mfccmel32" { + + } + # # Micsel component # diff --git a/tools/topology/topology2/development/tplg-targets-bench.cmake b/tools/topology/topology2/development/tplg-targets-bench.cmake index eff707d49aa9..5c0f82dc7dfc 100644 --- a/tools/topology/topology2/development/tplg-targets-bench.cmake +++ b/tools/topology/topology2/development/tplg-targets-bench.cmake @@ -19,6 +19,7 @@ set(components "igo_nr" "level_multiplier" "mfcc" + 
"mfccmel" "micsel" "rtnr" "sound_dose" @@ -45,6 +46,7 @@ set(component_parameters "BENCH_IGO_NR_PARAMS=default" "BENCH_LEVEL_MULTIPLIER_PARAMS=default" "BENCH_MFCC_PARAMS=default" + "BENCH_MFCC_PARAMS=mel80" "BENCH_MICSEL_PARAMS=passthrough" "BENCH_RTNR_PARAMS=default" "BENCH_SOUND_DOSE_PARAMS=default" diff --git a/tools/topology/topology2/include/bench/mfcc_controls_capture.conf b/tools/topology/topology2/include/bench/mfcc_controls_capture.conf index 56a731b86687..d45baec1ee8f 100644 --- a/tools/topology/topology2/include/bench/mfcc_controls_capture.conf +++ b/tools/topology/topology2/include/bench/mfcc_controls_capture.conf @@ -6,6 +6,7 @@ name '$ANALOG_CAPTURE_PCM MFCC bytes' IncludeByKey.BENCH_MFCC_PARAMS { "default" "include/components/mfcc/default.conf" + "mel80" "include/components/mfcc/mel80.conf" } } #mixer."1" { diff --git a/tools/topology/topology2/include/bench/mfcc_controls_playback.conf b/tools/topology/topology2/include/bench/mfcc_controls_playback.conf index 7649678c8468..cc2ada04b8d7 100644 --- a/tools/topology/topology2/include/bench/mfcc_controls_playback.conf +++ b/tools/topology/topology2/include/bench/mfcc_controls_playback.conf @@ -6,6 +6,7 @@ name '$ANALOG_PLAYBACK_PCM MFCC bytes' IncludeByKey.BENCH_MFCC_PARAMS { "default" "include/components/mfcc/default.conf" + "mel80" "include/components/mfcc/mel80.conf" } } #mixer."1" { diff --git a/tools/topology/topology2/include/bench/mfccmel_s16.conf b/tools/topology/topology2/include/bench/mfccmel_s16.conf new file mode 100644 index 000000000000..ec89bffb90a1 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s16.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/bench/mfccmel_s24.conf b/tools/topology/topology2/include/bench/mfccmel_s24.conf new file mode 100644 index 
000000000000..73571fabe5f2 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s24.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/bench/mfccmel_s32.conf b/tools/topology/topology2/include/bench/mfccmel_s32.conf new file mode 100644 index 000000000000..75c01eaf4a43 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s32.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/components/mfcc/default.conf b/tools/topology/topology2/include/components/mfcc/default.conf index 1f9141886de9..42a6d6608b8b 100644 --- a/tools/topology/topology2/include/components/mfcc/default.conf +++ b/tools/topology/topology2/include/components/mfcc/default.conf @@ -1,9 +1,9 @@ -# Exported MFCC configuration 24-Jul-2024 -# cd tools/tune/mfcc; octave setup_mfcc.m +# Exported MFCC configuration 05-May-2026 +# cd src/audio/mfcc/tune; octave setup_mfcc.m Object.Base.data."mfcc_config" { bytes " 0x53,0x4f,0x46,0x34,0x00,0x00,0x00,0x00, - 0x68,0x00,0x00,0x00,0x00,0xa0,0x01,0x03, + 0x68,0x00,0x00,0x00,0x01,0xd0,0x01,0x03, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x68,0x00,0x00,0x00,0x00,0x00,0x00,0x00, @@ -14,7 +14,7 @@ Object.Base.data."mfcc_config" { 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x02,0x00,0x00,0x00,0x01,0x00,0x00,0x00, - 0xc3,0x35,0x00,0x2c,0xff,0xff,0x00,0x00, + 0xc3,0x35,0x00,0x2c,0x00,0x00,0x00,0x00, 0x90,0x01,0xa0,0x00,0x00,0x00,0x14,0x00, 0x0d,0x00,0x17,0x00,0x00,0x00,0x00,0x64, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01, diff --git 
a/tools/topology/topology2/include/components/mfcc/mel80.conf b/tools/topology/topology2/include/components/mfcc/mel80.conf new file mode 100644 index 000000000000..04aa2a15c660 --- /dev/null +++ b/tools/topology/topology2/include/components/mfcc/mel80.conf @@ -0,0 +1,22 @@ +# Exported MFCC configuration 05-May-2026 +# cd src/audio/mfcc/tune; octave setup_mfcc.m +Object.Base.data."mfcc_config" { + bytes " + 0x53,0x4f,0x46,0x34,0x00,0x00,0x00,0x00, + 0x68,0x00,0x00,0x00,0x01,0xd0,0x01,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x68,0x00,0x00,0x00,0x00,0x02,0x00,0x04, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x80,0x3e,0x00,0x00, + 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x90,0x01,0xa0,0x00,0x40,0x1f,0x00,0x00, + 0x00,0x00,0x50,0x00,0x00,0x00,0x00,0x04, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x01,0x01,0x00,0x00,0x01,0x00,0x00" +}