Audio: aec: optimize acoustic echo cancellation processing

This commit introduces performance optimizations to the Acoustic Echo Cancellation (AEC) audio implementation. The changes primarily restructure loops and memory copy operations so that processing uses fewer cycles.

Signed-off-by: shastry <[email protected]>
thesofproject · Feb 26, 2024 · cfd021f · cfd021f
1 parent 3681e09
commit cfd021f
Show file tree

Hide file tree

Showing 2 changed files with 266 additions and 86 deletions.
diff --git a/src/audio/google/google_rtc_audio_processing.c b/src/audio/google/google_rtc_audio_processing.c
@@ -42,6 +42,11 @@
 #define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100
 #define GOOGLE_RTC_NUM_INPUT_PINS 2
 #define GOOGLE_RTC_NUM_OUTPUT_PINS 1
+#define ERR_INVALID_REF -1
+#define ERR_MEMCPY_FAIL -2
+#define ERR_INVALID_SRC -3
+#define ERR_INVALID_DST -4
+
 
 LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL);
 
@@ -791,7 +796,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	size_t dst_buf_size;
 
 	size_t num_of_bytes_to_process;
-	size_t channel;
 	size_t buffer_offset;
 
 	struct sof_source *ref_stream, *src_stream;
@@ -822,23 +826,66 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
 	 * 16int: linearize buffer, skip channels if > Max
 	 */
+	/* Reduce cycle waste by streamlining the inner loop,
+	 * converting from array indexing to pointer arithmetic,
+	 * and putting data copy verification outside the loop.
+	 */
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
+	const int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;
+
+	/* Check if the calculated end of the reference buffer exceeds the actual end
+	 * of the buffer
+	 */
+	if (ref_end >= (const int16_t *)ref_buf_end)
+		/* If it does, wrap the reference buffer end pointer back to the start
+		 * of the buffer
+		 */
+		ref_end = (const int16_t *)ref_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-			cd->aec_reference_buffer_ptrs[channel][i] =
-					convert_int16_to_float(ref[channel]);
+	float **ref_ptr = cd->aec_reference_buffer_ptrs;
+	int s_chan;
+	int i;
+
+	/*  Loop over frames and channels, converting data from int16 to float */
+	for (i = 0; i < cd->num_frames; ++i) {
+		for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
+		/*  Check that ref is within the valid range of the ref_buf buffer */
+			if (ref && (void *)ref >= (void *)ref_buf_start &&
+			    (void *)ref < (void *)ref_buf_end)
+				(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
+			else
+				/*  ref does not point to valid int16_t data */
+				return ERR_INVALID_REF;
+		}
+		ref_ptr++;
+	}
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-			cd->aec_reference_buffer[buffer_offset++] = ref[channel];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
+	int16_t *ref_buf = cd->aec_reference_buffer;
 
-		}
+	/* Check that ref is within the valid range of the ref_buf buffer */
+	if (ref && ref >= (int16_t *)ref_buf_start && ref < (int16_t *)ref_buf_end) {
+		/* Use memcpy_s to copy the data from ref buffer to ref_buf buffer until it reaches
+		 * ref_end
+		 * This assumes that the data in the ref buffer is contiguous
+		 */
+		size_t num_bytes = (ref_end - ref) * sizeof(*ref);
 
-		ref += cd->num_aec_reference_channels;
-		if ((void *)ref >= (void *)ref_buf_end)
-			ref = (void *)ref_buf_start;
+		if (memcpy_s(ref_buf, num_bytes, ref, num_bytes) != 0) {
+			/*  Handle error */
+			return ERR_MEMCPY_FAIL;
+		}
+		/*  Update the ref and ref_buf pointers */
+		ref = ref_end;
+		ref_buf += (ref_end - ref);
+	} else {
+		/*  ref does not point to valid int16_t data */
+		return ERR_MEMCPY_FAIL;
 	}
 
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
 	GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
 						      (const float **)
@@ -855,26 +902,73 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 			      (const void **)&src_buf_start,  &src_buf_size);
 	assert(!ret);
 	src_buf_end = src_buf_start + src_buf_size;
-
+	/* The second optimization eliminates the inner loop
+	 * and replaces it with pointer arithmetic for speedier access.
+	 * To reduce cycle waste, the data copy check is moved outside of the loop.
+	 */
+	/* Initialize error_code to 0 (no error) */
+	int error_code = 0;
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->num_capture_channels; channel++)
+	const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
+
+	/* Check if the calculated end of the source buffer exceeds the actual end of the buffer */
+	if (src_end >= (const int16_t *)src_buf_end)
+		/* If it does, wrap the source buffer end pointer back to the start of the
+		 * buffer
+		 */
+		src_end = (const int16_t *)src_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-		cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
+	/* Declare a pointer to the process buffer */
+	float **proc_ptr = cd->process_buffer_ptrs;
+
+	/* Check for null pointers and buffer overflows */
+	if (!src || !proc_ptr || src >= (const int16_t *)src_end)
+		/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
+		error_code = ERR_INVALID_SRC;
+	else
+		/* If there's no error, continue processing */
+		while (src != (const int16_t *)src_end) {
+			/* If the source pointer has reached or exceeded the end of the source
+			 * buffer, wrap it back to the start
+			 */
+			if (src >= (const int16_t *)src_buf_end)
+				src = (const int16_t *)src_buf_start;
+
+			/* Convert the source data from int16_t to float and store it in the
+			 * process buffer
+			 */
+			*proc_ptr++ = convert_int16_to_float(src++);
+		}
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-		cd->process_buffer[buffer_offset++] = src[channel];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
+	/* Declare a pointer to the process buffer */
+	int16_t *proc_buf = cd->process_buffer;
+
+	/* Check for null pointers and buffer overflows */
+	if (!src || !proc_buf || src >= (int16_t *)src_end)
+		/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
+		error_code = ERR_INVALID_SRC;
+	else
+		/* If there's no error, continue processing */
+		while (src != (int16_t *)src_end) {
+			/* If the source pointer has reached or exceeded the end of the source
+			 * buffer, wrap it back to the start
+			 */
+			if (src >= (int16_t *)src_buf_end)
+				src = (int16_t *)src_buf_start;
+
+			/* Copy the source data to the process buffer */
+			*proc_buf++ = *src++;
+		}
 
-		/* move pointer to next frame
-		 * number of incoming channels may be < cd->num_capture_channels
-		 */
-		src += cd->config.output_fmt.channels_count;
-		if ((void *)src >= (void *)src_buf_end)
-			src = (void *)src_buf_start;
-	}
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
 
 	source_release_data(src_stream, num_of_bytes_to_process);
 
+	/* Return the error code. If there was no error, this will be 0 */
+	return error_code;
+
 	/* call the library, use same in/out buffers */
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
 	GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
@@ -894,25 +988,45 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 
 	/* process all channels in output stream */
 	buffer_offset = 0;
-	for (int i = 0; i < cd->num_frames; i++) {
-		for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
-			/* set data in processed channels, zeroize not processed */
-			if (channel < cd->num_capture_channels)
+
+	/* Calculate the end of the destination buffer based on the number of frames and
+	 * channels
+	 */
+	int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
+
+	/* Check if the calculated end of the destination buffer exceeds the actual end
+	 *of the buffer
+	 */
+	if (dst_end >= (int16_t *)dst_buf_end)
+		/* If it does, wrap the destination buffer end pointer back to the start of
+		 * the buffer
+		 */
+		dst_end = (int16_t *)dst_buf_start;
+
 #if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
-				dst[channel] = convert_float_to_int16(
-						   cd->process_buffer_ptrs[channel][i]);
+	float **proc_ptr = cd->process_buffer_ptrs;
+
+	/* Check for null pointers and buffer overflows */
+	if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
+		return ERR_INVALID_DST;
+
+	/* Convert data from float to int16_t and store it in the destination buffer */
+	for (; dst != dst_end; ++dst, ++proc_ptr)
+		*dst = convert_float_to_int16(*proc_ptr);
+
 #else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-				dst[channel] = cd->process_buffer[buffer_offset++];
-#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
-			else
-				dst[channel] = 0;
-		}
+	int16_t *process_buffer = cd->process_buffer;
 
-		dst += cd->config.output_fmt.channels_count;
-		if ((void *)dst >= (void *)dst_buf_end)
-			dst = (void *)dst_buf_start;
-	}
+	/* Check for null pointers and buffer overflows */
+	if (!dst || !process_buffer || dst >= dst_end ||
+	    process_buffer >= process_buffer + cd->num_frames)
+		return ERR_INVALID_DST;
 
+	/* Copy the data from the process buffer to the destination buffer */
+	for (; dst != dst_end; ++dst, ++process_buffer)
+		*dst = *process_buffer;
+
+#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
 	sink_commit_buffer(dst_stream, num_of_bytes_to_process);
 
 	return 0;
@@ -928,6 +1042,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	int16_t *src, *dst, *ref;
 	uint32_t num_aec_reference_frames;
 	uint32_t num_aec_reference_bytes;
+	int ref_channels;
+	int aec_ref_product;
 	int num_samples_remaining;
 	int num_frames_remaining;
 	int channel;
@@ -950,25 +1066,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	ref_stream = ref_streamb->data;
 	ref = audio_stream_get_rptr(ref_stream);
 
+	/* Pre-calculate the number of channels in the reference stream for efficiency */
+	ref_channels = audio_stream_get_channels(ref_stream);
+
+	/*  Pre-calculate the product of the number of AEC reference channels and the AEC
+	 *  reference frame index
+	 */
+	aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
+
 	num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
 	num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
 
-	num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
+	num_samples_remaining = num_aec_reference_frames * ref_channels;
 	while (num_samples_remaining) {
 		nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
 		n = MIN(num_samples_remaining, nmax);
 		for (i = 0; i < n; i += cd->num_aec_reference_channels) {
-			j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
+			j = aec_ref_product;
 			for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
 				cd->aec_reference_buffer[j++] = ref[channel];
-
-			ref += audio_stream_get_channels(ref_stream);
+			ref += ref_channels;
 			++cd->aec_reference_frame_index;
-
 			if (cd->aec_reference_frame_index == cd->num_frames) {
 				GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
-									    cd->aec_reference_buffer);
+									 cd->aec_reference_buffer);
 				cd->aec_reference_frame_index = 0;
+				/* Reset the product as the frame index is reset */
+				aec_ref_product = 0;
 			}
 		}
 		num_samples_remaining -= n;
@@ -984,6 +1108,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 	src = audio_stream_get_rptr(mic_stream);
 	dst = audio_stream_get_wptr(out_stream);
 
+	/* Move out of loop */
+	int mic_stream_channels = audio_stream_get_channels(mic_stream);
 	frames = input_buffers[cd->raw_microphone_source].size;
 	num_frames_remaining = frames;
 
@@ -993,34 +1119,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
 		nmax = audio_stream_frames_without_wrap(out_stream, dst);
 		n = MIN(n, nmax);
 		for (i = 0; i < n; i++) {
-			memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
-						      cd->num_capture_channels]),
-				 cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->raw_mic_buffer[0]), src,
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->raw_mic_buffer_frame_index;
-
-			memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
-				 sizeof(cd->output_buffer[0]),
-				 &(cd->output_buffer[cd->output_buffer_frame_index *
-						     cd->num_capture_channels]),
-				 sizeof(int16_t) * cd->num_capture_channels);
-			++cd->output_buffer_frame_index;
-
-			if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
-				GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
-									     cd->raw_mic_buffer,
-									     cd->output_buffer);
-				cd->output_buffer_frame_index = 0;
-				cd->raw_mic_buffer_frame_index = 0;
+			/* If we haven't filled the buffer yet, copy the data */
+			if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
+				memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
+							      cd->num_capture_channels]),
+					 cd->num_frames * cd->num_capture_channels *
+					 sizeof(cd->raw_mic_buffer[0]), src,
+					 sizeof(int16_t) * cd->num_capture_channels);
+				++cd->raw_mic_buffer_frame_index;
+			}
+
+			if (cd->output_buffer_frame_index < cd->num_frames) {
+				memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
+					 sizeof(cd->output_buffer[0]),
+					 &(cd->output_buffer[cd->output_buffer_frame_index *
+							     cd->num_capture_channels]),
+					 sizeof(int16_t) * cd->num_capture_channels);
+				++cd->output_buffer_frame_index;
 			}
 
-			src += audio_stream_get_channels(mic_stream);
-			dst += audio_stream_get_channels(out_stream);
+			src += mic_stream_channels;
+			dst += mic_stream_channels;
 		}
 		num_frames_remaining -= n;
 		src = audio_stream_wrap(mic_stream, src);
 		dst = audio_stream_wrap(out_stream, dst);
+
+		/* If we've filled the buffer, process the data */
+		if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
+			GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
+								     cd->raw_mic_buffer,
+								     cd->output_buffer);
+			cd->output_buffer_frame_index = 0;
+			cd->raw_mic_buffer_frame_index = 0;
+		}
 	}
 
 	module_update_buffer_position(&input_buffers[cd->raw_microphone_source],