client-sdk-cpp/include/livekit/audio_processing_module.h at 2080da2fb3c9e70d885de5b061baff5b52621977 · livekit/client-sdk-cpp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
 * Copyright 2025 LiveKit
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <cstdint>

#include "livekit/audio_frame.h"
#include "livekit/ffi_handle.h"

namespace livekit {

/**
 * @brief WebRTC Audio Processing Module (APM) for real-time audio enhancement.
 *
 * AudioProcessingModule exposes WebRTC's built-in audio processing capabilities
 * including echo cancellation, noise suppression, automatic gain control, and
 * high-pass filtering.
 *
 * This class is designed for scenarios where you need explicit control over
 * audio processing, separate from the built-in processing in AudioSource.
 *
 * Typical usage pattern for echo cancellation:
 * 1. Create an APM with the desired features enabled.
 * 2. Call processReverseStream() with speaker/playback audio (the reference
 *    signal).
 * 3. Call processStream() with microphone audio (the near-end signal).
 * 4. The processed microphone audio will have the echo removed.
 *
 * Ownership: the instance holds a single FfiHandle. It is move-only, so the
 * handle always has exactly one owner.
 *
 * @note Audio frames must be exactly 10ms in duration.
 */
class AudioProcessingModule {
public:
  /**
   * @brief Configuration options for the Audio Processing Module.
   *
   * Every feature is disabled by default; enable only what you need:
   *
   * @code
   *   AudioProcessingModule::Options opts;
   *   opts.echo_cancellation = true;
   *   opts.noise_suppression = true;
   *   AudioProcessingModule apm(opts);
   * @endcode
   *
   * No constructor is declared on purpose: the implicit default constructor
   * already applies the member initializers below, and leaving it undeclared
   * keeps Options an aggregate under C++20 as well, so brace and designated
   * initialization remain available to callers.
   */
  struct Options {
    /// Enable acoustic echo cancellation (AEC3).
    /// Removes acoustic echo in two-way communication scenarios.
    bool echo_cancellation = false;

    /// Enable noise suppression.
    /// Reduces background noise from non-speech sources.
    bool noise_suppression = false;

    /// Enable high-pass filter.
    /// Removes low-frequency noise below ~80 Hz (DC offset, rumble).
    bool high_pass_filter = false;

    /// Enable automatic gain control (AGC).
    /// Auto-adjusts microphone gain to maintain consistent audio levels.
    bool auto_gain_control = false;
  };

  /**
   * @brief Create a new Audio Processing Module with default options (all
   * disabled).
   *
   * @throws std::runtime_error if the APM could not be created.
   */
  AudioProcessingModule();

  /**
   * @brief Create a new Audio Processing Module with the specified options.
   *
   * The constructor is explicit so that an Options value is never silently
   * converted into a module.
   *
   * @param options Configuration for which processing features to enable.
   * @throws std::runtime_error if the APM could not be created.
   */
  explicit AudioProcessingModule(const Options &options);

  /// Virtual so that destroying a derived object through a pointer to
  /// AudioProcessingModule is well-defined.
  virtual ~AudioProcessingModule() = default;

  // Non-copyable: the underlying handle has a single owner.
  AudioProcessingModule(const AudioProcessingModule &) = delete;
  AudioProcessingModule &operator=(const AudioProcessingModule &) = delete;

  // Movable: ownership of the handle transfers to the destination.
  // NOTE(review): the moved-from instance presumably ends up with an invalid
  // handle and should only be destroyed or reassigned — confirm against
  // FfiHandle's move semantics.
  AudioProcessingModule(AudioProcessingModule &&) noexcept = default;
  AudioProcessingModule &operator=(AudioProcessingModule &&) noexcept = default;

  /**
   * @brief Process the forward (near-end/microphone) audio stream.
   *
   * This method processes audio captured from the local microphone. It applies
   * the enabled processing features (noise suppression, gain control, etc.)
   * and removes echo based on the reference signal provided via
   * processReverseStream().
   *
   * The audio data is modified in-place.
   *
   * @param frame The audio frame to process (modified in-place).
   *
   * @throws std::runtime_error if processing fails.
   *
   * @note The frame must contain exactly 10ms of audio.
   */
  void processStream(AudioFrame &frame);

  /**
   * @brief Process the reverse (far-end/speaker) audio stream.
   *
   * This method provides the reference signal for echo cancellation. Call this
   * with the audio that is being played through the speakers, so the APM can
   * learn the acoustic characteristics and remove the echo from the microphone
   * signal.
   *
   * The audio data is modified in-place.
   *
   * @param frame The audio frame to process (modified in-place).
   *
   * @throws std::runtime_error if processing fails.
   *
   * @note The frame must contain exactly 10ms of audio.
   */
  void processReverseStream(AudioFrame &frame);

  /**
   * @brief Set the estimated delay between the reverse and forward streams.
   *
   * This must be called if and only if echo processing is enabled.
   *
   * Sets the delay in ms between processReverseStream() receiving a far-end
   * frame and processStream() receiving a near-end frame containing the
   * corresponding echo. On the client-side this can be expressed as:
   *
   *   delay = (t_render - t_analyze) + (t_process - t_capture)
   *
   * where:
   *   - t_analyze is the time a frame is passed to processReverseStream() and
   *     t_render is the time the first sample of the same frame is rendered by
   *     the audio hardware.
   *   - t_capture is the time the first sample of a frame is captured by the
   *     audio hardware and t_process is the time the same frame is passed to
   *     processStream().
   *
   * @param delay_ms Delay in milliseconds.
   *
   * @throws std::runtime_error if setting the delay fails.
   */
  void setStreamDelayMs(int delay_ms);

private:
  /// Report whether the APM handle is valid (used internally); forwards to
  /// FfiHandle::valid().
  bool valid() const noexcept { return handle_.valid(); }

  /// Return the underlying FFI handle ID (used internally), i.e. the value of
  /// handle_.get() converted to std::uint64_t.
  std::uint64_t ffi_handle_id() const noexcept {
    return static_cast<std::uint64_t>(handle_.get());
  }

  /// FFI handle of the APM instance this object operates on.
  FfiHandle handle_;
};

} // namespace livekit