diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..863d1ea --- /dev/null +++ b/.clang-format @@ -0,0 +1,116 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: 
'^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +RawStringFormats: + - Language: TextProto + Delimiters: + - 'pb' + - 'proto' + BasedOnStyle: google +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: c++17 +TabWidth: 8 +UseTab: Never +... 
\ No newline at end of file diff --git a/.github/workflows/build-check.yml b/.github/workflows/build-check.yml index 88f549e..3ffe2e5 100644 --- a/.github/workflows/build-check.yml +++ b/.github/workflows/build-check.yml @@ -9,49 +9,43 @@ on: jobs: build: name: Build ${{ matrix.arch }} - runs-on: ubuntu-latest + runs-on: ${{ matrix.runner }} strategy: matrix: - arch: [arm64, amd64] + include: + - arch: amd64 + runner: ubuntu-latest + - arch: arm64 + runner: ubuntu-24.04-arm steps: - name: Checkout code uses: actions/checkout@v4 with: submodules: recursive - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake git - - name: Build srtla for ${{ matrix.arch }} + - name: Build srtla run: | - PLATFORM="linux/${{ matrix.arch }}" + cmake -B build -DCMAKE_BUILD_TYPE=Release + cmake --build build -j$(nproc) - docker buildx build \ - --platform "$PLATFORM" \ - --load \ - -t srtla-builder:${{ matrix.arch }} \ - -f - . <<'DOCKERFILE' - FROM debian:bookworm-slim - RUN apt-get update && apt-get install -y \ - build-essential \ - cmake \ - git \ - libspdlog-dev \ - && rm -rf /var/lib/apt/lists/* - WORKDIR /build - COPY . . 
- RUN cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) - DOCKERFILE + - name: Run tests + run: | + cd build + ctest --output-on-failure - - name: Verify binaries were built + - name: Verify binaries run: | - docker run --rm srtla-builder:${{ matrix.arch }} ls -la /build/build/srtla_send /build/build/srtla_rec + ls -la build/srtla_send build/srtla_rec + file build/srtla_send build/srtla_rec - name: Build Summary run: | echo "## ✅ Build Check Passed" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "**Architecture:** ${{ matrix.arch }}" >> $GITHUB_STEP_SUMMARY + echo "**Runner:** ${{ matrix.runner }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 1eeda81..3be9a98 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -66,57 +66,33 @@ jobs: build-deb: name: Build Debian Package (${{ matrix.arch }}) needs: calculate-version - runs-on: ubuntu-latest + runs-on: ${{ matrix.runner }} strategy: matrix: - arch: [arm64, amd64] + include: + - arch: amd64 + runner: ubuntu-latest + - arch: arm64 + runner: ubuntu-24.04-arm steps: - name: Checkout code uses: actions/checkout@v4 with: submodules: recursive - - name: Set up QEMU - if: matrix.arch == 'arm64' - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build in Docker (${{ matrix.arch }}) + - name: Install build dependencies run: | - mkdir -p build-output - - cat > Dockerfile.build << 'DOCKERFILE' - FROM debian:bookworm - - RUN apt-get update && apt-get install -y \ - build-essential \ - cmake \ - pkg-config \ - libspdlog-dev \ - && rm -rf /var/lib/apt/lists/* - - WORKDIR /src - COPY . . 
- - RUN cmake -B build -DCMAKE_BUILD_TYPE=Release -DSRTLA_BUILD_TESTS=OFF \ - -DCMAKE_INSTALL_PREFIX=/usr - RUN cmake --build build -j$(nproc) - RUN DESTDIR=/output cmake --install build - DOCKERFILE + sudo apt-get update + sudo apt-get install -y build-essential cmake pkg-config ruby-dev - docker buildx build \ - --platform linux/${{ matrix.arch }} \ - --output type=local,dest=build-output \ - -f Dockerfile.build \ - . + - name: Build srtla + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release -DSRTLA_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr + cmake --build build -j$(nproc) + DESTDIR=$PWD/install cmake --install build - name: Install FPM - run: | - sudo apt-get update - sudo apt-get install -y ruby-dev gcc g++ - sudo gem install fpm + run: sudo gem install fpm - name: Create packages env: @@ -134,14 +110,12 @@ jobs: --maintainer "CERALIVE " \ --url "https://github.com/CERALIVE/srtla" \ --license "AGPL-3.0" \ - --depends "srt" \ - --depends "libspdlog1" \ -p "dist/srtla_${VERSION}_${ARCH}.deb" \ - build-output/usr/=/usr/ + install/usr/=/usr/ # Create .tar.gz archive mkdir -p tarball/srtla-${VERSION} - cp -r build-output/usr/* tarball/srtla-${VERSION}/ + cp -r install/usr/* tarball/srtla-${VERSION}/ cd tarball tar -czvf ../dist/srtla_${VERSION}_${ARCH}.tar.gz srtla-${VERSION} cd .. 
@@ -247,7 +221,6 @@ jobs: ``` srt └── srtla (this package) - ├── Depends: srt, libspdlog1 │ └── Used by: ceracoder → ceralive-device ``` diff --git a/.serena/.gitignore b/.serena/.gitignore new file mode 100644 index 0000000..f9da9e9 --- /dev/null +++ b/.serena/.gitignore @@ -0,0 +1 @@ +/cache diff --git a/.serena/project.yml b/.serena/project.yml new file mode 100644 index 0000000..02d324b --- /dev/null +++ b/.serena/project.yml @@ -0,0 +1,84 @@ +# list of languages for which language servers are started; choose from: +# al bash clojure cpp csharp csharp_omnisharp +# dart elixir elm erlang fortran go +# haskell java julia kotlin lua markdown +# nix perl php python python_jedi r +# rego ruby ruby_solargraph rust scala swift +# terraform typescript typescript_vts yaml zig +# Note: +# - For C, use cpp +# - For JavaScript, use typescript +# Special requirements: +# - csharp: Requires the presence of a .sln file in the project folder. +# When using multiple languages, the first language server that supports a given file will be used for that file. +# The first language is the default language and the respective language server will be used as a fallback. +# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored. +languages: +- cpp + +# the encoding used by text files in the project +# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings +encoding: "utf-8" + +# whether to use the project's gitignore file to ignore files +# Added on 2025-04-07 +ignore_all_files_in_gitignore: true + +# list of additional paths to ignore +# same syntax as gitignore, so you can use * and ** +# Was previously called `ignored_dirs`, please update your config if you are using that. 
+# Added (renamed) on 2025-04-07 +ignored_paths: [] + +# whether the project is in read-only mode +# If set to true, all editing tools will be disabled and attempts to use them will result in an error +# Added on 2025-04-18 +read_only: false + +# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details. +# Below is the complete list of tools for convenience. +# To make sure you have the latest list of tools, and to view their descriptions, +# execute `uv run scripts/print_tool_overview.py`. +# +# * `activate_project`: Activates a project by name. +# * `check_onboarding_performed`: Checks whether project onboarding was already performed. +# * `create_text_file`: Creates/overwrites a file in the project directory. +# * `delete_lines`: Deletes a range of lines within a file. +# * `delete_memory`: Deletes a memory from Serena's project-specific memory store. +# * `execute_shell_command`: Executes a shell command. +# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced. +# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type). +# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type). +# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes. +# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file. +# * `initial_instructions`: Gets the initial instructions for the current project. +# Should only be used in settings where the system prompt cannot be set, +# e.g. in clients you have no control over, like Claude Desktop. +# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol. +# * `insert_at_line`: Inserts content at a given line in a file. 
+# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. +# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). 
+initial_prompt: "" + +project_name: "srtla" +included_optional_tools: [] diff --git a/CMakeLists.txt b/CMakeLists.txt index 12d08ef..c0407fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,88 +1,121 @@ -cmake_minimum_required(VERSION 3.16) -project(srtla VERSION 1.0.0 LANGUAGES C CXX) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules") - -# Options -option(SRTLA_BUILD_TESTS "Build tests" ON) -option(SRTLA_ENABLE_CLANG_TIDY "Enable clang-tidy static analysis" OFF) - -find_package(spdlog REQUIRED) - -# Clang-tidy integration -if(SRTLA_ENABLE_CLANG_TIDY) - find_program(CLANG_TIDY_EXE NAMES clang-tidy) - if(CLANG_TIDY_EXE) - message(STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") - set(CMAKE_C_CLANG_TIDY "${CLANG_TIDY_EXE}") - set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_EXE}") - else() - message(WARNING "clang-tidy requested but not found") - endif() -endif() - -add_library(common_obj OBJECT - src/common.c - src/common.h) - -add_executable(srtla_rec - src/receiver.cpp - src/receiver.h) - -target_include_directories(srtla_rec PRIVATE - "deps/argparse/include") -target_link_libraries(srtla_rec PRIVATE - common_obj - spdlog::spdlog - -Wl,-rpath,/usr/local/lib -) -target_compile_features(srtla_rec PRIVATE cxx_std_17) -target_compile_options(srtla_rec PRIVATE -Wall -Wextra) -target_compile_definitions(srtla_rec PUBLIC VERSION="${CMAKE_PROJECT_VERSION}") - -add_executable(srtla_send - src/sender.cpp - src/sender.h) - -target_include_directories(srtla_send PRIVATE - "deps/argparse/include") -target_link_libraries(srtla_send PRIVATE - common_obj - spdlog::spdlog - -Wl,-rpath,/usr/local/lib -) -target_compile_features(srtla_send PRIVATE cxx_std_17) -target_compile_options(srtla_send PRIVATE -Wall -Wextra) -target_compile_definitions(srtla_send PUBLIC VERSION="${CMAKE_PROJECT_VERSION}") - -set(CMAKE_BUILD_TYPE RelWithDebInfo) -install(TARGETS srtla_rec srtla_send RUNTIME DESTINATION bin) - -# Testing -if(SRTLA_BUILD_TESTS) - enable_testing() - 
add_subdirectory(tests) -endif() - -# Custom lint target for manual clang-tidy runs -add_custom_target(lint - COMMAND ${CMAKE_COMMAND} -E echo "Running clang-tidy on C files..." - COMMAND clang-tidy - ${CMAKE_CURRENT_SOURCE_DIR}/src/common.c - -- - -I${CMAKE_CURRENT_SOURCE_DIR}/src - -std=c11 - -D_GNU_SOURCE - COMMAND ${CMAKE_COMMAND} -E echo "Running clang-tidy on C++ files..." - COMMAND clang-tidy - ${CMAKE_CURRENT_SOURCE_DIR}/src/receiver.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/sender.cpp - -- - -I${CMAKE_CURRENT_SOURCE_DIR}/deps/argparse/include - -I${CMAKE_CURRENT_SOURCE_DIR}/src - -std=c++17 - -D_GNU_SOURCE - '-DVERSION="${CMAKE_PROJECT_VERSION}"' - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Running clang-tidy static analysis..." -) +cmake_minimum_required(VERSION 3.16) +project(srtla VERSION 1.0.0 LANGUAGES C CXX) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules") + +# Options +option(SRTLA_BUILD_TESTS "Build tests" ON) +option(SRTLA_ENABLE_CLANG_TIDY "Enable clang-tidy static analysis" OFF) + +include(FetchContent) + +# Fetch and build spdlog statically +FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/irlserver/spdlog.git + GIT_TAG 1.9.2 +) +set(SPDLOG_BUILD_SHARED OFF CACHE BOOL "Build spdlog as shared library") +set(SPDLOG_BUILD_EXAMPLE OFF CACHE BOOL "Build spdlog examples") +FetchContent_MakeAvailable(spdlog) + +# Clang-tidy integration +if(SRTLA_ENABLE_CLANG_TIDY) + find_program(CLANG_TIDY_EXE NAMES clang-tidy) + if(CLANG_TIDY_EXE) + message(STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") + set(CMAKE_C_CLANG_TIDY "${CLANG_TIDY_EXE}") + set(CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_EXE}") + else() + message(WARNING "clang-tidy requested but not found") + endif() +endif() + +add_library(common_obj OBJECT + src/common.c + src/common.h) + +add_executable(srtla_rec + src/receiver_main.cpp + src/connection/connection.cpp + src/connection/connection_group.cpp + src/connection/connection_registry.cpp + 
src/quality/metrics_collector.cpp + src/quality/quality_evaluator.cpp + src/quality/load_balancer.cpp + src/protocol/srtla_handler.cpp + src/protocol/srt_handler.cpp + src/utils/network_utils.cpp + src/utils/nak_dedup.cpp) + +target_include_directories(srtla_rec PRIVATE + "deps/argparse/include" + "${CMAKE_CURRENT_SOURCE_DIR}/src") + +target_link_libraries(srtla_rec PRIVATE + common_obj + spdlog::spdlog +) +target_compile_features(srtla_rec PRIVATE cxx_std_17) +target_compile_options(srtla_rec PRIVATE -Wall -Wextra) +target_compile_definitions(srtla_rec PUBLIC VERSION="${CMAKE_PROJECT_VERSION}") + +add_executable(srtla_send + src/sender.cpp + src/sender.h) + +target_include_directories(srtla_send PRIVATE + "deps/argparse/include") +target_link_libraries(srtla_send PRIVATE + common_obj + spdlog::spdlog +) +target_compile_features(srtla_send PRIVATE cxx_std_17) +target_compile_options(srtla_send PRIVATE -Wall -Wextra) +target_compile_definitions(srtla_send PUBLIC VERSION="${CMAKE_PROJECT_VERSION}") + +if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Build type" FORCE) +endif() +install(TARGETS srtla_rec srtla_send RUNTIME DESTINATION bin) + +# Testing +if(SRTLA_BUILD_TESTS) + enable_testing() + if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests/CMakeLists.txt") + add_subdirectory(tests) + endif() +endif() + +# Custom lint target for manual clang-tidy runs +add_custom_target(lint + COMMAND ${CMAKE_COMMAND} -E echo "Running clang-tidy on C files..." + COMMAND clang-tidy + ${CMAKE_CURRENT_SOURCE_DIR}/src/common.c + -- + -I${CMAKE_CURRENT_SOURCE_DIR}/src + -std=c11 + -D_GNU_SOURCE + COMMAND ${CMAKE_COMMAND} -E echo "Running clang-tidy on C++ files..." 
+ COMMAND clang-tidy + ${CMAKE_CURRENT_SOURCE_DIR}/src/receiver_main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/sender.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/connection/connection.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/connection/connection_group.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/connection/connection_registry.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/quality/metrics_collector.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/quality/quality_evaluator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/quality/load_balancer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/protocol/srtla_handler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/protocol/srt_handler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/network_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/utils/nak_dedup.cpp + -- + -I${CMAKE_CURRENT_SOURCE_DIR}/deps/argparse/include + -I${CMAKE_CURRENT_SOURCE_DIR}/src + -std=c++17 + -D_GNU_SOURCE + '-DVERSION="${CMAKE_PROJECT_VERSION}"' + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Running clang-tidy static analysis..." +) diff --git a/EXTENDED_KEEPALIVE_FIX.md b/EXTENDED_KEEPALIVE_FIX.md new file mode 100644 index 0000000..8878d5c --- /dev/null +++ b/EXTENDED_KEEPALIVE_FIX.md @@ -0,0 +1,145 @@ +# Extended Keepalive Feedback Loop Fix + +## Problem + +When using srtla_send with extended keepalives (38-byte keepalives with connection_info_t), +one connection would drop to 0 bandwidth and never recover, while the other connection +carried 100% of the traffic. This did NOT occur with vanilla srtla_send (minimal 2-byte keepalives). + +## Root Cause Analysis + +### The Feedback Loop + +1. **Initial state**: Both connections share traffic load +2. **Minor network event**: One connection experiences slight degradation (e.g., packet loss) +3. **Client reduces usage**: Sender uses the degraded connection less +4. **Connection becomes idle**: Idle connections send extended keepalives (by design) +5. **Receiver measures 0 bandwidth**: Since connection is idle, receiver-side bandwidth measurement = 0 +6. 
**Heavy bandwidth penalty**: Receiver applies 40 error points for performance_ratio < 0.3 +7. **ACK throttling**: 40+ error points → WEIGHT_CRITICAL → 20% ACK throttle +8. **Client further reduces usage**: Fewer ACKs → lower window growth → connection scored poorly +9. **Permanent 0 bandwidth**: Connection locked at 0, never recovers + +### Why It Only Happens with Extended Keepalives + +- **Legacy senders (minimal keepalives)**: Idle connections don't provide telemetry, so receiver + can't distinguish them as clearly. Bandwidth penalties apply but without the enhanced evaluation, + the feedback loop is less severe. + +- **Extended keepalives**: Idle connections send full telemetry, triggering "full evaluation mode". + Receiver confidently applies aggressive bandwidth penalties, creating a strong feedback loop. + +## Solution + +### 1. Lighter Bandwidth Penalties for Connections with Telemetry + +**File**: `src/quality/quality_evaluator.cpp:175-203` + +For connections WITH sender telemetry (extended keepalives): +- Reduce bandwidth penalty from 40 → 10 points (for performance_ratio < 0.3) +- Reduce other tiers proportionally +- Rely more on telemetry metrics (RTT, NAK rate, window utilization) as primary indicators + +For connections WITHOUT telemetry (legacy senders): +- Keep original aggressive penalties (40 points for < 0.3) +- Bandwidth remains the primary quality indicator + +**Rationale**: +- Bandwidth penalties create feedback loops with ACK throttling +- When we have telemetry, we can use more direct quality indicators (packet loss, RTT, NAKs) +- Legacy senders need bandwidth penalties as they lack alternative quality signals + +### 2. 
Recovery Boost for Throttled Connections + +**File**: `src/quality/load_balancer.cpp:86-96` + +For connections with recent telemetry that are heavily throttled (<50%) but show improvement +(error points < 15): +- Apply a 15% throttle boost (up to 60% max) +- This helps connections escape the feedback loop when network quality improves + +Only applies to connections with sender telemetry. Legacy senders don't get this boost. + +**Rationale**: +- Breaks the feedback loop: low throttle → low usage → low bandwidth → low throttle +- Only applies when connection has actually improved (error points dropped) +- Conservative boost (15%) prevents over-correction + +## Expected Behavior After Fix + +### With Extended Keepalives (srtla_send) + +**Before**: +``` +[::ffff:51973] BW: 7469 kbps, Loss: 0%, Error: 0, Weight: 100%, Throttle: 1.00 +[::ffff:47884] BW: 0 kbps, Loss: 0%, Error: 40, Weight: 10%, Throttle: 0.20 ← STUCK +``` + +**After**: +``` +[::ffff:51973] BW: 7200 kbps, Loss: 0%, Error: 0, Weight: 100%, Throttle: 1.00 +[::ffff:47884] BW: 300 kbps, Loss: 0%, Error: 10, Weight: 70%, Throttle: 0.70 ← RECOVERED +``` + +Idle connections get lower error points (10 instead of 40), enabling them to participate +in load balancing when they receive traffic again. + +### With Legacy Keepalives (vanilla srtla_send) + +**Behavior unchanged** - legacy senders continue to use original bandwidth penalty logic +since they lack alternative quality signals. + +## Technical Details + +### Bandwidth Penalty Comparison + +| Performance Ratio | Legacy Senders | With Telemetry | +|-------------------|----------------|----------------| +| < 0.3 | 40 points | 10 points | +| 0.3 - 0.5 | 25 points | 7 points | +| 0.5 - 0.7 | 15 points | 4 points | +| 0.7 - 0.85 | 5 points | 2 points | + +### Recovery Boost Logic + +```cpp +if (has_recent_telemetry && old_throttle < 0.5 && error_points < 15) { + new_throttle = min(new_throttle + 0.15, 0.6); +} +``` + +Conditions: +1. 
Connection must have sent extended keepalives recently +2. Current throttle must be below 50% (heavily throttled) +3. Error points must be below 15 (showing improvement) + +Result: Throttle boosted by 15%, capped at 60% + +## Testing + +Test the fix by: + +1. **Extended keepalive scenario**: + - Use srtla_send with extended keepalives + - Verify both connections participate in load balancing + - Temporarily degrade one connection (artificial packet loss) + - Verify connection recovers when packet loss stops + +2. **Legacy scenario**: + - Use vanilla srtla_send (minimal keepalives) + - Verify behavior is unchanged from before + - Confirm aggressive bandwidth penalties still apply + +## Files Modified + +- `src/quality/quality_evaluator.cpp`: Conditional bandwidth penalties +- `src/quality/load_balancer.cpp`: Recovery boost for throttled connections + +## Backward Compatibility + +✅ **Fully backward compatible** + +- Legacy senders: No behavioral change +- Extended keepalives: Fixed feedback loop issue +- No protocol changes +- No configuration changes needed diff --git a/README.md b/README.md index 608f732..12b68e2 100644 --- a/README.md +++ b/README.md @@ -1,147 +1,246 @@ -# SRTLA - SRT Link Aggregation - -SRTLA bonds multiple network connections together for live video streaming, providing increased bandwidth and redundancy. - -This is a fork of the [BELABOX SRTLA project](https://github.com/BELABOX/srtla), with contributions from IRLToolkit, IRLServer, and CeraLive. 
- -## What It Does - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ Encoder │────▶│ srtla_send │═══════════════▶│ srtla_rec │────▶│ SRT Server │ -│ (SRT) │ │ │ Multiple IPs │ │ │ │ -└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ - │ - ┌─────┴─────┐ - ▼ ▼ ▼ - LTE LTE WiFi - 1 2 -``` - -- **Combine bandwidth**: 3× 5Mbps connections → ~15Mbps total -- **Redundancy**: One link fails, others continue -- **Adaptive**: Better links automatically get more traffic - -## Quick Start - -### Receiver (Server Side) - -```bash -srtla_rec --srtla_port 5000 --srt_hostname 127.0.0.1 --srt_port 5001 -``` - -### Sender (Encoder Side) - -**With CeraUI** (recommended): The IP list is managed automatically. CeraUI detects network interfaces, writes the IP file, and signals `srtla_send` when interfaces change. - -**Standalone usage**: - -1. Create IP list file: - ```bash - echo "10.0.0.10" > /tmp/srtla_ips # usb0 IP - echo "10.0.1.10" >> /tmp/srtla_ips # usb1 IP - echo "192.168.1.50" >> /tmp/srtla_ips # wlan0 IP - ``` - -2. Start sender: - ```bash - srtla_send 5000 relay.example.com 5001 /tmp/srtla_ips - ``` - -3. Configure encoder to send SRT to `localhost:5000` - -4. When interfaces change, update the file and signal reload: - ```bash - kill -HUP $(pidof srtla_send) - ``` - -### ⚠️ Critical: Network Setup Required! - -**SRTLA will NOT work correctly without source-based routing!** - -Without it, all traffic goes through one interface regardless of which source IP is used. - -See **[Network Setup Guide](docs/NETWORK_SETUP.md)** for step-by-step instructions. 
- -## Documentation - -| Document | Description | -|----------|-------------| -| [Network Setup](docs/NETWORK_SETUP.md) | **Start here!** Routing config, IP list management | -| [How It Works](docs/HOW_IT_WORKS.md) | Protocol details, architecture, congestion control | -| [Troubleshooting](docs/TROUBLESHOOTING.md) | Common issues and solutions | - -> **Note**: If using CeraUI, the IP list (`/tmp/srtla_ips`) is managed automatically. See [Managing the IP List](docs/NETWORK_SETUP.md#managing-the-ip-list) for details. - -## Building - -```bash -mkdir build && cd build -cmake .. -make -sudo make install -``` - -**Dependencies:** -- CMake 3.16+ -- spdlog -- C++17 compiler - -## Command Reference - -### srtla_send - -```bash -srtla_send <listen_port> <srtla_host> <srtla_port> <ips_file> [--verbose] -``` - -| Argument | Description | Default | -|----------|-------------|---------| -| `listen_port` | Port for local SRT encoder | 5000 | -| `srtla_host` | Remote SRTLA receiver hostname | 127.0.0.1 | -| `srtla_port` | Remote SRTLA receiver port | 5001 | -| `ips_file` | File with source IPs (one per line) | /tmp/srtla_ips | -| `--verbose` | Enable debug logging | off | - -**Signals:** -- `SIGHUP`: Reload IP list without restart - -### srtla_rec - -```bash -srtla_rec --srtla_port <port> --srt_hostname <host> --srt_port <port> [--verbose] -``` - -| Argument | Description | Default | -|----------|-------------|---------| -| `--srtla_port` | Listen port for SRTLA connections | 5000 | -| `--srt_hostname` | Downstream SRT server | 127.0.0.1 | -| `--srt_port` | Downstream SRT port | 5001 | -| `--verbose` | Enable debug logging | off | - -## Setup Checklist - -- [ ] Routing tables added to `/etc/iproute2/rt_tables` -- [ ] DHCP hook installed for USB/Ethernet modems -- [ ] NetworkManager dispatcher installed for WiFi -- [ ] Public DNS configured -- [ ] UDP buffer sizes increased (`sysctl`) -- [ ] Firewall allows UDP traffic -- [ ] Source IPs file created -- [ ] Verified with `ip route get ... 
from ` - -## Support the Project - -If you find SRTLA useful, consider supporting CeraLive development: - -- ☕ [Ko-fi](https://ko-fi.com/andrescera) -- 💳 [PayPal](https://www.paypal.com/donate/?business=7KKQS9KBSAMNE&no_recurring=0&item_name=CERALIVE+Development+Support&currency_code=USD) - -## License - -GNU Affero General Public License v3.0 (AGPL-3.0) - -Copyright (C) 2020-2021 BELABOX project -Copyright (C) 2024 IRLToolkit Inc. -Copyright (C) 2025 IRLServer.com -Copyright (C) 2025 CeraLive +# SRTLA - SRT Link Aggregation + +SRTLA bonds multiple network connections together for live video streaming, providing increased bandwidth and redundancy. + +This is a fork of the [BELABOX SRTLA project](https://github.com/BELABOX/srtla), with contributions from IRLToolkit, IRLServer, OpenIRL, and CeraLive. + +## What It Does + +``` +┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ Encoder │────▶│ srtla_send │═══════════════▶│ srtla_rec │────▶│ SRT Server │ +│ (SRT) │ │ │ Multiple IPs │ │ │ │ +└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ + │ + ┌─────┴─────┐ + ▼ ▼ ▼ + LTE LTE WiFi + 1 2 +``` + +- **Combine bandwidth**: 3× 5Mbps connections → ~15Mbps total +- **Redundancy**: One link fails, others continue +- **Adaptive**: Better links automatically get more traffic + +## Features + +- Support for link aggregation across multiple network connections +- Automatic management of connection groups and individual connections +- Robust error handling and timeouts for inactive connections +- Logging of connection details for easy diagnostics +- Improved load balancing through ACK throttling +- Connection recovery mechanism for temporary network issues + +## Quick Start + +### Receiver (Server Side) + +```bash +srtla_rec --srtla_port 5000 --srt_hostname 127.0.0.1 --srt_port 5001 +``` + +### Sender (Encoder Side) + +**With CeraUI** (recommended): The IP list is managed automatically. 
CeraUI detects network interfaces, writes the IP file, and signals `srtla_send` when interfaces change. + +**Standalone usage**: + +1. Create IP list file: + ```bash + echo "10.0.0.10" > /tmp/srtla_ips # usb0 IP + echo "10.0.1.10" >> /tmp/srtla_ips # usb1 IP + echo "192.168.1.50" >> /tmp/srtla_ips # wlan0 IP + ``` + +2. Start sender: + ```bash + srtla_send 5000 relay.example.com 5001 /tmp/srtla_ips + ``` + +3. Configure encoder to send SRT to `localhost:5000` + +4. When interfaces change, update the file and signal reload: + ```bash + kill -HUP $(pidof srtla_send) + ``` + +### Critical: Network Setup Required! + +**SRTLA will NOT work correctly without source-based routing!** + +Without it, all traffic goes through one interface regardless of which source IP is used. + +See **[Network Setup Guide](docs/NETWORK_SETUP.md)** for step-by-step instructions. + +## Building + +```bash +mkdir build && cd build +cmake .. +make +sudo make install +``` + +**Dependencies:** +- CMake 3.16+ +- C++17 compiler +- spdlog (fetched automatically via CMake) +- argparse (included in deps/) + +## Documentation + +| Document | Description | +|----------|-------------| +| [Network Setup](docs/NETWORK_SETUP.md) | **Start here!** Routing config, IP list management | +| [How It Works](docs/HOW_IT_WORKS.md) | Protocol details, architecture, congestion control | +| [Troubleshooting](docs/TROUBLESHOOTING.md) | Common issues and solutions | +| [Connection Info Comparison](docs/connection-info-comparison.md) | Connection metrics and comparison | +| [Keepalive Improvements](docs/keepalive-improvements.md) | Extended keepalive fix documentation | + +> **Note**: If using CeraUI, the IP list (`/tmp/srtla_ips`) is managed automatically. See [Managing the IP List](docs/NETWORK_SETUP.md#managing-the-ip-list) for details. 
+ +## Command Reference + +### srtla_send + +```bash +srtla_send <listen_port> <srtla_host> <srtla_port> <ips_file> [--verbose] +``` + +| Argument | Description | Default | +|----------|-------------|---------| +| `listen_port` | Port for local SRT encoder | 5000 | +| `srtla_host` | Remote SRTLA receiver hostname | 127.0.0.1 | +| `srtla_port` | Remote SRTLA receiver port | 5001 | +| `ips_file` | File with source IPs (one per line) | /tmp/srtla_ips | +| `--verbose` | Enable debug logging | off | + +**Signals:** +- `SIGHUP`: Reload IP list without restart + +### srtla_rec + +```bash +srtla_rec --srtla_port <port> --srt_hostname <host> --srt_port <port> [--verbose] [--debug] +``` + +| Argument | Description | Default | +|----------|-------------|---------| +| `--srtla_port` | Listen port for SRTLA connections | 5000 | +| `--srt_hostname` | Downstream SRT server | 127.0.0.1 | +| `--srt_port` | Downstream SRT port | 4001 | +| `--verbose` | Enable verbose logging | off | +| `--debug` | Enable debug logging | off | + +## Technical Details + +### How It Works + +1. srtla_rec creates a UDP socket for incoming SRTLA connections. +2. Clients register with srtla_rec and create connection groups. +3. Multiple connections can be added to a group. +4. Data is received across all connections and forwarded to the SRT server. +5. ACK packets are sent across all connections for timely delivery. +6. Inactive connections and groups are automatically cleaned up. 
+ +### Two-phase Registration Process + +- Sender (conn 0): `SRTLA_REG1` (contains sender-generated random ID) +- Receiver: `SRTLA_REG2` (contains full ID with receiver-generated values) +- Sender (conn 0): `SRTLA_REG2` (with full ID) +- Receiver: `SRTLA_REG3` +- Additional connections follow a similar pattern + +### Error Handling + +The receiver can send error responses: +- `SRTLA_REG_ERR`: Operation temporarily failed +- `SRTLA_REG_NGP`: Invalid ID, group must be re-registered + +## Enhanced Load Balancing and Recovery + +This version includes improvements to address key issues in the original implementation: + +### Connection Recovery + +In the original implementation, connections with temporary problems were completely disabled. Now: + +- Connections showing signs of recovery enter a "recovery mode" +- These connections receive more frequent keepalive packets for a set period (5 seconds) +- After successful recovery, they are fully reactivated for data transmission +- Recovery attempts are abandoned after a certain time if unsuccessful + +### ACK Throttling for Load Balancing + +The central innovation is ACK throttling for load distribution: + +1. The SRT/SRTLA client (srtla_send) selects connections based on a score derived from window size and in-flight packets +2. The window size in the client is adjusted when ACKs are received +3. By selectively throttling ACK frequency, we indirectly control how quickly the window grows +4. 
This causes the client to prefer better connections without requiring client modifications + +### Connection Quality Assessment + +Connection quality is assessed by measuring: + +- **Bandwidth Performance**: Compares actual bandwidth to expected bandwidth +- **Packet Loss**: Higher loss rates lead to more error points +- **Dynamic Bandwidth Evaluation**: Connections evaluated against median or minimum thresholds +- **Grace Period**: New connections receive a 10-second grace period before penalties + +Weight levels: +- 100% (WEIGHT_FULL): Optimal connection +- 85% (WEIGHT_EXCELLENT): Excellent connection +- 70% (WEIGHT_DEGRADED): Slightly impaired connection +- 55% (WEIGHT_FAIR): Fair connection +- 40% (WEIGHT_POOR): Severely impaired connection +- 10% (WEIGHT_CRITICAL): Critically impaired connection + +### Configuration Parameters + +Adjustable parameters for optimization: + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `KEEPALIVE_PERIOD` | 1s | Interval for keepalive packets during recovery | +| `RECOVERY_CHANCE_PERIOD` | 5s | Period for connection recovery attempt | +| `CONN_QUALITY_EVAL_PERIOD` | 5s | Interval for evaluating connection quality | +| `ACK_THROTTLE_INTERVAL` | 100ms | Base interval for ACK throttling | +| `MIN_ACK_RATE` | 20% | Minimum ACK rate to keep connections alive | +| `MIN_ACCEPTABLE_TOTAL_BANDWIDTH_KBPS` | 1000 | Minimum total bandwidth for acceptable streaming | +| `GOOD_CONNECTION_THRESHOLD` | 50% | Threshold for considering a connection "good" | +| `CONNECTION_GRACE_PERIOD` | 10s | Grace period before applying penalties | + +## Socket Information + +srtla_rec creates information files about active connections under `/tmp/srtla-group-[PORT]`. These files contain the client IP addresses connected to a specific socket. 
+ +## Setup Checklist + +- [ ] Routing tables added to `/etc/iproute2/rt_tables` +- [ ] DHCP hook installed for USB/Ethernet modems +- [ ] NetworkManager dispatcher installed for WiFi +- [ ] Public DNS configured +- [ ] UDP buffer sizes increased (`sysctl`) +- [ ] Firewall allows UDP traffic +- [ ] Source IPs file created +- [ ] Verified with `ip route get ... from ` + +## Support the Project + +If you find SRTLA useful, consider supporting CeraLive development: + +- [Ko-fi](https://ko-fi.com/andrescera) +- [PayPal](https://www.paypal.com/donate/?business=7KKQS9KBSAMNE&no_recurring=0&item_name=CERALIVE+Development+Support&currency_code=USD) + +## License + +GNU Affero General Public License v3.0 (AGPL-3.0) + +Copyright (C) 2020-2021 BELABOX project +Copyright (C) 2024 IRLToolkit Inc. +Copyright (C) 2024 OpenIRL +Copyright (C) 2025 IRLServer.com +Copyright (C) 2025 CeraLive + +You can use, modify, and distribute this code according to the terms of the AGPL-3.0. diff --git a/bindings/typescript/dist/receiver/args.d.ts b/bindings/typescript/dist/receiver/args.d.ts index 6c287c9..e8609b5 100644 --- a/bindings/typescript/dist/receiver/args.d.ts +++ b/bindings/typescript/dist/receiver/args.d.ts @@ -5,6 +5,6 @@ export interface SrtlaRecArgsResult { } /** * Build CLI args for srtla_rec. - * Shape: --srtla_port --srt_hostname --srt_port [--verbose] + * Shape: --srtla_port --srt_hostname --srt_port [--log_level ] */ export declare function buildSrtlaRecArgs(input: SrtlaRecOptionsInput): SrtlaRecArgsResult; diff --git a/bindings/typescript/dist/receiver/args.js b/bindings/typescript/dist/receiver/args.js index 00455df..870b394 100644 --- a/bindings/typescript/dist/receiver/args.js +++ b/bindings/typescript/dist/receiver/args.js @@ -1,7 +1,7 @@ import { srtlaRecOptionsSchema } from "./types.js"; /** * Build CLI args for srtla_rec. 
- * Shape: --srtla_port --srt_hostname --srt_port [--verbose] + * Shape: --srtla_port --srt_hostname --srt_port [--log_level ] */ export function buildSrtlaRecArgs(input) { const options = srtlaRecOptionsSchema.parse(input); @@ -13,8 +13,8 @@ export function buildSrtlaRecArgs(input) { "--srt_port", String(options.srtPort), ]; - if (options.verbose) { - args.push("--verbose"); + if (options.logLevel) { + args.push("--log_level", options.logLevel); } return { args, options }; } diff --git a/bindings/typescript/dist/receiver/args.test.js b/bindings/typescript/dist/receiver/args.test.js index 01ee7dd..ad6cb2a 100644 --- a/bindings/typescript/dist/receiver/args.test.js +++ b/bindings/typescript/dist/receiver/args.test.js @@ -9,18 +9,18 @@ describe("buildSrtlaRecArgs", () => { "--srt_hostname", "127.0.0.1", "--srt_port", - "5001", + "4001", ]); expect(options.srtlaPort).toBe(5000); expect(options.srtHostname).toBe("127.0.0.1"); - expect(options.srtPort).toBe(5001); + expect(options.srtPort).toBe(4001); }); - test("includes verbose flag when set", () => { + test("includes log_level when set", () => { const { args } = buildSrtlaRecArgs({ srtlaPort: 6000, srtHostname: "0.0.0.0", srtPort: 6001, - verbose: true, + logLevel: "debug", }); expect(args.slice(0, 6)).toEqual([ "--srtla_port", @@ -30,6 +30,13 @@ describe("buildSrtlaRecArgs", () => { "--srt_port", "6001", ]); - expect(args[args.length - 1]).toBe("--verbose"); + expect(args).toContain("--log_level"); + expect(args).toContain("debug"); + }); + test("omits log_level when not set", () => { + const { args } = buildSrtlaRecArgs({ + srtlaPort: 5000, + }); + expect(args).not.toContain("--log_level"); }); }); diff --git a/bindings/typescript/dist/receiver/process.d.ts b/bindings/typescript/dist/receiver/process.d.ts index a8dc90d..1160378 100644 --- a/bindings/typescript/dist/receiver/process.d.ts +++ b/bindings/typescript/dist/receiver/process.d.ts @@ -6,11 +6,11 @@ export interface SpawnSrtlaRecOptions { spawnOptions?: 
SpawnOptions; } export declare function getSrtlaRecExec(execPath?: string): string; -export declare function spawnSrtlaRec(options: SpawnSrtlaRecOptions): ChildProcess; +export declare function spawnSrtlaRec(options: SpawnSrtlaRecOptions): import("node:child_process").ChildProcess; export declare function sendSrtlaRecHup(): Promise; export declare function sendSrtlaRecTerm(): Promise; export declare function isSrtlaRecRunning(): Promise; /** * Convenience: build args from options and spawn the process. */ -export declare function buildAndSpawnSrtlaRec(options: SrtlaRecOptionsInput, spawnOptions?: SpawnOptions): ChildProcess; +export declare function buildAndSpawnSrtlaRec(options: SrtlaRecOptionsInput, spawnOptions?: SpawnOptions): import("node:child_process").ChildProcess; diff --git a/bindings/typescript/dist/receiver/types.d.ts b/bindings/typescript/dist/receiver/types.d.ts index 6e78154..f653383 100644 --- a/bindings/typescript/dist/receiver/types.d.ts +++ b/bindings/typescript/dist/receiver/types.d.ts @@ -1,4 +1,26 @@ import { z } from "zod"; -export declare const srtlaRecOptionsSchema: any; +export declare const logLevelSchema: z.ZodEnum<{ + error: "error"; + trace: "trace"; + debug: "debug"; + info: "info"; + warn: "warn"; + critical: "critical"; +}>; +export type LogLevel = z.infer; +export declare const srtlaRecOptionsSchema: z.ZodObject<{ + srtlaPort: z.ZodDefault; + srtHostname: z.ZodDefault; + srtPort: z.ZodDefault; + logLevel: z.ZodOptional>; + execPath: z.ZodOptional; +}, z.core.$strip>; export type SrtlaRecOptionsInput = z.input; export type SrtlaRecOptions = z.output; diff --git a/bindings/typescript/dist/receiver/types.js b/bindings/typescript/dist/receiver/types.js index 0cfea7f..ac3cf45 100644 --- a/bindings/typescript/dist/receiver/types.js +++ b/bindings/typescript/dist/receiver/types.js @@ -1,8 +1,16 @@ import { z } from "zod"; +export const logLevelSchema = z.enum([ + "trace", + "debug", + "info", + "warn", + "error", + "critical", +]); export 
const srtlaRecOptionsSchema = z.object({ srtlaPort: z.number().int().min(1).max(65535).default(5000), srtHostname: z.string().min(1).default("127.0.0.1"), - srtPort: z.number().int().min(1).max(65535).default(5001), - verbose: z.boolean().optional(), + srtPort: z.number().int().min(1).max(65535).default(4001), + logLevel: logLevelSchema.optional(), execPath: z.string().optional(), }); diff --git a/bindings/typescript/dist/sender/process.d.ts b/bindings/typescript/dist/sender/process.d.ts index 43980d9..5b4cd33 100644 --- a/bindings/typescript/dist/sender/process.d.ts +++ b/bindings/typescript/dist/sender/process.d.ts @@ -6,11 +6,11 @@ export interface SpawnSrtlaSendOptions { spawnOptions?: SpawnOptions; } export declare function getSrtlaSendExec(execPath?: string): string; -export declare function spawnSrtlaSend(options: SpawnSrtlaSendOptions): ChildProcess; +export declare function spawnSrtlaSend(options: SpawnSrtlaSendOptions): import("node:child_process").ChildProcess; export declare function sendSrtlaSendHup(): Promise; export declare function sendSrtlaSendTerm(): Promise; export declare function isSrtlaSendRunning(): Promise; /** * Convenience: build args from options and spawn the process. 
*/ -export declare function buildAndSpawnSrtlaSend(options: SrtlaSendOptionsInput, spawnOptions?: SpawnOptions): ChildProcess; +export declare function buildAndSpawnSrtlaSend(options: SrtlaSendOptionsInput, spawnOptions?: SpawnOptions): import("node:child_process").ChildProcess; diff --git a/bindings/typescript/dist/sender/types.d.ts b/bindings/typescript/dist/sender/types.d.ts index 8796b6c..fdf0d6c 100644 --- a/bindings/typescript/dist/sender/types.d.ts +++ b/bindings/typescript/dist/sender/types.d.ts @@ -1,4 +1,11 @@ import { z } from "zod"; -export declare const srtlaSendOptionsSchema: any; +export declare const srtlaSendOptionsSchema: z.ZodObject<{ + listenPort: z.ZodDefault; + srtlaHost: z.ZodString; + srtlaPort: z.ZodDefault; + ipsFile: z.ZodDefault; + verbose: z.ZodOptional; + execPath: z.ZodOptional; +}, z.core.$strip>; export type SrtlaSendOptionsInput = z.input; export type SrtlaSendOptions = z.output; diff --git a/bindings/typescript/dist/shared/ip-list.d.ts b/bindings/typescript/dist/shared/ip-list.d.ts index e409ed6..4ab33e5 100644 --- a/bindings/typescript/dist/shared/ip-list.d.ts +++ b/bindings/typescript/dist/shared/ip-list.d.ts @@ -1,5 +1,5 @@ import { z } from "zod"; -export declare const ipListSchema: any; +export declare const ipListSchema: z.ZodArray; export type IpList = z.output; export type IpListInput = z.input; /** diff --git a/bindings/typescript/src/receiver/args.test.ts b/bindings/typescript/src/receiver/args.test.ts index 2f5e888..d07bd91 100644 --- a/bindings/typescript/src/receiver/args.test.ts +++ b/bindings/typescript/src/receiver/args.test.ts @@ -12,19 +12,19 @@ describe("buildSrtlaRecArgs", () => { "--srt_hostname", "127.0.0.1", "--srt_port", - "5001", + "4001", ]); expect(options.srtlaPort).toBe(5000); expect(options.srtHostname).toBe("127.0.0.1"); - expect(options.srtPort).toBe(5001); + expect(options.srtPort).toBe(4001); }); - test("includes verbose flag when set", () => { + test("includes log_level when set", () => { const { 
args } = buildSrtlaRecArgs({ srtlaPort: 6000, srtHostname: "0.0.0.0", srtPort: 6001, - verbose: true, + logLevel: "debug", }); expect(args.slice(0, 6)).toEqual([ @@ -35,6 +35,15 @@ describe("buildSrtlaRecArgs", () => { "--srt_port", "6001", ]); - expect(args[args.length - 1]).toBe("--verbose"); + expect(args).toContain("--log_level"); + expect(args).toContain("debug"); + }); + + test("omits log_level when not set", () => { + const { args } = buildSrtlaRecArgs({ + srtlaPort: 5000, + }); + + expect(args).not.toContain("--log_level"); }); }); diff --git a/bindings/typescript/src/receiver/args.ts b/bindings/typescript/src/receiver/args.ts index e1b2b11..78e5e38 100644 --- a/bindings/typescript/src/receiver/args.ts +++ b/bindings/typescript/src/receiver/args.ts @@ -7,7 +7,7 @@ export interface SrtlaRecArgsResult { /** * Build CLI args for srtla_rec. - * Shape: --srtla_port --srt_hostname --srt_port [--verbose] + * Shape: --srtla_port --srt_hostname --srt_port [--log_level ] */ export function buildSrtlaRecArgs(input: SrtlaRecOptionsInput): SrtlaRecArgsResult { const options = srtlaRecOptionsSchema.parse(input); @@ -19,8 +19,8 @@ export function buildSrtlaRecArgs(input: SrtlaRecOptionsInput): SrtlaRecArgsResu "--srt_port", String(options.srtPort), ]; - if (options.verbose) { - args.push("--verbose"); + if (options.logLevel) { + args.push("--log_level", options.logLevel); } return { args, options }; } diff --git a/bindings/typescript/src/receiver/types.ts b/bindings/typescript/src/receiver/types.ts index d42370a..d2135fa 100644 --- a/bindings/typescript/src/receiver/types.ts +++ b/bindings/typescript/src/receiver/types.ts @@ -1,10 +1,21 @@ import { z } from "zod"; +export const logLevelSchema = z.enum([ + "trace", + "debug", + "info", + "warn", + "error", + "critical", +]); + +export type LogLevel = z.infer; + export const srtlaRecOptionsSchema = z.object({ srtlaPort: z.number().int().min(1).max(65535).default(5000), srtHostname: z.string().min(1).default("127.0.0.1"), - 
srtPort: z.number().int().min(1).max(65535).default(5001), - verbose: z.boolean().optional(), + srtPort: z.number().int().min(1).max(65535).default(4001), + logLevel: logLevelSchema.optional(), execPath: z.string().optional(), }); diff --git a/docs/connection-info-comparison.md b/docs/connection-info-comparison.md new file mode 100644 index 0000000..b6ab735 --- /dev/null +++ b/docs/connection-info-comparison.md @@ -0,0 +1,350 @@ +# Connection Info Algorithm: Real-Time Comparison Guide + +## Overview + +This implementation allows you to compare the **Connection Info algorithm** (with sender telemetry) against the **Legacy algorithm** (receiver-side metrics only) **in real-time on the same data stream**. + +Both algorithms run simultaneously on every connection evaluation, so you can see how they differ under identical network conditions without needing to replicate setups. + +## How It Works + +The system runs **both algorithms in parallel**: + +1. **Connection Info Algorithm** (NEW): Uses extended telemetry from keepalive packets + - RTT measurements from sender + - Window size and in-flight packets + - Sender NAK count + - Sender bitrate + - Receiver bandwidth and packet loss + +2. **Legacy Algorithm** (OLD): Uses only receiver-side measurements + - Receiver bandwidth (calculated from received bytes) + - Receiver packet loss rate + - No RTT, window, or sender NAK data + +Both algorithms produce: +- Error points (quality assessment) +- Weight percentage (connection quality: 100% = best, 10% = worst) +- ACK throttle factor (load balancing control: 1.0 = no throttling, 0.2 = minimum) + +## Comparison Mode Flag + +The comparison mode is controlled in `src/receiver_config.h:13-15`: + +```cpp +#define ENABLE_ALGO_COMPARISON 1 // Enable comparison (BOTH algorithms run) +#define ENABLE_ALGO_COMPARISON 0 // Disable comparison (production mode) +``` + +**Default: ENABLED** for development and testing. 
+ +## Log Output + +### Keepalive Packet Logs (Always Shown) + +Every keepalive with connection info logs the detailed telemetry: + +``` +[INFO] [192.168.1.100:5000] [Group: 0x...] Per-connection keepalive: ID=0, BW: 2500.00 kbits/s, Window=8192, In-flight=120, RTT=45ms, NAKs=3 +``` + +### Algorithm Comparison Logs (When Enabled) + +When algorithms **disagree** (weight delta ≥ 5% OR error points delta ≥ 5), you'll see: + +``` +[INFO] [192.168.1.100:5000] [ALGO_CMP] ConnInfo: Err=15 W=70% T=0.70 | Legacy: Err=5 W=85% T=0.85 | Delta: E=+10 W=-15% T=-0.15 +``` + +This shows: +- **ConnInfo**: Connection Info algorithm results (uses sender telemetry) +- **Legacy**: Legacy algorithm results (receiver-side only) +- **Delta**: Difference, ConnInfo minus Legacy (positive `E` or negative `W`/`T` = ConnInfo more pessimistic; negative `E` or positive `W`/`T` = Legacy more pessimistic) + +When algorithms **agree** (within 5% threshold), only debug logging occurs to reduce spam. + +### Load Balancer Adjustment Logs + +Every 5 seconds (or when quality changes), you'll see a side-by-side comparison: + +``` +[INFO] [Group: 0x...] Connection parameters adjusted: +[INFO] [192.168.1.100:5000] [COMPARISON] ConnInfo: Weight=70%, Throttle=0.70, ErrPts=15 | Legacy: Weight=85%, Throttle=0.85, ErrPts=5 | Delta: W=-15%, T=-0.15, E=+10 +``` + +This shows the final decisions from both algorithms for all connections. 
+ +## What the Deltas Mean + +### Error Points Delta + +- **Positive (+)**: Connection Info algorithm is **more pessimistic** (detected more issues) + - Likely due to RTT problems, NAK rate, or window congestion not visible to legacy +- **Negative (-)**: Legacy algorithm is **more pessimistic** + - Unusual; could happen if receiver sees packet loss that sender hasn't reported yet +- **Zero or small**: Both algorithms see similar connection quality + +### Weight Delta + +- **Positive (+)**: Connection Info gives **higher weight** (more optimistic) + - Rare; would indicate legacy is penalizing incorrectly +- **Negative (-)**: Connection Info gives **lower weight** (more pessimistic) + - Common; Connection Info detects RTT/NAK/window issues legacy misses +- **Zero or small**: Both algorithms agree on connection quality + +### Throttle Delta + +- **Positive (+)**: Connection Info throttles **less** (more aggressive ACKs) +- **Negative (-)**: Connection Info throttles **more** (fewer ACKs, shifts load away) +- Follows weight delta (throttle = max(0.2, weight/100)) + +## Key Differences Between Algorithms + +| Metric | Connection Info Algorithm | Legacy Algorithm | +|--------|--------------------------|------------------| +| **Bandwidth** | ✅ Receiver calculated | ✅ Receiver calculated | +| **Packet Loss** | ✅ Receiver detected | ✅ Receiver detected | +| **RTT** | ✅ Sender measurement | ❌ Not available | +| **RTT Variance** | ✅ Tracked (jitter penalty) | ❌ Not available | +| **Window Utilization** | ✅ Window/in-flight ratio | ❌ Not available | +| **Sender NAK Rate** | ✅ Sender-reported NAKs | ❌ Not available | +| **Bitrate Validation** | ✅ Sender vs receiver check | ❌ Not available | +| **Max Error Points** | Higher (RTT+NAK+window penalties) | Lower (bandwidth+loss only) | + +## Example Scenarios + +### Scenario 1: High RTT Connection + +**Keepalive:** +``` +[INFO] Per-connection keepalive: ID=0, BW: 2000.00 kbits/s, Window=8192, In-flight=50, RTT=350ms, NAKs=1 +``` + 
+**Comparison:** +``` +[ALGO_CMP] ConnInfo: Err=25 W=70% T=0.70 | Legacy: Err=5 W=85% T=0.85 | Delta: E=+20 W=-15% T=-0.15 +``` + +**Interpretation:** +- Connection Info detects high RTT (350ms > 200ms threshold) → +10 error points +- RTT variance penalty → +10 more error points +- Legacy only sees bandwidth/loss, doesn't detect RTT issue +- **Result**: Connection Info throttles more aggressively (shifts load to better connections) + +### Scenario 2: High NAK Rate + +**Keepalive:** +``` +[INFO] Per-connection keepalive: ID=1, BW: 1500.00 kbits/s, Window=4096, In-flight=2048, RTT=50ms, NAKs=500 +``` + +**Comparison:** +``` +[ALGO_CMP] ConnInfo: Err=50 W=10% T=0.20 | Legacy: Err=10 W=70% T=0.70 | Delta: E=+40 W=-60% T=-0.50 +``` + +**Interpretation:** +- High NAK rate (500 NAKs) → +20-40 error points (Connection Info only) +- High window utilization (2048/4096 = 50%) → potential congestion +- Legacy doesn't see sender NAKs, only receiver packet loss +- **Result**: Connection Info severely throttles, Legacy doesn't recognize severity + +### Scenario 3: Both Algorithms Agree + +**Keepalive:** +``` +[INFO] Per-connection keepalive: ID=2, BW: 3000.00 kbits/s, Window=8192, In-flight=100, RTT=30ms, NAKs=2 +``` + +**Comparison:** +``` +[DEBUG] [ALGO_CMP] Algorithms agree: Err=0 W=100% (delta: E=+0 W=+0%) +``` + +**Interpretation:** +- Good bandwidth, low RTT, low NAK rate, good window utilization +- Both algorithms assign 0 error points, 100% weight +- No comparison log at INFO level (reduced spam) + +## Analyzing Comparison Data + +### Extract Comparison Logs + +```bash +# Get all algorithm comparison logs +grep "ALGO_CMP" logs/srtla_rec.log > comparison.log + +# Get only divergences (meaningful differences) +grep "ALGO_CMP.*Delta: E=[+-][5-9]" logs/srtla_rec.log +grep "ALGO_CMP.*Delta: E=[+-][0-9][0-9]" logs/srtla_rec.log + +# Extract weight deltas +grep -oP 'Delta:.*W=\K[+-][0-9]+' comparison.log +``` + +### Statistics Script + +```bash +#!/bin/bash +# Calculate average 
deltas + +echo "=== Algorithm Comparison Statistics ===" + +# Average error delta +grep "ALGO_CMP" logs/srtla_rec.log | \ + grep -oP 'E=\K[+-]?[0-9]+(?= W)' | \ + awk '{sum+=$1; count++} END {print "Avg Error Delta:", sum/count}' + +# Average weight delta +grep "ALGO_CMP" logs/srtla_rec.log | \ + grep -oP 'W=\K[+-]?[0-9]+(?=%)|W=\K[+-]?[0-9]+(?= T)' | \ + awk '{sum+=$1; count++} END {print "Avg Weight Delta:", sum/count "%"}' + +# Times Connection Info was more pessimistic +grep "ALGO_CMP" logs/srtla_rec.log | \ + grep -c "E=+[0-9]" +echo "^ Times Connection Info found more errors" + +# Times Legacy was more pessimistic +grep "ALGO_CMP" logs/srtla_rec.log | \ + grep -c "E=-[0-9]" +echo "^ Times Legacy found more errors" +``` + +## Production vs Comparison Mode + +### Comparison Mode (ENABLE_ALGO_COMPARISON=1) + +**Use when:** +- Developing/testing the connection info algorithm +- Analyzing algorithm behavior differences +- Validating improvements + +**Characteristics:** +- Both algorithms run on every evaluation cycle +- Comparison logs when algorithms disagree +- Slightly higher CPU usage (negligible) +- Extra fields in ConnectionStats struct + +### Production Mode (ENABLE_ALGO_COMPARISON=0) + +**Use when:** +- Deploying to production +- Algorithm is proven and stable +- No need for comparison data + +**Characteristics:** +- Only Connection Info algorithm runs +- No comparison logging +- Minimal overhead +- Legacy fields not used + +**To switch:** +```cpp +// In src/receiver_config.h +#define ENABLE_ALGO_COMPARISON 0 +``` + +Then rebuild: +```bash +cd build && make -j$(nproc) +``` + +## Expected Insights + +### Connection Info Should Detect: + +1. **High RTT**: RTT > 200ms → extra error points +2. **RTT Variance**: Jitter > 50ms → extra error points +3. **High NAK Rate**: Sender NAKs > 10% → extra error points +4. **Window Congestion**: In-flight/window > 95% → extra error points +5. 
**Bitrate Discrepancies**: Sender vs receiver > 20% → warning logs + +### When Algorithms Might Disagree: + +- **Connection Info more pessimistic**: Detects latency/congestion issues legacy misses +- **Legacy more pessimistic**: Extremely rare (both use same bandwidth/loss base) +- **Both agree**: Stable, healthy connections with no hidden issues + +## Code Locations + +| Component | File | Lines | +|-----------|------|-------| +| Comparison flag | `src/receiver_config.h` | 13-15 | +| Legacy algorithm stats | `src/receiver_config.h` | 104-106 | +| Keepalive comparison | `src/protocol/srtla_handler.cpp` | 364-437 | +| Legacy algorithm impl | `src/quality/quality_evaluator.cpp` | 325-373 | +| Quality evaluation | `src/quality/quality_evaluator.cpp` | 182-188 | +| Load balancer comparison | `src/quality/load_balancer.cpp` | 111-127 | + +## Notes + +- Comparison mode has **minimal performance impact** (both algorithms are lightweight) +- Logs are **non-spammy**: Only shown when algorithms diverge meaningfully (≥5% delta) +- Both algorithms use the **same data** from the same keepalive packets +- The **Connection Info algorithm is active** (makes actual ACK throttling decisions) +- The **Legacy algorithm runs in parallel** for comparison only (results logged but not used) +- Disable comparison mode in production once algorithm is validated + +## Disabling Comparison Mode + +When you're satisfied with the Connection Info algorithm and don't need comparisons: + +1. Edit `src/receiver_config.h`: + ```cpp + #define ENABLE_ALGO_COMPARISON 0 + ``` + +2. Rebuild: + ```bash + cd build && make clean && make -j$(nproc) + ``` + +3. The legacy algorithm won't run, comparison logs disappear, and you save the extra struct fields. + +## Backwards Compatibility: No Connection Info in Keepalive + +### What Happens? 
+ +If a sender doesn't send connection info in keepalive packets (e.g., older srtla_send clients): + +✅ **Both algorithms continue to work** +✅ **Legacy algorithm**: Unchanged - only needs receiver-side bandwidth and packet loss +✅ **Connection Info algorithm**: Gracefully degrades to legacy behavior + +### Why Both Algorithms Give Same Results Without Connection Info + +The Connection Info algorithm **only adds penalties** for: +- RTT variance (requires sender RTT data) +- High sender NAK rate (requires sender NAK count) +- Window congestion (requires sender window/in-flight data) + +Without this telemetry, these penalties are all **zero**, making it functionally identical to the legacy algorithm. + +### Logs When Connection Info Missing + +**Keepalive:** +``` +[DEBUG] [IP:PORT] [Group: 0x...] Keepalive without connection info - both algorithms will use receiver-side metrics only +``` + +**Quality Evaluation (every 5 seconds):** +``` +[INFO] [Group: 0x...] Connection parameters adjusted: +[INFO] [IP:PORT] [COMPARISON] ConnInfo: Weight=85%, Throttle=0.85, ErrPts=10 | Legacy: Weight=85%, Throttle=0.85, ErrPts=10 | Delta: W=+0%, T=+0.00, E=+0 +``` + +Notice: **Delta is zero** because both algorithms see the same data and make identical decisions. + +### Mixed Environment + +If you have **multiple senders** with different capabilities: + +- Sender A (new): Sends connection info → Connection Info algorithm uses extra telemetry +- Sender B (old): No connection info → Both algorithms behave identically for this sender + +The comparison logs will show: +- Deltas for Sender A's connections (Connection Info finds more issues) +- Zero/minimal deltas for Sender B's connections (both algorithms agree) + +This is completely normal and expected! 
diff --git a/docs/keepalive-improvements.md b/docs/keepalive-improvements.md new file mode 100644 index 0000000..ed77a65 --- /dev/null +++ b/docs/keepalive-improvements.md @@ -0,0 +1,191 @@ +# Keepalive-Based Algorithm Improvements + +## Overview + +This document tracks the implementation of improvements to SRTLA's load balancing and quality evaluation algorithms by leveraging connection information from extended keepalive packets. + +## Current State + +### Extended Keepalive Protocol + +The codebase already supports extended keepalive packets that include rich connection telemetry from the sender: + +```c +typedef struct __attribute__((__packed__)) { + uint32_t conn_id; + int32_t window; // SRT window size + int32_t in_flight; // Packets currently in flight + uint32_t rtt_ms; // Round-trip time in milliseconds + uint32_t nak_count; // NAK (retransmission) count + uint32_t bitrate_bytes_per_sec; // Client-side bitrate measurement +} connection_info_t; +``` + +**Packet Length**: 38 bytes (extended keepalive) + +**Previous Status**: This data was only parsed and logged, not used for decision-making. + +**Current Status**: **FULLY IMPLEMENTED** - All telemetry data is now stored and used for quality assessment. + +**Location**: `src/protocol/srtla_handler.cpp` (handler), `src/quality/quality_evaluator.cpp` (evaluation) + +## Proposed Improvements + +### Phase 1: RTT-Based Quality Assessment (HIGH PRIORITY) + +**Rationale**: Latency is often a better early indicator of connection problems than bandwidth. High or increasing RTT signals congestion, routing issues, or link instability. 
+ +**Implementation**: +- Store RTT values in `ConnectionStats` +- Track RTT history for trend analysis +- Add error points based on RTT thresholds +- Monitor RTT variance (jitter) + +**Error Point Thresholds**: +- RTT > 500ms: +20 error points +- RTT > 200ms: +10 error points +- RTT > 100ms: +5 error points +- High RTT variance: +10 error points + +**Status**: ✅ **COMPLETED** (2025-12-04) + +### Phase 2: NAK Count Validation (HIGH PRIORITY) + +**Rationale**: The sender's NAK count provides ground truth about packet loss and retransmissions, which is more accurate than receiver-side estimation. + +**Implementation**: +- Store sender NAK count in `ConnectionStats` +- Compare with receiver-side packet loss tracking +- Use NAK rate (NAKs per packet) for quality scoring +- Replace or supplement current loss detection + +**Error Point Thresholds**: +- NAK rate > 20%: +40 error points +- NAK rate > 10%: +20 error points +- NAK rate > 5%: +10 error points +- NAK rate > 1%: +5 error points + +**Status**: ✅ **COMPLETED** (2025-12-04) + +### Phase 3: Window Utilization Analysis (MEDIUM PRIORITY) + +**Rationale**: The ratio of `in_flight/window` reveals how aggressively the sender is using each connection and can indicate congestion or throttling. + +**Implementation**: +- Calculate window utilization ratio +- Detect persistently full windows (congestion) +- Detect low utilization (client-side issues) +- Use for advanced load balancing decisions + +**Analysis**: +- Utilization > 95%: Possible congestion, reduce priority +- Utilization < 30%: Client throttling, investigate +- Optimal range: 60-80% utilization + +**Status**: ✅ **COMPLETED** (2025-12-04) + +### Phase 4: Sender Bitrate Validation (LOW PRIORITY) + +**Rationale**: Comparing sender and receiver bitrate measurements can detect path issues and validate metrics. 
+ +**Implementation**: +- Store sender bitrate in `ConnectionStats` +- Compare sender vs receiver measurements +- Alert on significant discrepancies (>20% difference) +- Use for debugging and diagnostics + +**Status**: ✅ **COMPLETED** (2025-12-04) + +## Implementation Plan (COMPLETED) + +### Step 1: Data Structure Updates ✅ +- [x] Add keepalive metrics fields to `ConnectionStats` (receiver_config.h) + - `uint32_t rtt_ms` + - `uint32_t rtt_history[RTT_HISTORY_SIZE]` + - `uint8_t rtt_history_idx` + - `time_t last_keepalive` + - `int32_t window` + - `int32_t in_flight` + - `uint32_t sender_nak_count` + - `uint32_t last_sender_nak_count` + - `uint32_t sender_bitrate_bps` + +### Step 2: Keepalive Handler Updates ✅ +- [x] Modify `SRTLAHandler::handle_keepalive()` to store metrics +- [x] Update connection stats with keepalive data +- [x] Track timestamp of last keepalive received +- [x] Add helper functions for RTT history and variance + +### Step 3: Quality Evaluator Enhancements ✅ +- [x] Add RTT-based error point calculation +- [x] Add NAK rate error point calculation +- [x] Add window utilization analysis +- [x] Add bitrate comparison logic + +### Step 4: Testing and Validation ⏳ +- [ ] Test with simulated high-latency connections +- [ ] Test with varying packet loss scenarios +- [ ] Validate error point calculations +- [ ] Monitor impact on load balancing behavior + +### Step 5: Documentation ✅ +- [x] Update keepalive-improvements.md with implementation details +- [x] Document keepalive metrics in technical docs +- [x] Add configuration parameters +- [ ] Update README.md with new quality metrics + +## Expected Benefits + +1. **Earlier Problem Detection**: RTT increases often precede bandwidth degradation +2. **More Accurate Loss Tracking**: Sender NAK count is ground truth +3. **Better Load Distribution**: Window utilization reveals true connection capacity +4. **Improved Debugging**: Bitrate comparison helps diagnose path issues +5. 
**Reduced Latency**: Penalizing high-RTT connections improves stream responsiveness + +## Configuration Parameters + +New parameters to add: + +```cpp +// RTT thresholds (milliseconds) +inline constexpr uint32_t RTT_THRESHOLD_CRITICAL = 500; // 500ms +inline constexpr uint32_t RTT_THRESHOLD_HIGH = 200; // 200ms +inline constexpr uint32_t RTT_THRESHOLD_MODERATE = 100; // 100ms + +// Window utilization thresholds +inline constexpr double WINDOW_UTILIZATION_CONGESTED = 0.95; +inline constexpr double WINDOW_UTILIZATION_LOW = 0.30; + +// Bitrate comparison tolerance +inline constexpr double BITRATE_DISCREPANCY_THRESHOLD = 0.20; // 20% + +// RTT variance threshold for jitter detection +inline constexpr uint32_t RTT_VARIANCE_THRESHOLD = 50; // 50ms stddev +``` + +## Risks and Mitigations + +### Risk: Keepalive packets might not arrive regularly +- **Mitigation**: Only apply RTT-based penalties if keepalive received within last 2 seconds +- **Mitigation**: Fall back to receiver-side metrics if keepalives stale + +### Risk: Sender-side metrics might be inaccurate +- **Mitigation**: Use as supplementary data, not sole decision factor +- **Mitigation**: Validate against receiver measurements + +### Risk: Too aggressive RTT penalties might exclude viable connections +- **Mitigation**: Use gradual error point increases, not binary decisions +- **Mitigation**: Maintain grace period for new connections + +## Progress Tracking + +- **Phase 1 (RTT)**: ✅ 100% complete +- **Phase 2 (NAK)**: ✅ 100% complete +- **Phase 3 (Window)**: ✅ 100% complete +- **Phase 4 (Bitrate)**: ✅ 100% complete + +**Overall Progress**: ✅ 100% (Implementation Complete) + +**Implementation Date**: 2025-12-04 +**Build Status**: ✅ Successful +**Next Steps**: Testing and validation diff --git a/src/common.c b/src/common.c index a872ece..2da9844 100644 --- a/src/common.c +++ b/src/common.c @@ -1,7 +1,8 @@ /* - srtla_rec - SRT transport proxy with link aggregation, forked by IRLToolkit and IRLServer + srtla_rec - 
SRT transport proxy with link aggregation Copyright (C) 2020-2021 BELABOX project Copyright (C) 2024 IRLToolkit Inc. + Copyright (C) 2024 OpenIRL Copyright (C) 2025 IRLServer.com This program is free software: you can redistribute it and/or modify @@ -18,12 +19,12 @@ along with this program. If not, see . */ +#include #include +#include #include #include #include -#include -#include #include #include "common.h" @@ -32,7 +33,8 @@ char _global_addr_buf[ADDR_BUF_SZ]; const char *print_addr(struct sockaddr *addr) { struct sockaddr_in *ain = (struct sockaddr_in *)addr; - return inet_ntop(ain->sin_family, &ain->sin_addr, _global_addr_buf, ADDR_BUF_SZ); + return inet_ntop(ain->sin_family, &ain->sin_addr, _global_addr_buf, + ADDR_BUF_SZ); } int port_no(struct sockaddr *addr) { @@ -42,7 +44,8 @@ int port_no(struct sockaddr *addr) { int parse_ip(struct sockaddr_in *addr, char *ip_str) { in_addr_t ip = inet_addr(ip_str); - if (ip == -1) return -1; + if (ip == -1) + return -1; memset(addr, 0, sizeof(*addr)); addr->sin_family = AF_INET; @@ -53,14 +56,16 @@ int parse_ip(struct sockaddr_in *addr, char *ip_str) { int parse_port(char *port_str) { int port = strtol(port_str, NULL, 10); - if (port <= 0 || port > 65535) return -2; + if (port <= 0 || port > 65535) + return -2; return port; } int get_seconds(time_t *s) { struct timespec ts; int ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - if (ret != 0) return -1; + if (ret != 0) + return -1; *s = ts.tv_sec; return 0; } @@ -68,14 +73,16 @@ int get_seconds(time_t *s) { int get_ms(uint64_t *ms) { struct timespec ts; int ret = clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); - if (ret != 0) return -1; + if (ret != 0) + return -1; *ms = ((uint64_t)(ts.tv_sec)) * 1000 + ((uint64_t)(ts.tv_nsec)) / 1000 / 1000; return 0; } int32_t get_srt_sn(void *pkt, int n) { - if (n < 4) return -1; + if (n < 4) + return -1; uint32_t sn = be32toh(*((uint32_t *)pkt)); if ((sn & (1 << 31)) == 0) { @@ -86,7 +93,8 @@ int32_t get_srt_sn(void *pkt, int n) { } 
uint16_t get_srt_type(void *pkt, int n) { - if (n < 2) return 0; + if (n < 2) + return 0; return be16toh(*((uint16_t *)pkt)); } @@ -99,16 +107,58 @@ int is_srtla_keepalive(void *pkt, int n) { } int is_srtla_reg1(void *pkt, int len) { - if (len != SRTLA_TYPE_REG1_LEN) return 0; + if (len != SRTLA_TYPE_REG1_LEN) + return 0; return get_srt_type(pkt, len) == SRTLA_TYPE_REG1; } int is_srtla_reg2(void *pkt, int len) { - if (len != SRTLA_TYPE_REG2_LEN) return 0; + if (len != SRTLA_TYPE_REG2_LEN) + return 0; return get_srt_type(pkt, len) == SRTLA_TYPE_REG2; } int is_srtla_reg3(void *pkt, int len) { - if (len != SRTLA_TYPE_REG3_LEN) return 0; + if (len != SRTLA_TYPE_REG3_LEN) + return 0; return get_srt_type(pkt, len) == SRTLA_TYPE_REG3; } + +int parse_keepalive_conn_info(const uint8_t *buf, int len, + connection_info_t *info) { + if (len < SRTLA_KEEPALIVE_EXT_LEN) + return 0; + + uint16_t packet_type = (buf[0] << 8) | buf[1]; + if (packet_type != SRTLA_TYPE_KEEPALIVE) + return 0; + + // Check magic number at bytes 10-11 + uint16_t magic = (buf[10] << 8) | buf[11]; + if (magic != SRTLA_KEEPALIVE_MAGIC) + return 0; + + // Check version at bytes 12-13 + uint16_t version = (buf[12] << 8) | buf[13]; + if (version != SRTLA_KEEPALIVE_EXT_VERSION) + return 0; + + // Parse connection info (all big-endian) + info->conn_id = ((uint32_t)buf[14] << 24) | ((uint32_t)buf[15] << 16) | + ((uint32_t)buf[16] << 8) | buf[17]; + info->window = + (int32_t)(((uint32_t)buf[18] << 24) | ((uint32_t)buf[19] << 16) | + ((uint32_t)buf[20] << 8) | buf[21]); + info->in_flight = + (int32_t)(((uint32_t)buf[22] << 24) | ((uint32_t)buf[23] << 16) | + ((uint32_t)buf[24] << 8) | buf[25]); + info->rtt_ms = ((uint32_t)buf[26] << 24) | ((uint32_t)buf[27] << 16) | + ((uint32_t)buf[28] << 8) | buf[29]; + info->nak_count = ((uint32_t)buf[30] << 24) | ((uint32_t)buf[31] << 16) | + ((uint32_t)buf[32] << 8) | buf[33]; + info->bitrate_bytes_per_sec = ((uint32_t)buf[34] << 24) | + ((uint32_t)buf[35] << 16) | + 
((uint32_t)buf[36] << 8) | buf[37]; + + return 1; +} diff --git a/src/common.h b/src/common.h index ee473a7..caa9b8e 100644 --- a/src/common.h +++ b/src/common.h @@ -1,95 +1,112 @@ -/* - srtla_rec - SRT transport proxy with link aggregation, forked by IRLToolkit - Copyright (C) 2020-2021 BELABOX project - Copyright (C) 2024 IRLToolkit Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -#define MTU 1500 - -#define SRT_TYPE_HANDSHAKE 0x8000 -#define SRT_TYPE_ACK 0x8002 -#define SRT_TYPE_NAK 0x8003 -#define SRT_TYPE_SHUTDOWN 0x8005 - -#define SRTLA_TYPE_KEEPALIVE 0x9000 -#define SRTLA_TYPE_ACK 0x9100 -#define SRTLA_TYPE_REG1 0x9200 -#define SRTLA_TYPE_REG2 0x9201 -#define SRTLA_TYPE_REG3 0x9202 -#define SRTLA_TYPE_REG_ERR 0x9210 -#define SRTLA_TYPE_REG_NGP 0x9211 -#define SRTLA_TYPE_REG_NAK 0x9212 - -#define SRTLA_EXT_IRLTK_CIP_REQ 0xA000 -#define SRTLA_EXT_IRLTK_CIP_RES 0xA001 - -#define SRT_MIN_LEN 16 - -#define SRTLA_ID_LEN 256 -#define SRTLA_TYPE_REG1_LEN (2 + (SRTLA_ID_LEN)) -#define SRTLA_TYPE_REG2_LEN (2 + (SRTLA_ID_LEN)) -#define SRTLA_TYPE_REG3_LEN 2 - -#define SRTLA_EXT_IRLTK_CIP_REQ_LEN 2 -#define SRTLA_EXT_IRLTK_CIP_RES_LEN (2 + sizeof(srtla_pkt_irltk_cip_res)) - -#define SEND_BUF_SIZE (32 * 1024 * 1024) -#define RECV_BUF_SIZE (32 * 1024 * 1024) - +#pragma once + +/* + srtla_rec - SRT transport proxy with link aggregation + + Copyright (C) 2020-2021 BELABOX project + Copyright (C) 
2024 IRLToolkit Inc. + Copyright (C) 2024 OpenIRL + Copyright (C) 2025 IRLServer.com + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +#include +#include + +#define MTU 1500 + +#define SRT_TYPE_HANDSHAKE 0x8000 +#define SRT_TYPE_ACK 0x8002 +#define SRT_TYPE_NAK 0x8003 +#define SRT_TYPE_SHUTDOWN 0x8005 + +#define SRTLA_TYPE_KEEPALIVE 0x9000 +#define SRTLA_TYPE_ACK 0x9100 +#define SRTLA_TYPE_REG1 0x9200 +#define SRTLA_TYPE_REG2 0x9201 +#define SRTLA_TYPE_REG3 0x9202 +#define SRTLA_TYPE_REG_ERR 0x9210 +#define SRTLA_TYPE_REG_NGP 0x9211 +#define SRTLA_TYPE_REG_NAK 0x9212 + +// Extended KEEPALIVE with Connection Info +#define SRTLA_KEEPALIVE_MAGIC 0xC01F +#define SRTLA_KEEPALIVE_STD_LEN 10 +#define SRTLA_KEEPALIVE_EXT_LEN 38 +#define SRTLA_KEEPALIVE_EXT_VERSION 0x0001 + +#define SRT_MIN_LEN 16 + +#define SRTLA_ID_LEN 256 +#define SRTLA_TYPE_REG1_LEN (2 + (SRTLA_ID_LEN)) +#define SRTLA_TYPE_REG2_LEN (2 + (SRTLA_ID_LEN)) +#define SRTLA_TYPE_REG3_LEN 2 + +#define SEND_BUF_SIZE (100 * 1024 * 1024) +#define RECV_BUF_SIZE (100 * 1024 * 1024) + +typedef struct __attribute__((__packed__)) { + uint16_t type; + uint16_t subtype; + uint32_t info; + uint32_t timestamp; + uint32_t dest_id; +} srt_header_t; + +typedef struct __attribute__((__packed__)) { + srt_header_t header; + uint32_t version; + uint16_t enc_field; + uint16_t ext_field; + uint32_t initial_seq; + uint32_t mtu; + uint32_t mfw; + uint32_t 
handshake_type; + uint32_t source_id; + uint32_t syn_cookie; + char peer_ip[16]; +} srt_handshake_t; + +// Extended KEEPALIVE Connection Info structure typedef struct __attribute__((__packed__)) { - uint16_t type; - uint16_t subtype; - uint32_t info; - uint32_t timestamp; - uint32_t dest_id; -} srt_header_t; - -typedef struct __attribute__((__packed__)) { - srt_header_t header; - uint32_t version; - uint16_t enc_field; - uint16_t ext_field; - uint32_t initial_seq; - uint32_t mtu; - uint32_t mfw; - uint32_t handshake_type; - uint32_t source_id; - uint32_t syn_cookie; - char peer_ip[16]; -} srt_handshake_t; - -typedef struct __attribute__((__packed__)) { - uint8_t address_family; - uint8_t address[16]; -} srtla_ext_irltk_cip_res; - -int get_seconds(time_t *s); -int get_ms(uint64_t *ms); - -const char *print_addr(struct sockaddr *addr); -int port_no(struct sockaddr *addr); -int parse_ip(struct sockaddr_in *addr, char *ip_str); -int parse_port(char *port_str); - -int32_t get_srt_sn(void *pkt, int n); -uint16_t get_srt_type(void *pkt, int n); -int is_srt_ack(void *pkt, int n); -int is_srt_shutdown(void *pkt, int n); - -int is_srtla_keepalive(void *pkt, int len); -int is_srtla_reg1(void *pkt, int len); -int is_srtla_reg2(void *pkt, int len); -int is_srtla_reg3(void *pkt, int len); + uint32_t conn_id; + int32_t window; + int32_t in_flight; + uint32_t rtt_ms; + uint32_t nak_count; + uint32_t bitrate_bytes_per_sec; +} connection_info_t; + +int get_seconds(time_t *s); +int get_ms(uint64_t *ms); + +const char *print_addr(struct sockaddr *addr); +int port_no(struct sockaddr *addr); +int parse_ip(struct sockaddr_in *addr, char *ip_str); +int parse_port(char *port_str); + +int32_t get_srt_sn(void *pkt, int n); +uint16_t get_srt_type(void *pkt, int n); +int is_srt_ack(void *pkt, int n); +int is_srt_shutdown(void *pkt, int n); + +int is_srtla_keepalive(void *pkt, int len); +int is_srtla_reg1(void *pkt, int len); +int is_srtla_reg2(void *pkt, int len); +int is_srtla_reg3(void *pkt, 
int len); + +// Extended KEEPALIVE parsing function +int parse_keepalive_conn_info(const uint8_t *buf, int len, + connection_info_t *info); diff --git a/src/connection/connection.cpp b/src/connection/connection.cpp new file mode 100644 index 0000000..331adc0 --- /dev/null +++ b/src/connection/connection.cpp @@ -0,0 +1,25 @@ +#include "connection.h" + +#include + +namespace srtla::connection { + +Connection::Connection(const struct sockaddr_storage &addr, time_t timestamp) + : addr_(addr), last_rcvd_(timestamp), connection_start_(timestamp) { + recv_log_.fill(0); + + stats_.bytes_received = 0; + stats_.packets_received = 0; + stats_.packets_lost = 0; + stats_.last_eval_time = 0; + stats_.last_bytes_received = 0; + stats_.last_packets_received = 0; + stats_.last_packets_lost = 0; + stats_.error_points = 0; + stats_.weight_percent = WEIGHT_FULL; + stats_.last_ack_sent_time = 0; + stats_.ack_throttle_factor = 1.0; + stats_.nack_count = 0; +} + +} // namespace srtla::connection diff --git a/src/connection/connection.h b/src/connection/connection.h new file mode 100644 index 0000000..e43394a --- /dev/null +++ b/src/connection/connection.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include +#include + +#include "../receiver_config.h" + +extern "C" { +#include "../common.h" +} + +namespace srtla::connection { + +class Connection { +public: + Connection(const struct sockaddr_storage &addr, time_t timestamp); + + const struct sockaddr_storage &address() const { return addr_; } + + time_t last_received() const { return last_rcvd_; } + void update_last_received(time_t ts) { last_rcvd_ = ts; } + + int recv_index() const { return recv_idx_; } + void set_recv_index(int idx) { recv_idx_ = idx; } + + const std::array &recv_log() const { return recv_log_; } + std::array &recv_log() { return recv_log_; } + + ConnectionStats &stats() { return stats_; } + const ConnectionStats &stats() const { return stats_; } + + time_t recovery_start() const { return recovery_start_; } + 
void set_recovery_start(time_t ts) { recovery_start_ = ts; } + + time_t connection_start() const { return connection_start_; } + + bool extensions_negotiated() const { return extensions_negotiated_; } + void set_extensions_negotiated(bool negotiated) { extensions_negotiated_ = negotiated; } + + uint32_t sender_capabilities() const { return sender_capabilities_; } + void set_sender_capabilities(uint32_t caps) { sender_capabilities_ = caps; } + +private: + struct sockaddr_storage addr_ {}; + time_t last_rcvd_ = 0; + int recv_idx_ = 0; + std::array recv_log_ {}; + + ConnectionStats stats_ {}; + time_t recovery_start_ = 0; + time_t connection_start_ = 0; + + bool extensions_negotiated_ = false; + uint32_t sender_capabilities_ = 0; +}; + +using ConnectionPtr = std::shared_ptr; + +} // namespace srtla::connection diff --git a/src/connection/connection_group.cpp b/src/connection/connection_group.cpp new file mode 100644 index 0000000..820614d --- /dev/null +++ b/src/connection/connection_group.cpp @@ -0,0 +1,97 @@ +#include "connection_group.h" + +#include +#include +#include +#include +#include +#include + +#include + +#include "../utils/network_utils.h" + +namespace srtla::connection { + +using srtla::utils::NetworkUtils; + +ConnectionGroup::ConnectionGroup(const char *client_id, time_t timestamp) + : created_at_(timestamp) { + id_.fill(0); + std::memcpy(id_.data(), client_id, SRTLA_ID_LEN / 2); + + char random_bytes[SRTLA_ID_LEN / 2]; + NetworkUtils::get_random_bytes(random_bytes, sizeof(random_bytes)); + std::copy(random_bytes, + random_bytes + (SRTLA_ID_LEN / 2), + id_.begin() + (SRTLA_ID_LEN / 2)); +} + +ConnectionGroup::~ConnectionGroup() { + conns_.clear(); + + if (srt_sock_ > 0) { + remove_socket_info_file(); + if (epoll_fd_ >= 0) { + NetworkUtils::epoll_remove(epoll_fd_, srt_sock_); + } + close(srt_sock_); + } +} + +void ConnectionGroup::add_connection(const ConnectionPtr &conn) { + conns_.push_back(conn); +} + +void ConnectionGroup::remove_connection(const 
ConnectionPtr &conn) { + conns_.erase(std::remove(conns_.begin(), conns_.end(), conn), conns_.end()); +} + +void ConnectionGroup::set_srt_socket(int sock) { + srt_sock_ = sock; +} + +std::vector ConnectionGroup::get_client_addresses() const { + std::vector addresses; + addresses.reserve(conns_.size()); + for (const auto &conn : conns_) { + addresses.push_back(conn->address()); + } + return addresses; +} + +void ConnectionGroup::write_socket_info_file() const { + if (srt_sock_ == -1) { + return; + } + + uint16_t local_port = NetworkUtils::get_local_port(srt_sock_); + std::string file_name = std::string(SRT_SOCKET_INFO_PREFIX) + std::to_string(local_port); + + auto client_addresses = get_client_addresses(); + std::ofstream out(file_name); + if (!out.is_open()) { + spdlog::error("[Group: {}] Failed to open socket info file: {}", static_cast(this), file_name); + return; + } + for (const auto &addr : client_addresses) { + auto *mutable_addr = const_cast(reinterpret_cast(&addr)); + out << print_addr(mutable_addr) << std::endl; + } + + spdlog::info("[Group: {}] Wrote SRTLA socket info file", static_cast(this)); +} + +void ConnectionGroup::remove_socket_info_file() const { + if (srt_sock_ == -1) { + return; + } + + uint16_t local_port = NetworkUtils::get_local_port(srt_sock_); + std::string file_name = std::string(SRT_SOCKET_INFO_PREFIX) + std::to_string(local_port); + std::remove(file_name.c_str()); + + spdlog::info("[Group: {}] Removed SRTLA socket info file", static_cast(this)); +} + +} // namespace srtla::connection diff --git a/src/connection/connection_group.h b/src/connection/connection_group.h new file mode 100644 index 0000000..924a3de --- /dev/null +++ b/src/connection/connection_group.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include +#include + +#include "connection.h" +#include "../receiver_config.h" +#include "../utils/nak_dedup.h" + +extern "C" { +#include "../common.h" +} + +namespace srtla::connection { + +using srtla::utils::NakHashEntry; + 
+class ConnectionGroup { +public: + ConnectionGroup(const char *client_id, time_t timestamp); + ~ConnectionGroup(); + + const std::array &id() const { return id_; } + + void add_connection(const ConnectionPtr &conn); + void remove_connection(const ConnectionPtr &conn); + + std::vector &connections() { return conns_; } + const std::vector &connections() const { return conns_; } + + time_t created_at() const { return created_at_; } + + int srt_socket() const { return srt_sock_; } + void set_srt_socket(int sock); + + const struct sockaddr_storage &last_address() const { return last_addr_; } + void set_last_address(const struct sockaddr_storage &addr) { last_addr_ = addr; } + + uint64_t total_target_bandwidth() const { return total_target_bandwidth_; } + void set_total_target_bandwidth(uint64_t bw) { total_target_bandwidth_ = bw; } + + time_t last_quality_eval() const { return last_quality_eval_; } + void set_last_quality_eval(time_t ts) { last_quality_eval_ = ts; } + + time_t last_load_balance_eval() const { return last_load_balance_eval_; } + void set_last_load_balance_eval(time_t ts) { last_load_balance_eval_ = ts; } + + bool load_balancing_enabled() const { return load_balancing_enabled_; } + void set_load_balancing_enabled(bool enabled) { load_balancing_enabled_ = enabled; } + + + std::unordered_map &nak_cache() { return nak_seen_hash_; } + + std::vector get_client_addresses() const; + void write_socket_info_file() const; + void remove_socket_info_file() const; + + void set_epoll_fd(int fd) { epoll_fd_ = fd; } + +private: + std::array id_ {}; + std::vector conns_; + time_t created_at_ = 0; + int srt_sock_ = -1; + struct sockaddr_storage last_addr_ {}; + + uint64_t total_target_bandwidth_ = 0; + time_t last_quality_eval_ = 0; + time_t last_load_balance_eval_ = 0; + bool load_balancing_enabled_ = true; + + + std::unordered_map nak_seen_hash_; + int epoll_fd_ = -1; +}; + +using ConnectionGroupPtr = std::shared_ptr; + +} // namespace srtla::connection diff --git 
a/src/connection/connection_registry.cpp b/src/connection/connection_registry.cpp new file mode 100644 index 0000000..01d964c --- /dev/null +++ b/src/connection/connection_registry.cpp @@ -0,0 +1,166 @@ +#include "connection_registry.h" + +#include + +#include + +#include "../receiver_config.h" +#include "../utils/network_utils.h" + + +extern "C" { +#include "../common.h" +} +namespace srtla::connection { + +using srtla::utils::NetworkUtils; + +namespace { + +bool addresses_equal(const struct sockaddr_storage &a, const struct sockaddr_storage &b) { + if (a.ss_family != b.ss_family) { + return false; + } + + if (a.ss_family == AF_INET6) { + auto *addr_a = reinterpret_cast(&a); + auto *addr_b = reinterpret_cast(&b); + return NetworkUtils::constant_time_compare(&addr_a->sin6_addr, &addr_b->sin6_addr, sizeof(struct in6_addr)) == 0 && + addr_a->sin6_port == addr_b->sin6_port; + } + + auto *addr_a = reinterpret_cast(&a); + auto *addr_b = reinterpret_cast(&b); + return NetworkUtils::constant_time_compare(&addr_a->sin_addr, &addr_b->sin_addr, sizeof(struct in_addr)) == 0 && + addr_a->sin_port == addr_b->sin_port; +} + +bool conn_timed_out(const ConnectionPtr &conn, time_t ts) { + return (conn->last_received() + CONN_TIMEOUT) < ts; +} + +} // namespace + +ConnectionRegistry &ConnectionRegistry::instance() { + static ConnectionRegistry registry; + return registry; +} + +void ConnectionRegistry::add_group(const ConnectionGroupPtr &group) { + groups_.push_back(group); +} + +void ConnectionRegistry::remove_group(const ConnectionGroupPtr &group) { + groups_.erase(std::remove(groups_.begin(), groups_.end(), group), groups_.end()); +} + +ConnectionGroupPtr ConnectionRegistry::find_group_by_id(const char *id) { + for (auto &group : groups_) { + if (NetworkUtils::constant_time_compare(group->id().data(), id, SRTLA_ID_LEN) == 0) { + return group; + } + } + return nullptr; +} + +void ConnectionRegistry::find_by_address(const struct sockaddr_storage *addr, + ConnectionGroupPtr 
&out_group, + ConnectionPtr &out_conn) { + for (auto &group : groups_) { + for (auto &conn : group->connections()) { + if (addresses_equal(conn->address(), *addr)) { + out_group = group; + out_conn = conn; + return; + } + } + + if (addresses_equal(group->last_address(), *addr)) { + out_group = group; + out_conn.reset(); + return; + } + } + + out_group.reset(); + out_conn.reset(); +} + +void ConnectionRegistry::cleanup_inactive(time_t current_time, + const std::function &keepalive_cb) { + static time_t last_run = 0; + if ((last_run + CLEANUP_PERIOD) > current_time) { + return; + } + last_run = current_time; + + if (groups_.empty()) { + return; + } + + spdlog::debug("Starting a cleanup run..."); + + std::size_t total_groups = groups_.size(); + std::size_t total_connections = 0; + std::size_t removed_groups = 0; + std::size_t removed_connections = 0; + + for (auto group_it = groups_.begin(); group_it != groups_.end();) { + auto group = *group_it; + std::size_t before_conns = group->connections().size(); + total_connections += before_conns; + + auto &connections = group->connections(); + for (auto conn_it = connections.begin(); conn_it != connections.end();) { + auto conn = *conn_it; + + if (conn->recovery_start() > 0) { + if (conn->last_received() > conn->recovery_start()) { + if ((current_time - conn->recovery_start()) > RECOVERY_CHANCE_PERIOD) { + spdlog::info("[{}:{}] [Group: {}] Connection recovery completed", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + conn->set_recovery_start(0); + } + } else if ((conn->recovery_start() + RECOVERY_CHANCE_PERIOD) < current_time) { + spdlog::info("[{}:{}] [Group: {}] Connection recovery failed", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + conn->set_recovery_start(0); + } + } + + if (conn_timed_out(conn, current_time)) 
{ + conn_it = connections.erase(conn_it); + removed_connections++; + spdlog::info("[{}:{}] [Group: {}] Connection removed (timed out)", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + } else { + if (keepalive_cb && (conn->last_received() + KEEPALIVE_PERIOD) < current_time) { + keepalive_cb(conn, current_time); + } + ++conn_it; + } + } + + if (connections.empty() && (group->created_at() + GROUP_TIMEOUT) < current_time) { + group_it = groups_.erase(group_it); + removed_groups++; + spdlog::info("[Group: {}] Group removed (no connections)", static_cast(group.get())); + } else { + if (before_conns != connections.size()) { + group->write_socket_info_file(); + } + ++group_it; + } + } + + spdlog::debug("Clean up run ended. Counted {} groups and {} connections. Removed {} groups and {} connections", + total_groups, total_connections, removed_groups, removed_connections); +} + +} // namespace srtla::connection diff --git a/src/connection/connection_registry.h b/src/connection/connection_registry.h new file mode 100644 index 0000000..63b1c82 --- /dev/null +++ b/src/connection/connection_registry.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +#include "connection_group.h" + +namespace srtla::connection { + +class ConnectionRegistry { +public: + ConnectionRegistry() = default; + + static ConnectionRegistry &instance(); + + void add_group(const ConnectionGroupPtr &group); + void remove_group(const ConnectionGroupPtr &group); + + ConnectionGroupPtr find_group_by_id(const char *id); + void find_by_address(const struct sockaddr_storage *addr, + ConnectionGroupPtr &out_group, + ConnectionPtr &out_conn); + + std::vector &groups() { return groups_; } + const std::vector &groups() const { return groups_; } + + void cleanup_inactive(time_t current_time, + const std::function &keepalive_cb); + +private: + std::vector groups_; +}; + +} // namespace srtla::connection 
diff --git a/src/protocol/srt_handler.cpp b/src/protocol/srt_handler.cpp new file mode 100644 index 0000000..804e3d9 --- /dev/null +++ b/src/protocol/srt_handler.cpp @@ -0,0 +1,145 @@ +#include "srt_handler.h" + +#include +#include +#include +#include + +#include + +extern "C" { +#include "../common.h" +} + +namespace srtla::protocol { + +SRTHandler::SRTHandler(int srtla_socket, + const struct sockaddr_storage &srt_addr, + int epoll_fd, + connection::ConnectionRegistry ®istry) + : srtla_socket_(srtla_socket), srt_addr_(srt_addr), epoll_fd_(epoll_fd), registry_(registry) {} + +void SRTHandler::handle_srt_data(connection::ConnectionGroupPtr group) { + if (!group) { + return; + } + + char buf[MTU]; + int n = recv(group->srt_socket(), buf, MTU, 0); + if (n < SRT_MIN_LEN) { + spdlog::error("[Group: {}] Failed to read the SRT sock, terminating the group", + static_cast(group.get())); + remove_group(group); + return; + } + + if (is_srt_ack(buf, n)) { + for (auto &conn : group->connections()) { + int ret = sendto(srtla_socket_, &buf, n, 0, + reinterpret_cast(&conn->address()), sizeof(struct sockaddr_storage)); + if (ret != n) { + spdlog::error("[{}:{}] [Group: {}] Failed to send the SRT ack", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + } + } + } else { + int ret = sendto(srtla_socket_, &buf, n, 0, + reinterpret_cast(&group->last_address()), sizeof(struct sockaddr_storage)); + if (ret != n) { + spdlog::error("[{}:{}] [Group: {}] Failed to send the SRT packet", + print_addr(const_cast(reinterpret_cast(&group->last_address()))), + port_no(const_cast(reinterpret_cast(&group->last_address()))), + static_cast(group.get())); + } + } +} + +bool SRTHandler::forward_to_srt_server(connection::ConnectionGroupPtr group, const char *buffer, int length) { + if (!ensure_group_socket(group)) { + return false; + } + + int ret = send(group->srt_socket(), buffer, length, 0); + if (ret 
!= length) { + spdlog::error("[Group: {}] Failed to forward SRTLA packet, terminating the group", + static_cast(group.get())); + remove_group(group); + return false; + } + return true; +} + +bool SRTHandler::ensure_group_socket(connection::ConnectionGroupPtr group) { + if (group->srt_socket() >= 0) { + return true; + } + + int sock = socket(srt_addr_.ss_family, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (sock < 0) { + spdlog::error("[Group: {}] Failed to create an SRT socket", static_cast(group.get())); + remove_group(group); + return false; + } + + int bufsize = RECV_BUF_SIZE; + if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) != 0) { + spdlog::error("failed to set receive buffer size ({})", bufsize); + close(sock); + remove_group(group); + return false; + } + + int sndbufsize = SEND_BUF_SIZE; + if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sndbufsize, sizeof(sndbufsize)) != 0) { + spdlog::error("failed to set send buffer size ({})", sndbufsize); + close(sock); + remove_group(group); + return false; + } + + int flags = fcntl(sock, F_GETFL, 0); + if (flags == -1 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) == -1) { + spdlog::error("failed to set g->srt_sock non-blocking"); + close(sock); + remove_group(group); + return false; + } + + int ret = -1; + if (srt_addr_.ss_family == AF_INET) { + ret = connect(sock, reinterpret_cast(&srt_addr_), sizeof(struct sockaddr_in)); + } else if (srt_addr_.ss_family == AF_INET6) { + ret = connect(sock, reinterpret_cast(&srt_addr_), sizeof(struct sockaddr_in6)); + } + + if (ret != 0) { + + spdlog::error("[Group: {}] Failed to connect to SRT server: {}", static_cast(group.get()), strerror(errno)); + close(sock); + remove_group(group); + return false; + } + + uint16_t local_port = utils::NetworkUtils::get_local_port(sock); + spdlog::info("[Group: {}] Created SRT socket. 
Local Port: {}", static_cast(group.get()), local_port); + + if (utils::NetworkUtils::epoll_add(epoll_fd_, sock, EPOLLIN, group.get()) != 0) { + spdlog::error("[Group: {}] Failed to add the SRT socket to the epoll", static_cast(group.get())); + close(sock); + remove_group(group); + return false; + } + + group->set_srt_socket(sock); + group->set_epoll_fd(epoll_fd_); + group->write_socket_info_file(); + return true; +} + +void SRTHandler::remove_group(connection::ConnectionGroupPtr group) { + registry_.remove_group(group); +} + +} // namespace srtla::protocol diff --git a/src/protocol/srt_handler.h b/src/protocol/srt_handler.h new file mode 100644 index 0000000..584460c --- /dev/null +++ b/src/protocol/srt_handler.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include "../connection/connection_registry.h" +#include "../utils/network_utils.h" + +namespace srtla::protocol { + +class SRTHandler { +public: + SRTHandler(int srtla_socket, + const struct sockaddr_storage &srt_addr, + int epoll_fd, + connection::ConnectionRegistry ®istry); + + void handle_srt_data(connection::ConnectionGroupPtr group); + bool forward_to_srt_server(connection::ConnectionGroupPtr group, const char *buffer, int length); + +private: + bool ensure_group_socket(connection::ConnectionGroupPtr group); + void remove_group(connection::ConnectionGroupPtr group); + + int srtla_socket_; + struct sockaddr_storage srt_addr_ {}; + int epoll_fd_; + connection::ConnectionRegistry ®istry_; +}; + +} // namespace srtla::protocol diff --git a/src/protocol/srtla_handler.cpp b/src/protocol/srtla_handler.cpp new file mode 100644 index 0000000..33f07d8 --- /dev/null +++ b/src/protocol/srtla_handler.cpp @@ -0,0 +1,468 @@ +#include "srtla_handler.h" + +#include +#include +#include +#include +#include + +#include + +extern "C" { +#include "../common.h" +} + +#include "../quality/quality_evaluator.h" + +namespace srtla::protocol { + +using srtla::connection::ConnectionGroupPtr; +using srtla::connection::ConnectionPtr; 
+using srtla::utils::NakDeduplicator; + +namespace { +constexpr socklen_t kAddrLen = sizeof(struct sockaddr_storage); + +ConnectionGroupPtr wait_group_by_id(connection::ConnectionRegistry ®istry, + const uint8_t *id, + int max_ms = 200) { + using clock = std::chrono::steady_clock; + const auto deadline = clock::now() + std::chrono::milliseconds(max_ms); + + while (clock::now() < deadline) { + if (auto group = registry.find_group_by_id(reinterpret_cast(id))) { + return group; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + return nullptr; +} + +bool is_srt_nak_packet(const char *pkt, int length) { + if (length < static_cast(sizeof(srt_header_t))) { + return false; + } + uint16_t type = get_srt_type(const_cast(pkt), length); + return type == SRT_TYPE_NAK; +} + +inline bool is_duplicate_nak(ConnectionGroupPtr group, const char *buffer, int length) { + uint64_t hash = NakDeduplicator::hash_nak_payload(reinterpret_cast(buffer), length, 128); + uint64_t now_ms = 0; + get_ms(&now_ms); + return !NakDeduplicator::should_accept_nak(group->nak_cache(), hash, now_ms); +} + +} // namespace + +SRTLAHandler::SRTLAHandler(int srtla_socket, + connection::ConnectionRegistry ®istry, + SRTHandler &srt_handler, + quality::MetricsCollector &metrics_collector) + : srtla_socket_(srtla_socket), + registry_(registry), + srt_handler_(srt_handler), + metrics_(metrics_collector) {} + +void SRTLAHandler::process_packet(time_t ts) { + char buf[MTU] = {}; + struct sockaddr_storage srtla_addr {}; + socklen_t len = kAddrLen; + + int n = recvfrom(srtla_socket_, buf, MTU, 0, reinterpret_cast(&srtla_addr), &len); + if (n < 0) { + spdlog::error("Failed to read an srtla packet {}", strerror(errno)); + return; + } + + if (is_srtla_reg1(buf, n)) { + register_group(&srtla_addr, buf, ts); + return; + } + + if (is_srtla_reg2(buf, n)) { + register_connection(&srtla_addr, buf, ts); + return; + } + + ConnectionGroupPtr group; + ConnectionPtr conn; + registry_.find_by_address(&srtla_addr, 
group, conn); + if (!group || !conn) { + return; + } + + bool was_timed_out = (conn->last_received() + CONN_TIMEOUT) < ts; + conn->update_last_received(ts); + + if (conn->recovery_start() == 0 && was_timed_out) { + conn->set_recovery_start(ts); + spdlog::info("[{}:{}] [Group: {}] Connection is recovering", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + } + + if (is_srtla_keepalive(buf, n)) { + handle_keepalive(group, conn, &srtla_addr, buf, n); + return; + } + + if (n < SRT_MIN_LEN) { + return; + } + + group->set_last_address(srtla_addr); + metrics_.on_packet_received(conn, static_cast(n)); + + if (is_srt_nak_packet(buf, n)) { + if (is_duplicate_nak(group, buf, n)) { + spdlog::info("[{}:{}] [Group: {}] Duplicate NAK packet suppressed", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + return; + } + + metrics_.on_nak_detected(conn, 1); + spdlog::info("[{}:{}] [Group: {}] Received NAK packet. 
Total loss: {}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get()), + conn->stats().packets_lost); + + if (conn->stats().nack_count > 5 && (group->last_quality_eval() + 1) < ts) { + // Trigger immediate quality evaluation for high NAK rates + // (timing protection in evaluator prevents excessive evaluations) + quality::QualityEvaluator evaluator; + evaluator.evaluate_group(group, ts); + } + } + + int32_t sn = get_srt_sn(buf, n); + if (sn >= 0) { + register_packet(group, conn, sn); + } + + if (!srt_handler_.forward_to_srt_server(group, buf, n)) { + return; + } +} + +void SRTLAHandler::send_keepalive(const ConnectionPtr &conn, time_t ts) { + uint16_t pkt = htobe16(SRTLA_TYPE_KEEPALIVE); + int ret = sendto(srtla_socket_, &pkt, sizeof(pkt), 0, + reinterpret_cast(&conn->address()), kAddrLen); + if (ret != sizeof(pkt)) { + spdlog::error("[{}:{}] Failed to send keepalive packet", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address())))); + } else { + spdlog::debug("[{}:{}] Sent keepalive packet", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address())))); + } +} + +int SRTLAHandler::register_group(const struct sockaddr_storage *addr, const char *buffer, time_t ts) { + if (registry_.groups().size() >= MAX_GROUPS) { + uint16_t header = htobe16(SRTLA_TYPE_REG_ERR); + sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), kAddrLen); + spdlog::error("[{}:{}] Group registration failed: Max groups reached", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr)))); + return -1; + } + + ConnectionGroupPtr existing_group; + ConnectionPtr existing_conn; + registry_.find_by_address(addr, existing_group, existing_conn); + if (existing_group) { + uint16_t header = htobe16(SRTLA_TYPE_REG_ERR); + 
sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), kAddrLen); + spdlog::error("[{}:{}] Group registration failed: Remote address already registered", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr)))); + return -1; + } + + char *client_id = const_cast(buffer + 2); + auto group = std::make_shared(client_id, ts); + group->set_last_address(*addr); + + char out_buf[SRTLA_TYPE_REG2_LEN]; + uint16_t header = htobe16(SRTLA_TYPE_REG2); + std::memcpy(out_buf, &header, sizeof(header)); + std::memcpy(out_buf + sizeof(header), group->id().data(), SRTLA_ID_LEN); + + int ret = sendto(srtla_socket_, &out_buf, sizeof(out_buf), 0, + reinterpret_cast(addr), kAddrLen); + if (ret != sizeof(out_buf)) { + spdlog::error("[{}:{}] Group registration failed: Send error", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr)))); + return -1; + } + + registry_.add_group(group); + spdlog::info("[{}:{}] [Group: {}] Group registered", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + return 0; +} + +int SRTLAHandler::register_connection(const struct sockaddr_storage *addr, const char *buffer, time_t ts) { + const uint8_t *id = reinterpret_cast(buffer + 2); + auto group = wait_group_by_id(registry_, id); + if (!group) { + uint16_t header = htobe16(SRTLA_TYPE_REG_NGP); + sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), kAddrLen); + spdlog::error("[{}:{}] Connection registration failed: No group found", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr)))); + return -1; + } + + ConnectionGroupPtr tmp_group; + ConnectionPtr conn; + registry_.find_by_address(addr, tmp_group, conn); + if (tmp_group && tmp_group != group) { + uint16_t header = htobe16(SRTLA_TYPE_REG_ERR); + sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), 
kAddrLen); + spdlog::error("[{}:{}] [Group: {}] Connection registration failed: Provided group ID mismatch", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + return -1; + } + + bool already_registered = true; + if (!conn) { + if (group->connections().size() >= MAX_CONNS_PER_GROUP) { + uint16_t header = htobe16(SRTLA_TYPE_REG_ERR); + sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), kAddrLen); + spdlog::error("[{}:{}] [Group: {}] Connection registration failed: Max group conns reached", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + return -1; + } + + conn = std::make_shared(*addr, ts); + already_registered = false; + } + + uint16_t header = htobe16(SRTLA_TYPE_REG3); + int ret = sendto(srtla_socket_, &header, sizeof(header), 0, + reinterpret_cast(addr), kAddrLen); + if (ret != sizeof(header)) { + spdlog::error("[{}:{}] [Group: {}] Connection registration failed: Socket send error", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + return -1; + } + + if (!already_registered) { + group->add_connection(conn); + } + group->write_socket_info_file(); + group->set_last_address(*addr); + + spdlog::info("[{}:{}] [Group: {}] Connection registration", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + return 0; +} + +void SRTLAHandler::register_packet(ConnectionGroupPtr group, + const ConnectionPtr &conn, + int32_t sn) { + int next_idx = conn->recv_index() + 1; + if (next_idx <= 0 || next_idx > static_cast(RECV_ACK_INT)) { + // Defensive reset if index is corrupted or out of bounds + next_idx = 1; + } + conn->set_recv_index(next_idx); + conn->recv_log()[static_cast(next_idx - 1)] = htobe32(sn); + + uint64_t current_ms = 0; + get_ms(¤t_ms); + 
+ if (conn->recv_index() == static_cast(RECV_ACK_INT)) { + bool should_send = true; + if (conn->stats().ack_throttle_factor > 0.0f && conn->stats().ack_throttle_factor < 1.0) { + uint64_t min_interval = ACK_THROTTLE_INTERVAL / conn->stats().ack_throttle_factor; + if (conn->stats().last_ack_sent_time > 0 && + current_ms < conn->stats().last_ack_sent_time + min_interval) { + should_send = false; + spdlog::trace("[{}:{}] [Group: {}] ACK throttled, next in {} ms (factor: {:.2f})", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get()), + (conn->stats().last_ack_sent_time + min_interval) - current_ms, + conn->stats().ack_throttle_factor); + } + } + + if (should_send) { + srtla_ack_pkt ack {}; + ack.type = htobe32(SRTLA_TYPE_ACK << 16); + std::memcpy(&ack.acks, conn->recv_log().data(), sizeof(uint32_t) * conn->recv_log().size()); + + int ret = sendto(srtla_socket_, &ack, sizeof(ack), 0, + reinterpret_cast(&conn->address()), kAddrLen); + if (ret != sizeof(ack)) { + spdlog::error("[{}:{}] [Group: {}] Failed to send the SRTLA ACK", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get())); + } else { + conn->stats().last_ack_sent_time = current_ms; + spdlog::trace("[{}:{}] [Group: {}] Sent SRTLA ACK (throttle factor: {:.2f})", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + static_cast(group.get()), + conn->stats().ack_throttle_factor); + } + } + + conn->set_recv_index(0); + } +} + +void SRTLAHandler::update_rtt_history(ConnectionStats &stats, uint32_t rtt) { + stats.rtt_history[stats.rtt_history_idx] = rtt; + stats.rtt_history_idx = (stats.rtt_history_idx + 1) % RTT_HISTORY_SIZE; + stats.rtt_ms = rtt; +} + +void SRTLAHandler::update_connection_telemetry(const ConnectionPtr &conn, + const connection_info_t &info, + 
time_t current_time) { + auto &stats = conn->stats(); + + // Mark that this sender supports extended keepalives + // This flag persists for the lifetime of the connection, allowing us to + // distinguish extended-keepalive-capable senders from legacy senders, + // even when the connection is actively transmitting (and not sending keepalives). + stats.sender_supports_extended_keepalives = true; + + // Update RTT with history + update_rtt_history(stats, info.rtt_ms); + + // Update window metrics + stats.window = info.window; + stats.in_flight = info.in_flight; + + // Update NAK count + stats.sender_nak_count = info.nak_count; + + // Update bitrate + stats.sender_bitrate_bps = info.bitrate_bytes_per_sec; + + // Mark keepalive timestamp + stats.last_keepalive = current_time; +} + +void SRTLAHandler::handle_keepalive(ConnectionGroupPtr group, + const ConnectionPtr &conn, + const struct sockaddr_storage *addr, + const char *buffer, + int length) { + time_t current_time = 0; + get_seconds(¤t_time); + + // ======================================================================== + // ALWAYS parse connection info when available + // ======================================================================== + connection_info_t info; + bool has_conn_info = parse_keepalive_conn_info(reinterpret_cast(buffer), length, &info); + + if (has_conn_info) { + // Copy values for logging to avoid packed field reference issues + uint32_t conn_id = info.conn_id; + int32_t window = info.window; + int32_t in_flight = info.in_flight; + uint32_t rtt_ms = info.rtt_ms; + uint32_t nak_count = info.nak_count; + double bitrate_kbits = (static_cast(info.bitrate_bytes_per_sec) * 8.0) / 1000.0; + + // Store telemetry in connection stats (used by Connection Info algorithm) + update_connection_telemetry(conn, info, current_time); + + // Log the detailed keepalive packet data + spdlog::info( + " [{}:{}] [Group: {}] Per-connection keepalive: ID={}, BW: {:.2f} kbits/s, Window={}, " + "In-flight={}, RTT={}ms, 
NAKs={}", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get()), + conn_id, + bitrate_kbits, + window, + in_flight, + rtt_ms, + nak_count + ); + +#if ENABLE_ALGO_COMPARISON + // ==================================================================== + // ALGORITHM COMPARISON: Show decisions from both algorithms + // ==================================================================== + int error_delta = static_cast(conn->stats().error_points) - static_cast(conn->stats().legacy_error_points); + int weight_delta = static_cast(conn->stats().weight_percent) - static_cast(conn->stats().legacy_weight_percent); + double throttle_delta = conn->stats().ack_throttle_factor - conn->stats().legacy_ack_throttle_factor; + + // Only log comparison if there's a meaningful difference (reduce spam) + if (std::abs(weight_delta) >= 5 || std::abs(error_delta) >= 5) { + spdlog::info( + " [{}:{}] [ALGO_CMP] ConnInfo: Err={} W={}% T={:.2f} | " + "Legacy: Err={} W={}% T={:.2f} | " + "Delta: E={:+d} W={:+d}% T={:+.2f}", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + conn->stats().error_points, + conn->stats().weight_percent, + conn->stats().ack_throttle_factor, + conn->stats().legacy_error_points, + conn->stats().legacy_weight_percent, + conn->stats().legacy_ack_throttle_factor, + error_delta, + weight_delta, + throttle_delta + ); + } +#endif + } else { + // No connection info in keepalive packet - quality evaluation will fall back + // to receiver-only metrics (bandwidth + packet loss) for this connection. + // This happens when the sender doesn't support extended keepalives. 
+ spdlog::trace( + " [{}:{}] [Group: {}] Keepalive without sender telemetry - " + "quality evaluation will use receiver-only metrics", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get()) + ); + } + + // Echo the keepalive back to the sender + int ret = sendto(srtla_socket_, buffer, length, 0, + reinterpret_cast(addr), kAddrLen); + if (ret != length) { + spdlog::error("[{}:{}] [Group: {}] Failed to send SRTLA Keepalive", + print_addr(const_cast(reinterpret_cast(addr))), + port_no(const_cast(reinterpret_cast(addr))), + static_cast(group.get())); + } +} + +} // namespace srtla::protocol diff --git a/src/protocol/srtla_handler.h b/src/protocol/srtla_handler.h new file mode 100644 index 0000000..fe38ca4 --- /dev/null +++ b/src/protocol/srtla_handler.h @@ -0,0 +1,45 @@ +#pragma once + +#include "srt_handler.h" +#include "../connection/connection_registry.h" +#include "../quality/metrics_collector.h" +#include "../utils/nak_dedup.h" + +namespace srtla::protocol { + +class SRTLAHandler { +public: + SRTLAHandler(int srtla_socket, + connection::ConnectionRegistry ®istry, + SRTHandler &srt_handler, + quality::MetricsCollector &metrics_collector); + + void process_packet(time_t ts); + void send_keepalive(const connection::ConnectionPtr &conn, time_t ts); + +private: + int register_group(const struct sockaddr_storage *addr, const char *buffer, time_t ts); + int register_connection(const struct sockaddr_storage *addr, const char *buffer, time_t ts); + void register_packet(connection::ConnectionGroupPtr group, + const connection::ConnectionPtr &conn, + int32_t sn); + + void handle_keepalive(connection::ConnectionGroupPtr group, + const connection::ConnectionPtr &conn, + const struct sockaddr_storage *addr, + const char *buffer, + int length); + + // Helper functions for keepalive telemetry + void update_rtt_history(ConnectionStats &stats, uint32_t rtt); + void update_connection_telemetry(const 
connection::ConnectionPtr &conn, + const connection_info_t &info, + time_t current_time); + + int srtla_socket_; + connection::ConnectionRegistry ®istry_; + SRTHandler &srt_handler_; + quality::MetricsCollector &metrics_; +}; + +} // namespace srtla::protocol diff --git a/src/quality/load_balancer.cpp b/src/quality/load_balancer.cpp new file mode 100644 index 0000000..884e3ee --- /dev/null +++ b/src/quality/load_balancer.cpp @@ -0,0 +1,164 @@ +#include "load_balancer.h" + +#include +#include + +#include + +extern "C" { +#include "../common.h" +} + +#include "../receiver_config.h" + +namespace srtla::quality { + +using srtla::connection::ConnectionGroupPtr; + +void LoadBalancer::adjust_weights(ConnectionGroupPtr group, time_t current_time) const { + if (!group || group->connections().empty()) { + return; + } + + const bool load_balancing_enabled = group->load_balancing_enabled(); + + if (load_balancing_enabled) { + if (group->last_load_balance_eval() >= group->last_quality_eval()) { + return; + } + } else { + time_t last_eval = group->last_load_balance_eval(); + if (last_eval != 0 && (last_eval + CONN_QUALITY_EVAL_PERIOD) > current_time) { + return; + } + } + + group->set_last_load_balance_eval(current_time); + + bool any_change = false; + spdlog::debug("[Group: {}] Evaluating weights and throttle factors for {} connections", + static_cast(group.get()), group->connections().size()); + + + uint8_t max_weight = 0; + int active_conns = 0; + + for (auto &conn : group->connections()) { + uint8_t old_weight = conn->stats().weight_percent; + uint8_t new_weight; + + if (conn->stats().error_points >= 40) { + new_weight = WEIGHT_CRITICAL; + } else if (conn->stats().error_points >= 25) { + new_weight = WEIGHT_POOR; + } else if (conn->stats().error_points >= 15) { + new_weight = WEIGHT_FAIR; + } else if (conn->stats().error_points >= 10) { + new_weight = WEIGHT_DEGRADED; + } else if (conn->stats().error_points >= 5) { + new_weight = WEIGHT_EXCELLENT; + } else { + new_weight = 
WEIGHT_FULL; + } + + if (new_weight != old_weight) { + conn->stats().weight_percent = new_weight; + any_change = true; + } + + if (!((conn->last_received() + CONN_TIMEOUT) < current_time)) { + max_weight = std::max(max_weight, conn->stats().weight_percent); + active_conns++; + } + } + + spdlog::debug("[Group: {}] Active connections: {}, max_weight: {}, load_balancing_enabled: {}", + static_cast(group.get()), active_conns, max_weight, load_balancing_enabled); + + if (load_balancing_enabled && active_conns > 1) { + + for (auto &conn : group->connections()) { + double old_throttle = conn->stats().ack_throttle_factor; + double absolute_quality = static_cast(conn->stats().weight_percent) / WEIGHT_FULL; + double relative_quality = max_weight > 0 ? static_cast(conn->stats().weight_percent) / max_weight : 0.0; + double new_throttle = std::min(absolute_quality, relative_quality); + + // Recovery boost: ONLY for connections with sender telemetry (extended keepalives). + // If a connection is heavily throttled but has improved (error points dropped), + // give it a boost to help it recover from the feedback loop. + // Legacy senders don't get this boost since we rely on bandwidth as primary indicator. 
+ bool has_recent_telemetry = conn->stats().has_valid_sender_telemetry(current_time); + if (has_recent_telemetry && old_throttle < 0.5 && conn->stats().error_points < 15) { + double recovery_boost = 0.15; // Boost throttle by 15% + new_throttle = std::min(new_throttle + recovery_boost, 0.6); + spdlog::debug("[{}:{}] Applying recovery boost (telemetry-based): error_points={}, boosted throttle {:.2f} -> {:.2f}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + conn->stats().error_points, new_throttle - recovery_boost, new_throttle); + } + + new_throttle = std::max(MIN_ACK_RATE, new_throttle); + + spdlog::debug("[{}:{}] Throttle calculation: weight={}, max_weight={}, absolute={:.2f}, relative={:.2f}, new_throttle={:.2f}, old_throttle={:.2f}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + conn->stats().weight_percent, max_weight, + absolute_quality, relative_quality, new_throttle, old_throttle); + + if (std::abs(old_throttle - new_throttle) > 0.01) { + conn->stats().ack_throttle_factor = new_throttle; + any_change = true; + spdlog::debug("[{}:{}] Throttle factor updated: {:.2f} -> {:.2f}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + old_throttle, new_throttle); + } + } + } else { + for (auto &conn : group->connections()) { + if (conn->stats().ack_throttle_factor != 1.0) { + conn->stats().ack_throttle_factor = 1.0; + any_change = true; + } + } + } + + if (any_change) { + spdlog::info("[Group: {}] Connection parameters adjusted:", static_cast(group.get())); + for (auto &conn : group->connections()) { +#if ENABLE_ALGO_COMPARISON + // Show side-by-side comparison of both algorithms + int error_delta = static_cast(conn->stats().error_points) - static_cast(conn->stats().legacy_error_points); + int weight_delta = 
static_cast(conn->stats().weight_percent) - static_cast(conn->stats().legacy_weight_percent); + double throttle_delta = conn->stats().ack_throttle_factor - conn->stats().legacy_ack_throttle_factor; + + spdlog::info(" [{}:{}] [COMPARISON] ConnInfo: Weight={}%, Throttle={:.2f}, ErrPts={} | Legacy: Weight={}%, Throttle={:.2f}, ErrPts={} | Delta: W={:+d}%, T={:+.2f}, E={:+d}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + conn->stats().weight_percent, + conn->stats().ack_throttle_factor, + conn->stats().error_points, + conn->stats().legacy_weight_percent, + conn->stats().legacy_ack_throttle_factor, + conn->stats().legacy_error_points, + weight_delta, + throttle_delta, + error_delta); +#else + spdlog::info(" [{}:{}] Weight: {}%, Throttle: {:.2f}, Error points: {}, Bandwidth: {} bytes, Packets: {}, Loss: {}", + print_addr(const_cast(reinterpret_cast(&conn->address()))), + port_no(const_cast(reinterpret_cast(&conn->address()))), + conn->stats().weight_percent, + conn->stats().ack_throttle_factor, + conn->stats().error_points, + conn->stats().bytes_received, + conn->stats().packets_received, + conn->stats().packets_lost); +#endif + } + } else { + spdlog::debug("[Group: {}] No weight or throttle adjustments needed", static_cast(group.get())); + } +} + +} // namespace srtla::quality diff --git a/src/quality/load_balancer.h b/src/quality/load_balancer.h new file mode 100644 index 0000000..a2c8704 --- /dev/null +++ b/src/quality/load_balancer.h @@ -0,0 +1,12 @@ +#pragma once + +#include "../connection/connection_group.h" + +namespace srtla::quality { + +class LoadBalancer { +public: + void adjust_weights(connection::ConnectionGroupPtr group, time_t current_time) const; +}; + +} // namespace srtla::quality diff --git a/src/quality/metrics_collector.cpp b/src/quality/metrics_collector.cpp new file mode 100644 index 0000000..922a18b --- /dev/null +++ b/src/quality/metrics_collector.cpp @@ -0,0 +1,40 @@ 
+#include "metrics_collector.h" + +namespace srtla::quality { + +void MetricsCollector::on_packet_received(connection::ConnectionPtr conn, size_t bytes) { + auto &stats = conn->stats(); + stats.bytes_received += bytes; + stats.packets_received++; +} + +void MetricsCollector::on_nak_detected(connection::ConnectionPtr conn, uint32_t nak_count) { + auto &stats = conn->stats(); + stats.packets_lost += nak_count; + stats.nack_count += nak_count; +} + +void MetricsCollector::reset_period(connection::ConnectionPtr conn, uint64_t current_ms) { + auto &stats = conn->stats(); + stats.last_bytes_received = stats.bytes_received; + stats.last_packets_received = stats.packets_received; + stats.last_packets_lost = stats.packets_lost; + stats.last_eval_time = current_ms; +} + +uint64_t MetricsCollector::bytes_in_period(const connection::ConnectionPtr &conn) const { + const auto &stats = conn->stats(); + return stats.bytes_received - stats.last_bytes_received; +} + +uint64_t MetricsCollector::packets_in_period(const connection::ConnectionPtr &conn) const { + const auto &stats = conn->stats(); + return stats.packets_received - stats.last_packets_received; +} + +uint32_t MetricsCollector::naks_in_period(const connection::ConnectionPtr &conn) const { + const auto &stats = conn->stats(); + return stats.packets_lost - stats.last_packets_lost; +} + +} // namespace srtla::quality diff --git a/src/quality/metrics_collector.h b/src/quality/metrics_collector.h new file mode 100644 index 0000000..8faa667 --- /dev/null +++ b/src/quality/metrics_collector.h @@ -0,0 +1,20 @@ +#pragma once + +#include "../receiver_config.h" +#include "../connection/connection.h" + +namespace srtla::quality { + +class MetricsCollector { +public: + void on_packet_received(connection::ConnectionPtr conn, size_t bytes); + void on_nak_detected(connection::ConnectionPtr conn, uint32_t nak_count); + + void reset_period(connection::ConnectionPtr conn, uint64_t current_ms); + + uint64_t bytes_in_period(const 
connection::ConnectionPtr &conn) const; + uint64_t packets_in_period(const connection::ConnectionPtr &conn) const; + uint32_t naks_in_period(const connection::ConnectionPtr &conn) const; +}; + +} // namespace srtla::quality diff --git a/src/quality/quality_evaluator.cpp b/src/quality/quality_evaluator.cpp new file mode 100644 index 0000000..a5f264c --- /dev/null +++ b/src/quality/quality_evaluator.cpp @@ -0,0 +1,462 @@ +#include "quality_evaluator.h" + +#include +#include +#include + +#include + +extern "C" { +#include "../common.h" +} + +// ============================================================================ +// Quality Evaluation Algorithm +// ============================================================================ +// This module evaluates connection quality using an adaptive approach: +// +// 1. RECEIVER-SIDE METRICS (always used): +// - Bandwidth: Measured throughput compared to expected/median +// - Packet loss: Ratio of lost packets to total received +// +// 2. SENDER TELEMETRY (when available): +// - RTT: Round-trip time and jitter from sender's keepalive packets +// - NAK rate: Retransmission requests from sender's perspective +// - Window utilization: Congestion indicator from sender's flow control +// - Bitrate validation: Cross-check sender vs receiver measurements +// +// When sender telemetry is NOT available (e.g., older clients that don't send +// connection_info_t in keepalives), the algorithm falls back to receiver-only +// metrics. This is detected via ConnectionStats::has_valid_sender_telemetry(). +// +// The result is error points that determine connection weight and ACK throttle +// factor, which indirectly influences load balancing by affecting the sender's +// connection selection algorithm. 
+// ============================================================================
+
+namespace srtla::quality {
+
+using srtla::connection::ConnectionGroupPtr;
+using srtla::connection::ConnectionPtr;
+
+// Scores every connection of `group` for the evaluation period that just ended.
+//
+// Does nothing when the group is null, has no connections, has load balancing
+// disabled, or was evaluated less than CONN_QUALITY_EVAL_PERIOD ago.
+// Otherwise two passes run over the connections:
+//   1. measure per-connection bandwidth and packet loss since the previous
+//      cycle and derive the group's total / max / median bandwidth;
+//   2. turn those measurements (plus sender telemetry, when valid) into error
+//      points stored in each connection's stats.
+//
+// @param group         group whose connections are evaluated
+// @param current_time  current wall-clock time in seconds
+void QualityEvaluator::evaluate_group(ConnectionGroupPtr group, time_t current_time) {
+    if (!group || group->connections().empty() || !group->load_balancing_enabled()) {
+        return;
+    }
+
+    if (group->last_quality_eval() + CONN_QUALITY_EVAL_PERIOD > current_time) {
+        return;
+    }
+
+    spdlog::debug("[Group: {}] Evaluating connection quality", static_cast<void *>(group.get()));
+
+    uint64_t current_ms = 0;
+    if (get_ms(&current_ms) != 0) {
+        spdlog::error("[Group: {}] Failed to get current timestamp for quality evaluation",
+                      static_cast<void *>(group.get()));
+        return;
+    }
+
+    // Reset the aggregate only once we know the evaluation will actually run,
+    // so a clock failure does not leave the group with a zero total.
+    group->set_total_target_bandwidth(0);
+
+    std::vector<QualityMetrics> bandwidth_info;
+    bandwidth_info.reserve(group->connections().size());
+
+    for (auto &conn : group->connections()) {
+        uint64_t time_diff_ms = 0;
+        if (conn->stats().last_eval_time > 0) {
+            time_diff_ms = current_ms - conn->stats().last_eval_time;
+        }
+
+        double bandwidth_kbits_per_sec = 0.0;
+        double packet_loss_ratio = 0.0;
+        uint64_t packets_diff = 0;
+
+        if (time_diff_ms > 0) {
+            uint64_t bytes_diff = conn->stats().bytes_received - conn->stats().last_bytes_received;
+            packets_diff = conn->stats().packets_received - conn->stats().last_packets_received;
+            uint32_t lost_diff = conn->stats().packets_lost - conn->stats().last_packets_lost;
+
+            double seconds = static_cast<double>(time_diff_ms) / 1000.0;
+            double bandwidth_bytes_per_sec = bytes_diff / seconds;
+            bandwidth_kbits_per_sec = (bandwidth_bytes_per_sec * 8.0) / 1000.0;
+
+            if (packets_diff > 0) {
+                packet_loss_ratio = static_cast<double>(lost_diff) / (packets_diff + lost_diff);
+            }
+
+            group->set_total_target_bandwidth(group->total_target_bandwidth() + static_cast<uint64_t>(bandwidth_bytes_per_sec));
+        }
+
+        // Store packets_diff for NAK rate calculation in second loop
+        // Note: last_* values are updated in the second loop, after all
+        // calculations for the connection are done
+        bandwidth_info.push_back({bandwidth_kbits_per_sec, packet_loss_ratio, packets_diff, 0});
+    }
+
+    if (bandwidth_info.empty()) {
+        return;
+    }
+
+    double total_kbits_per_sec = (group->total_target_bandwidth() * 8.0) / 1000.0;
+    double max_kbits_per_sec = 0.0;
+    double median_kbits_per_sec = 0.0;
+
+    std::vector<double> all_bandwidths;
+    all_bandwidths.reserve(bandwidth_info.size());
+    for (const auto &info : bandwidth_info) {
+        all_bandwidths.push_back(info.bandwidth_kbits_per_sec);
+        max_kbits_per_sec = std::max(max_kbits_per_sec, info.bandwidth_kbits_per_sec);
+    }
+
+    if (!all_bandwidths.empty() && max_kbits_per_sec > 0) {
+        // The median is taken over the "good" connections only, so that a few
+        // weak links do not drag the expectation for everyone down.
+        double good_threshold = max_kbits_per_sec * GOOD_CONNECTION_THRESHOLD;
+        std::vector<double> good_bandwidths;
+        for (const auto &bw : all_bandwidths) {
+            if (bw >= good_threshold) {
+                good_bandwidths.push_back(bw);
+            }
+        }
+
+        // Sorts `values` in place; callers guarantee it is non-empty.
+        auto compute_median = [](std::vector<double> &values) {
+            std::sort(values.begin(), values.end());
+            size_t mid = values.size() / 2;
+            if (values.size() % 2 == 0) {
+                return (values[mid - 1] + values[mid]) / 2.0;
+            }
+            return values[mid];
+        };
+
+        if (!good_bandwidths.empty()) {
+            median_kbits_per_sec = compute_median(good_bandwidths);
+            spdlog::trace("[Group: {}] Median from good connections (>= {:.2f} kbps): {:.2f} kbps",
+                          static_cast<void *>(group.get()), good_threshold, median_kbits_per_sec);
+        } else {
+            median_kbits_per_sec = compute_median(all_bandwidths);
+            spdlog::trace("[Group: {}] Using fallback median from all connections: {:.2f} kbps",
+                          static_cast<void *>(group.get()), median_kbits_per_sec);
+        }
+    }
+
+    double min_expected_kbits_per_sec = std::max(100.0, MIN_ACCEPTABLE_TOTAL_BANDWIDTH_KBPS / bandwidth_info.size());
+
+    spdlog::debug("[Group: {}] Total bandwidth: {:.2f} kbits/s, Max: {:.2f} kbits/s, Median: {:.2f} kbits/s, Min expected per conn: {:.2f} kbps",
+                  static_cast<void *>(group.get()), total_kbits_per_sec, max_kbits_per_sec, median_kbits_per_sec,
+                  min_expected_kbits_per_sec);
+
+    // Records the counters observed in this cycle as the baseline for the next one.
+    const auto commit_baseline = [current_ms](const ConnectionPtr &c) {
+        auto &s = c->stats();
+        s.last_bytes_received = s.bytes_received;
+        s.last_packets_received = s.packets_received;
+        s.last_packets_lost = s.packets_lost;
+        s.last_eval_time = current_ms;
+    };
+
+    for (std::size_t idx = 0; idx < bandwidth_info.size() && idx < group->connections().size(); ++idx) {
+        auto conn = group->connections()[idx];
+        auto &metrics = bandwidth_info[idx];
+        // print_addr()/port_no() take a non-const sockaddr pointer.
+        auto *peer = const_cast<struct sockaddr *>(reinterpret_cast<const struct sockaddr *>(&conn->address()));
+
+        bool in_grace_period = (current_time - conn->connection_start()) < CONNECTION_GRACE_PERIOD;
+        if (in_grace_period) {
+            spdlog::debug("[{}:{}] Connection in grace period, skipping penalties",
+                          print_addr(peer), port_no(peer));
+            // Keep the baselines moving while penalties are suspended. Without
+            // this, last_eval_time stays 0 for the whole grace period, so the
+            // first real evaluation measures 0 kbit/s and is charged the
+            // maximum bandwidth penalty, and every NAK seen during grace is
+            // attributed to that single period.
+            commit_baseline(conn);
+            conn->stats().last_sender_nak_count = conn->stats().sender_nak_count;
+            continue;
+        }
+
+        conn->stats().error_points = 0;
+
+        bool is_poor_connection = metrics.bandwidth_kbits_per_sec < median_kbits_per_sec * GOOD_CONNECTION_THRESHOLD;
+        double expected_kbits_per_sec = is_poor_connection ? min_expected_kbits_per_sec : median_kbits_per_sec;
+        expected_kbits_per_sec = std::max(expected_kbits_per_sec, min_expected_kbits_per_sec);
+
+        double performance_ratio = expected_kbits_per_sec > 0 ? metrics.bandwidth_kbits_per_sec / expected_kbits_per_sec : 0;
+
+        // Check sender capabilities and current telemetry status
+        bool supports_ext_keepalives = conn->stats().supports_extended_keepalives();
+        bool has_telemetry = conn->stats().has_valid_sender_telemetry(current_time);
+
+        // ====================================================================
+        // RECEIVER-SIDE METRICS (always applied)
+        // These are calculated from data we observe at the receiver.
+        // ====================================================================
+
+        // Bandwidth performance penalties
+        // IMPORTANT: For senders that support extended keepalives, apply lighter penalties
+        // to prevent positive feedback loop with ACK throttling. The feedback loop:
+        // low bandwidth → throttled → client uses it less → bandwidth drops further →
+        // more penalties → more throttling → permanent 0 bandwidth.
+        //
+        // We use the persistent "supports_extended_keepalives" flag (not the transient
+        // "has_telemetry" status) to ensure consistent treatment whether the connection
+        // is currently active (not sending keepalives) or idle (sending keepalives).
+        //
+        // For legacy senders, keep aggressive penalties since bandwidth is our only indicator.
+        if (supports_ext_keepalives) {
+            // Lighter penalties for extended-keepalive-capable senders
+            // (rely more on telemetry metrics when available)
+            if (performance_ratio < 0.3) {
+                conn->stats().error_points += 10; // Reduced from 40
+            } else if (performance_ratio < 0.5) {
+                conn->stats().error_points += 7; // Reduced from 25
+            } else if (performance_ratio < 0.7) {
+                conn->stats().error_points += 4; // Reduced from 15
+            } else if (performance_ratio < 0.85) {
+                conn->stats().error_points += 2; // Reduced from 5
+            }
+        } else {
+            // Original penalties for legacy senders (bandwidth is primary indicator)
+            if (performance_ratio < 0.3) {
+                conn->stats().error_points += 40;
+            } else if (performance_ratio < 0.5) {
+                conn->stats().error_points += 25;
+            } else if (performance_ratio < 0.7) {
+                conn->stats().error_points += 15;
+            } else if (performance_ratio < 0.85) {
+                conn->stats().error_points += 5;
+            }
+        }
+
+        // Packet loss penalties
+        if (metrics.packet_loss_ratio > 0.20) {
+            conn->stats().error_points += 40;
+        } else if (metrics.packet_loss_ratio > 0.10) {
+            conn->stats().error_points += 20;
+        } else if (metrics.packet_loss_ratio > 0.05) {
+            conn->stats().error_points += 10;
+        } else if (metrics.packet_loss_ratio > 0.01) {
+            conn->stats().error_points += 5;
+        }
+
+        // ====================================================================
+        // SENDER TELEMETRY METRICS (only when available)
+        // These come from connection_info_t in keepalive packets from the sender.
+        // When not available, we skip these and rely only on receiver-side metrics.
+        // ====================================================================
+        uint32_t telemetry_error_points = 0;
+        if (has_telemetry) {
+            // RTT-based error points
+            telemetry_error_points += calculate_rtt_error_points(conn->stats(), current_time);
+
+            // NAK rate error points (sender's view of retransmissions)
+            // Use packets_diff from first loop to avoid always-zero bug
+            telemetry_error_points += calculate_nak_error_points(conn->stats(), metrics.packets_diff);
+
+            // Window utilization error points (congestion indicator)
+            telemetry_error_points += calculate_window_error_points(conn->stats());
+
+            // Validate bitrate consistency between sender and receiver
+            // (kbit/s * 125 = bytes per second)
+            double receiver_bitrate_bps = metrics.bandwidth_kbits_per_sec * 125.0; // kbits to bytes
+            validate_bitrate(conn->stats(), receiver_bitrate_bps, &conn->address());
+
+            conn->stats().error_points += telemetry_error_points;
+        }
+
+        // Update last_* values AFTER all calculations for this evaluation cycle
+        commit_baseline(conn);
+
+        // Log evaluation mode for clarity
+        spdlog::debug(" [{}:{}] [Group: {}] Evaluation mode: {} (telemetry points: {})",
+                      print_addr(peer),
+                      port_no(peer),
+                      static_cast<void *>(group.get()),
+                      has_telemetry ? "full (receiver + sender telemetry)" : "receiver-only (no sender telemetry)",
+                      telemetry_error_points);
+
+        conn->stats().nack_count = 0;
+
+#if ENABLE_ALGO_COMPARISON
+        // ====================================================================
+        // LEGACY ALGORITHM: Parallel evaluation for comparison
+        // ====================================================================
+        evaluate_connection_legacy(conn, metrics.bandwidth_kbits_per_sec,
+                                   metrics.packet_loss_ratio, performance_ratio, current_time);
+#endif
+
+        double log_percentage = 0.0;
+        if (is_poor_connection && median_kbits_per_sec > 0) {
+            log_percentage = (metrics.bandwidth_kbits_per_sec / median_kbits_per_sec) * 100.0;
+        } else if (expected_kbits_per_sec > 0) {
+            log_percentage = (metrics.bandwidth_kbits_per_sec / expected_kbits_per_sec) * 100.0;
+        }
+
+        spdlog::debug(" [{}:{}] [Group: {}] Connection stats: BW: {:.2f} kbits/s ({:.2f}%), Loss: {:.2f}%, Error points: {}",
+                      print_addr(peer),
+                      port_no(peer),
+                      static_cast<void *>(group.get()),
+                      metrics.bandwidth_kbits_per_sec,
+                      log_percentage,
+                      metrics.packet_loss_ratio * 100.0,
+                      conn->stats().error_points);
+    }
+
+    group->set_last_quality_eval(current_time);
+}
+
+// Returns the spread of the recorded RTT samples.
+//
+// Despite the name, the value returned is the (population) standard deviation:
+// the square root of the mean squared deviation. Only history slots greater
+// than zero count as samples; with fewer than two samples the result is 0.
+double QualityEvaluator::calculate_rtt_variance(const ConnectionStats &stats) {
+    // Count valid samples
+    int count = 0;
+    double sum = 0;
+    for (size_t i = 0; i < RTT_HISTORY_SIZE; i++) {
+        if (stats.rtt_history[i] > 0) {
+            sum += stats.rtt_history[i];
+            count++;
+        }
+    }
+
+    if (count < 2) return 0; // Need at least 2 samples
+
+    double mean = sum / count;
+    double variance_sum = 0;
+    for (size_t i = 0; i < RTT_HISTORY_SIZE; i++) {
+        if (stats.rtt_history[i] > 0) {
+            double diff = static_cast<double>(stats.rtt_history[i]) - mean;
+            variance_sum += diff * diff;
+        }
+    }
+
+    return std::sqrt(variance_sum / count);
+}
+
+// Converts the sender-reported RTT and its jitter into error points.
+//
+// Returns 0 when no keepalive has been recorded or the last one is older than
+// KEEPALIVE_STALENESS_THRESHOLD. Otherwise returns 5/10/20 points for an RTT
+// above the moderate/high/critical threshold, plus 10 points when the value
+// from calculate_rtt_variance() exceeds RTT_VARIANCE_THRESHOLD.
+uint32_t QualityEvaluator::calculate_rtt_error_points(const ConnectionStats &stats, time_t current_time) {
+    // Don't use stale keepalive data
+    if (stats.last_keepalive == 0 || (current_time - stats.last_keepalive) > KEEPALIVE_STALENESS_THRESHOLD) {
+        return 0;
+    }
+
+    uint32_t points = 0;
+
+    // Base RTT penalties
+    if (stats.rtt_ms > RTT_THRESHOLD_CRITICAL) {
+        points += 20;
+    } else if (stats.rtt_ms > RTT_THRESHOLD_HIGH) {
+        points += 10;
+    } else if (stats.rtt_ms > RTT_THRESHOLD_MODERATE) {
+        points += 5;
+    }
+
+    // Jitter penalty
+    double variance = calculate_rtt_variance(stats);
+    if (variance > RTT_VARIANCE_THRESHOLD) {
+        points += 10;
+    }
+
+    return points;
+}
+
+// Converts the sender-reported NAK rate for the period into error points.
+//
+// The rate is (NAKs since the previous call) / packets_diff. Returns 0 when no
+// packets were received, when the sender has reported no NAKs, or when the
+// sender's counter moved backwards. On a scored call the NAK baseline in
+// `stats` is advanced to the current counter value.
+//
+// @param stats         connection statistics; last_sender_nak_count is updated
+// @param packets_diff  packets received on this connection during the period
+uint32_t QualityEvaluator::calculate_nak_error_points(ConnectionStats &stats, uint64_t packets_diff) {
+    // A counter that went backwards means the sender restarted or the counter
+    // wrapped. Subtracting would wrap around to a huge NAK count and trigger a
+    // bogus critical penalty, so resynchronise the baseline and skip scoring.
+    if (stats.sender_nak_count < stats.last_sender_nak_count) {
+        stats.last_sender_nak_count = stats.sender_nak_count;
+        return 0;
+    }
+
+    if (packets_diff == 0 || stats.sender_nak_count == 0) {
+        return 0;
+    }
+
+    uint32_t nak_diff = stats.sender_nak_count - stats.last_sender_nak_count;
+    double nak_rate = static_cast<double>(nak_diff) / packets_diff;
+
+    uint32_t points = 0;
+    if (nak_rate > NAK_RATE_CRITICAL) {
+        points += 40;
+    } else if (nak_rate > NAK_RATE_HIGH) {
+        points += 20;
+    } else if (nak_rate > NAK_RATE_MODERATE) {
+        points += 10;
+    } else if (nak_rate > NAK_RATE_LOW) {
+        points += 5;
+    }
+
+    stats.last_sender_nak_count = stats.sender_nak_count;
+    return points;
+}
+
+// Returns 15 error points when in_flight / window exceeds
+// WINDOW_UTILIZATION_CONGESTED, otherwise 0. A non-positive window yields 0.
+uint32_t QualityEvaluator::calculate_window_error_points(const ConnectionStats &stats) {
+    if (stats.window <= 0) {
+        return 0;
+    }
+
+    double utilization = static_cast<double>(stats.in_flight) / stats.window;
+
+    uint32_t points = 0;
+
+    // Persistently full window indicates congestion
+    if (utilization > WINDOW_UTILIZATION_CONGESTED) {
+        points += 15;
+    }
+
+    // Very low utilization might indicate client-side throttling
+    // This is informational, not necessarily bad, so we don't penalize
+
+    return points;
+}
+
+// Logs a warning when the receiver-measured rate differs from the rate the
+// sender reported by more than BITRATE_DISCREPANCY_THRESHOLD (relative to the
+// sender's value). Purely diagnostic: no error points are assigned here.
+// Does nothing when the sender reported a rate of 0.
+void QualityEvaluator::validate_bitrate(const ConnectionStats &stats,
+                                        double receiver_bitrate_bps,
+                                        const struct sockaddr_storage *addr) {
+    if (stats.sender_bitrate_bps == 0) {
+        return;
+    }
+
+    double ratio = std::abs(receiver_bitrate_bps - stats.sender_bitrate_bps)
+                   / stats.sender_bitrate_bps;
+
+    if (ratio > BITRATE_DISCREPANCY_THRESHOLD) {
+        // print_addr()/port_no() take a non-const sockaddr pointer.
+        auto *peer = const_cast<struct sockaddr *>(reinterpret_cast<const struct sockaddr *>(addr));
+        spdlog::warn("[{}:{}] Large bitrate discrepancy: "
+                     "sender={} bps, receiver={} bps ({:.1f}%)",
+                     print_addr(peer),
+                     port_no(peer),
+                     stats.sender_bitrate_bps,
+                     static_cast<uint64_t>(receiver_bitrate_bps),
+                     ratio * 100);
+    }
+}
+
+// Computes the legacy (receiver-only) score for `conn` so it can be compared
+// with the telemetry-aware result. Writes legacy_error_points,
+// legacy_weight_percent and legacy_ack_throttle_factor into the connection's
+// stats. `current_time` is not used by this implementation.
+void QualityEvaluator::evaluate_connection_legacy(ConnectionPtr conn,
+                                                  double bandwidth_kbits_per_sec,
+                                                  double packet_loss_ratio,
+                                                  double performance_ratio,
+                                                  time_t current_time) {
+    // ========================================================================
+    // LEGACY ALGORITHM: No connection info (RTT, window, sender NAKs, etc.)
+    // Only uses receiver-side bandwidth and packet loss measurements
+    // ========================================================================
+    conn->stats().legacy_error_points = 0;
+
+    // Bandwidth-based penalties (same as connection info algorithm)
+    if (performance_ratio < 0.3) {
+        conn->stats().legacy_error_points += 40;
+    } else if (performance_ratio < 0.5) {
+        conn->stats().legacy_error_points += 25;
+    } else if (performance_ratio < 0.7) {
+        conn->stats().legacy_error_points += 15;
+    } else if (performance_ratio < 0.85) {
+        conn->stats().legacy_error_points += 5;
+    }
+
+    // Packet loss penalties (same as connection info algorithm)
+    if (packet_loss_ratio > 0.20) {
+        conn->stats().legacy_error_points += 40;
+    } else if (packet_loss_ratio > 0.10) {
+        conn->stats().legacy_error_points += 20;
+    } else if (packet_loss_ratio > 0.05) {
+        conn->stats().legacy_error_points += 10;
+    } else if (packet_loss_ratio > 0.01) {
+        conn->stats().legacy_error_points += 5;
+    }
+
+    // NOTE: Legacy algorithm does NOT have:
+    // - RTT-based penalties
+    // - Sender NAK rate analysis
+    // - Window utilization penalties
+    // - Bitrate discrepancy validation
+
+    // Calculate legacy weight and throttle (same logic as connection info)
+    if (conn->stats().legacy_error_points >= 40) {
+        conn->stats().legacy_weight_percent = WEIGHT_CRITICAL;
+    } else if (conn->stats().legacy_error_points >= 30) {
+        conn->stats().legacy_weight_percent = WEIGHT_POOR;
+    } else if (conn->stats().legacy_error_points >= 20) {
+        conn->stats().legacy_weight_percent = WEIGHT_FAIR;
+    } else if (conn->stats().legacy_error_points >= 10) {
+        conn->stats().legacy_weight_percent = WEIGHT_DEGRADED;
+    } else if (conn->stats().legacy_error_points >= 5) {
+        conn->stats().legacy_weight_percent = WEIGHT_EXCELLENT;
+    } else {
+        conn->stats().legacy_weight_percent = WEIGHT_FULL;
+    }
+
+    conn->stats().legacy_ack_throttle_factor =
+        std::max(MIN_ACK_RATE, static_cast<double>(conn->stats().legacy_weight_percent) / 100.0);
+}
+
+} // namespace srtla::quality
diff --git a/src/quality/quality_evaluator.h b/src/quality/quality_evaluator.h
new file mode 100644
index 0000000..3bf77eb
--- /dev/null
+++ b/src/quality/quality_evaluator.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <ctime>
+
+#include "metrics_collector.h"
+#include "../connection/connection_group.h"
+
+namespace srtla::quality {
+
+// Per-connection measurements gathered during one evaluation cycle.
+struct QualityMetrics {
+    double bandwidth_kbits_per_sec = 0.0; // receiver-side throughput for the period
+    double packet_loss_ratio = 0.0;       // lost / (received + lost) for the period
+    uint64_t packets_diff = 0;            // packets received during the period
+    uint32_t error_points = 0;
+};
+
+// Scores the connections of a group and stores the result as error points
+// in each connection's statistics.
+class QualityEvaluator {
+public:
+    QualityEvaluator() = default;
+
+    // Evaluates all connections of `group`; rate-limited by
+    // CONN_QUALITY_EVAL_PERIOD and skipped when load balancing is disabled.
+    void evaluate_group(connection::ConnectionGroupPtr group,
+                        time_t current_time);
+
+private:
+    // Helper functions for RTT-based quality assessment (Connection Info algorithm)
+    uint32_t calculate_rtt_error_points(const ConnectionStats &stats, time_t current_time);
+    // Returns the standard deviation of the valid RTT samples (0 with fewer than 2).
+    double calculate_rtt_variance(const ConnectionStats &stats);
+
+    // Helper functions for NAK rate analysis (Connection Info algorithm)
+    uint32_t calculate_nak_error_points(ConnectionStats &stats, uint64_t packets_diff);
+
+    // Helper functions for window utilization (Connection Info algorithm)
+    uint32_t calculate_window_error_points(const ConnectionStats &stats);
+
+    // Helper function for bitrate validation (Connection Info algorithm)
+    void validate_bitrate(const ConnectionStats &stats,
+                          double receiver_bitrate_bps,
+                          const struct sockaddr_storage *addr);
+
+    // Legacy algorithm (without connection info)
+    void evaluate_connection_legacy(connection::ConnectionPtr conn,
+                                    double bandwidth_kbits_per_sec,
+                                    double packet_loss_ratio,
+                                    double performance_ratio,
+                                    time_t current_time);
+};
+
+} // namespace srtla::quality
diff --git a/src/receiver.cpp b/src/receiver.cpp
deleted file mode 100644
index 3514e92..0000000
--- a/src/receiver.cpp
+++ /dev/null
@@ -1,877 +0,0 @@
-/*
-    srtla_rec - SRT transport proxy with link aggregation, forked by IRLToolkit
-    and IRLServer Copyright (C) 2020-2021 BELABOX project Copyright (C) 2024
-    IRLToolkit Inc. Copyright (C) 2025 IRLServer.com
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with this program. If not, see .
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "receiver.h" - -int srtla_sock; -// Use sockaddr_storage to handle both IPv4 and IPv6 -struct sockaddr_storage srt_addr; -const socklen_t addr_len = sizeof(struct sockaddr_storage); - -std::vector conn_groups; - -/* -Async I/O support -*/ -#define MAX_EPOLL_EVENTS 10 - -int socket_epoll; - -int epoll_add(int fd, uint32_t events, void *priv_data) { - struct epoll_event ev = {0}; - ev.events = events; - ev.data.ptr = priv_data; - return epoll_ctl(socket_epoll, EPOLL_CTL_ADD, fd, &ev); -} - -int epoll_rem(int fd) { - struct epoll_event ev; // non-NULL for Linux < 2.6.9, however unlikely it is - return epoll_ctl(socket_epoll, EPOLL_CTL_DEL, fd, &ev); -} - -/* -Misc helper functions -*/ -int const_time_cmp(const void *a, const void *b, int len) { - char diff = 0; - char *ca = (char *)a; - char *cb = (char *)b; - for (int i = 0; i < len; i++) { - diff |= *ca - *cb; - ca++; - cb++; - } - - return diff ? -1 : 0; -} - -inline std::vector get_random_bytes(size_t size) { - std::vector ret; - ret.resize(size); - - std::ifstream f("/dev/urandom"); - f.read(ret.data(), size); - assert(f); // Failed to read fully! 
- f.close(); - - return ret; -} - -uint16_t get_sock_local_port(int fd) { - struct sockaddr_in6 local_addr = {}; - socklen_t local_addr_len = sizeof(local_addr); - getsockname(fd, (struct sockaddr *)&local_addr, &local_addr_len); - return ntohs(local_addr.sin6_port); -} - -inline void srtla_send_reg_err(struct sockaddr_storage *addr) { - uint16_t header = htobe16(SRTLA_TYPE_REG_ERR); - sendto(srtla_sock, &header, sizeof(header), 0, (struct sockaddr *)addr, - addr_len); -} - -/* -Connection and group management functions -*/ -srtla_conn_group_ptr group_find_by_id(char *id) { - for (auto &group : conn_groups) { - if (const_time_cmp(group->id.begin(), id, SRTLA_ID_LEN) == 0) - return group; - } - return nullptr; -} - -void group_find_by_addr(struct sockaddr_storage *addr, srtla_conn_group_ptr &rg, - srtla_conn_ptr &rc) { - for (auto &group : conn_groups) { - for (auto &conn : group->conns) { - if (conn->addr.ss_family == addr->ss_family && - ((conn->addr.ss_family == AF_INET6 && - const_time_cmp(&((struct sockaddr_in6 *)(&conn->addr))->sin6_addr, - &((struct sockaddr_in6 *)addr)->sin6_addr, - sizeof(struct in6_addr)) == 0 && - ((struct sockaddr_in6 *)(&conn->addr))->sin6_port == - ((struct sockaddr_in6 *)addr)->sin6_port) || - (conn->addr.ss_family == AF_INET && - const_time_cmp(&((struct sockaddr_in *)(&conn->addr))->sin_addr, - &((struct sockaddr_in *)addr)->sin_addr, - sizeof(struct in_addr)) == 0 && - ((struct sockaddr_in *)(&conn->addr))->sin_port == - ((struct sockaddr_in *)addr)->sin_port))) { - rg = group; - rc = conn; - return; - } - } - if (group->last_addr.ss_family == addr->ss_family && - ((group->last_addr.ss_family == AF_INET6 && - const_time_cmp( - &((struct sockaddr_in6 *)(&group->last_addr))->sin6_addr, - &((struct sockaddr_in6 *)addr)->sin6_addr, - sizeof(struct in6_addr)) == 0 && - ((struct sockaddr_in6 *)(&group->last_addr))->sin6_port == - ((struct sockaddr_in6 *)addr)->sin6_port) || - (group->last_addr.ss_family == AF_INET && - 
const_time_cmp(&((struct sockaddr_in *)(&group->last_addr))->sin_addr, - &((struct sockaddr_in *)addr)->sin_addr, - sizeof(struct in_addr)) == 0 && - ((struct sockaddr_in *)(&group->last_addr))->sin_port == - ((struct sockaddr_in *)addr)->sin_port))) { - rg = group; - rc = nullptr; - return; - } - } - rg = nullptr; - rc = nullptr; -} - -srtla_conn::srtla_conn(struct sockaddr_storage &_addr, time_t ts) - : addr(_addr), last_rcvd(ts) { - recv_log.fill(0); -} - -srtla_conn_group::srtla_conn_group(char *client_id, time_t ts) - : created_at(ts) { - id.fill(0); - - // Copy client ID to first half of id buffer - std::memcpy(id.begin(), client_id, SRTLA_ID_LEN / 2); - - // Generate server ID, then copy to last half of id buffer - auto server_id = get_random_bytes(SRTLA_ID_LEN / 2); - std::copy(server_id.begin(), server_id.end(), - id.begin() + (SRTLA_ID_LEN / 2)); -} - -srtla_conn_group::~srtla_conn_group() { - conns.clear(); - - if (srt_sock > 0) { - remove_socket_info_file(); - epoll_rem(srt_sock); - close(srt_sock); - } -} - -std::vector srtla_conn_group::get_client_addresses() { - std::vector ret; - for (auto conn : conns) { - ret.push_back(conn->addr); - } - return ret; -} - -void srtla_conn_group::write_socket_info_file() { - if (srt_sock == -1) - return; - - uint16_t local_port = get_sock_local_port(srt_sock); - std::string file_name = - std::string(SRT_SOCKET_INFO_PREFIX) + std::to_string(local_port); - - auto client_addresses = get_client_addresses(); - - std::ofstream f(file_name); - for (auto &addr : client_addresses) - f << print_addr((struct sockaddr *)&addr) << std::endl; - f.close(); - - spdlog::debug("[Group: {}] Wrote SRTLA socket info file", - static_cast(this)); -} - -void srtla_conn_group::remove_socket_info_file() { - if (srt_sock == -1) - return; - - uint16_t local_port = get_sock_local_port(srt_sock); - std::string file_name = - std::string(SRT_SOCKET_INFO_PREFIX) + std::to_string(local_port); - - std::remove(file_name.c_str()); -} - -int 
register_group(struct sockaddr_storage *addr, char *in_buf, time_t ts) { - if (conn_groups.size() >= MAX_GROUPS) { - srtla_send_reg_err(addr); - spdlog::error("[{}:{}] Group registration failed: Max groups reached", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr)); - return -1; - } - - // If this remote address is already registered, abort - srtla_conn_group_ptr group; - srtla_conn_ptr conn; - group_find_by_addr(addr, group, conn); - if (group) { - srtla_send_reg_err(addr); - spdlog::error("[{}:{}] Group registration failed: Remote address already " - "registered to group", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr)); - return -1; - } - - // Allocate the group - char *client_id = in_buf + 2; - group = std::make_shared(client_id, ts); - - /* Record the address used to register the group - It won't be allowed to register another group while this one is active */ - group->last_addr = *addr; - - // Build a REG2 packet - char out_buf[SRTLA_TYPE_REG2_LEN]; - uint16_t header = htobe16(SRTLA_TYPE_REG2); - std::memcpy(out_buf, &header, sizeof(header)); - std::memcpy(out_buf + sizeof(header), group->id.begin(), SRTLA_ID_LEN); - - // Send the REG2 packet - int ret = sendto(srtla_sock, &out_buf, sizeof(out_buf), 0, - (const sockaddr *)addr, addr_len); - if (ret != sizeof(out_buf)) { - spdlog::error("[{}:{}] Group registration failed: Send error", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr)); - return -1; - } - - conn_groups.push_back(group); - - spdlog::info("[{}:{}] [Group: {}] Group registered", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr), - static_cast(group.get())); - return 0; -} - -void remove_group(srtla_conn_group_ptr group) { - if (!group) - return; - - conn_groups.erase(std::remove(conn_groups.begin(), conn_groups.end(), group), - conn_groups.end()); - - group.reset(); -} - -int conn_reg(struct sockaddr_storage *addr, char *in_buf, time_t ts) { - char 
*id = in_buf + 2; - srtla_conn_group_ptr group = group_find_by_id(id); - if (!group) { - uint16_t header = htobe16(SRTLA_TYPE_REG_NGP); - sendto(srtla_sock, &header, sizeof(header), 0, (const sockaddr *)addr, - addr_len); - spdlog::error("[{}:{}] Connection registration failed: No group found", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr)); - return -1; - } - - /* If the connection is already registered, we'll allow it to register - again to the same group, but not to a new one */ - srtla_conn_group_ptr tmp; - srtla_conn_ptr conn; - group_find_by_addr(addr, tmp, conn); - if (tmp && tmp != group) { - srtla_send_reg_err(addr); - spdlog::error("[{}:{}] [Group: {}] Connection registration failed: " - "Provided group ID mismatch", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr), - static_cast(group.get())); - return -1; - } - - /* If the connection is already registered to the group, we can - just skip ahead to sending the SRTLA_REG3 */ - bool already_registered = true; - if (!conn) { - if (group->conns.size() >= MAX_CONNS_PER_GROUP) { - srtla_send_reg_err(addr); - spdlog::error("[{}:{}] [Group: {}] Connection registration failed: Max " - "group conns reached", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr), - static_cast(group.get())); - return -1; - } - - conn = std::make_shared(*addr, ts); - already_registered = false; - } - - uint16_t header = htobe16(SRTLA_TYPE_REG3); - int ret = sendto(srtla_sock, &header, sizeof(header), 0, - (const sockaddr *)addr, addr_len); - if (ret != sizeof(header)) { - spdlog::error( - "[{}:{}] [Group: {}] Connection registration failed: Socket send error", - print_addr((struct sockaddr *)addr), port_no((struct sockaddr *)addr), - static_cast(group.get())); - return -1; - } - - if (!already_registered) - group->conns.push_back(conn); - - group->write_socket_info_file(); - - // If it all worked, mark this peer as the most recently active one - group->last_addr 
= *addr; - - spdlog::info("[{}:{}] [Group: {}] Connection registration", - print_addr((struct sockaddr *)addr), - port_no((struct sockaddr *)addr), - static_cast(group.get())); - return 0; -} - -/* -The main network event handlers -*/ -void handle_srt_data(srtla_conn_group_ptr g) { - char buf[MTU]; - - if (!g) - return; - - int n = recv(g->srt_sock, &buf, MTU, 0); - if (n < SRT_MIN_LEN) { - spdlog::error( - "[Group: {}] Failed to read the SRT sock, terminating the group", - static_cast(g.get())); - remove_group(g); - return; - } - - // ACK - if (is_srt_ack(buf, n)) { - // Broadcast SRT ACKs over all connections for timely delivery - for (auto &conn : g->conns) { - int ret = sendto(srtla_sock, &buf, n, 0, (struct sockaddr *)&conn->addr, - addr_len); - if (ret != n) - spdlog::error("[{}:{}] [Group: {}] Failed to send the SRT ack", - print_addr((struct sockaddr *)&conn->addr), - port_no((struct sockaddr *)&conn->addr), - static_cast(g.get())); - } - } else { - // send other packets over the most recently used SRTLA connection - int ret = sendto(srtla_sock, &buf, n, 0, (struct sockaddr *)&g->last_addr, - addr_len); - if (ret != n) { - spdlog::error("[{}:{}] [Group: {}] Failed to send the SRT packet", - print_addr((struct sockaddr *)&g->last_addr), - port_no((struct sockaddr *)&g->last_addr), - static_cast(g.get())); - } - } -} - -void register_packet(srtla_conn_group_ptr group, srtla_conn_ptr conn, - int32_t sn) { - // store the sequence numbers in BE, as they're transmitted over the network - conn->recv_log[conn->recv_idx++] = htobe32(sn); - - if (conn->recv_idx == RECV_ACK_INT) { - srtla_ack_pkt ack; - ack.type = htobe32(SRTLA_TYPE_ACK << 16); - std::memcpy(&ack.acks, conn->recv_log.begin(), - sizeof(uint32_t) * conn->recv_log.max_size()); - - int ret = sendto(srtla_sock, &ack, sizeof(ack), 0, - (struct sockaddr *)&conn->addr, addr_len); - if (ret != sizeof(ack)) { - spdlog::error("[{}:{}] [Group: {}] Failed to send the SRTLA ACK", - print_addr((struct sockaddr 
*)&conn->addr), - port_no((struct sockaddr *)&conn->addr), - static_cast(group.get())); - } - - conn->recv_idx = 0; - } -} - -void handle_srtla_data(time_t ts) { - char buf[MTU] = {}; - - // Get the packet - struct sockaddr_storage srtla_addr; - socklen_t len = addr_len; - int n = - recvfrom(srtla_sock, &buf, MTU, 0, (struct sockaddr *)&srtla_addr, &len); - if (n < 0) { - spdlog::error("Failed to read an srtla packet {}", strerror(errno)); - return; - } - - // Handle srtla registration packets - if (is_srtla_reg1(buf, n)) { - register_group(&srtla_addr, buf, ts); - return; - } - - if (is_srtla_reg2(buf, n)) { - conn_reg(&srtla_addr, buf, ts); - return; - } - - // Check that the peer is a member of a connection group, discard otherwise - srtla_conn_group_ptr g; - srtla_conn_ptr c; - group_find_by_addr(&srtla_addr, g, c); - if (!g || !c) - return; - - // Update the connection's use timestamp - c->last_rcvd = ts; - - // Resend SRTLA keep-alive packets to the sender - if (is_srtla_keepalive(buf, n)) { - int ret = sendto(srtla_sock, &buf, n, 0, (struct sockaddr *)&srtla_addr, - addr_len); - if (ret != n) { - spdlog::error("[{}:{}] [Group: {}] Failed to send SRTLA Keepalive", - print_addr((struct sockaddr *)&srtla_addr), - port_no((struct sockaddr *)&srtla_addr), - static_cast(g.get())); - } - return; - } - - // Check that the packet is large enough to be an SRT packet, discard - // otherwise - if (n < SRT_MIN_LEN) - return; - - // Record the most recently active peer - g->last_addr = srtla_addr; - - // Keep track of the received data packets to send SRTLA ACKs - int32_t sn = get_srt_sn(buf, n); - if (sn >= 0) { - register_packet(g, c, sn); - } - - // Open a connection to the SRT server for the group - if (g->srt_sock < 0) { - int sock = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); - if (sock < 0) { - spdlog::error("[Group: {}] Failed to create an SRT socket", - static_cast(g.get())); - remove_group(g); - return; - } - g->srt_sock = sock; - - // Set receive buffer 
size for g->srt_sock - int bufsize = RECV_BUF_SIZE; - int ret = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("failed to set receive buffer size ({})", bufsize); - remove_group(g); - return; - } - - // Set send buffer size for g->srt_sock - int sndbufsize = SEND_BUF_SIZE; - ret = setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sndbufsize, sizeof(sndbufsize)); - if (ret != 0) { - spdlog::error("failed to set send buffer size ({})", bufsize); - remove_group(g); - return; - } - - // Set g->srt_sock to non-blocking - int flags = fcntl(sock, F_GETFL, 0); - if (flags == -1 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) == -1) { - spdlog::error("failed to set g->srt_sock non-blocking"); - remove_group(g); - return; - } - - // Connect using the appropriate address family - if (srt_addr.ss_family == AF_INET) { - ret = connect(sock, (struct sockaddr *)&srt_addr, - sizeof(struct sockaddr_in)); - } else if (srt_addr.ss_family == AF_INET6) { - ret = connect(sock, (struct sockaddr *)&srt_addr, - sizeof(struct sockaddr_in6)); - } else { - spdlog::error("[Group: {}] Invalid address family for SRT server", - static_cast(g.get())); - remove_group(g); - return; - } - - uint16_t local_port = get_sock_local_port(sock); - spdlog::info("[Group: {}] Created SRT socket. 
Local Port: {}", - static_cast(g.get()), local_port); - - ret = epoll_add(sock, EPOLLIN, g.get()); - if (ret != 0) { - spdlog::error("[Group: {}] Failed to add the SRT socket to the epoll", - static_cast(g.get())); - remove_group(g); - return; - } - - // Write file containing association between local port and client IPs - g->write_socket_info_file(); - } - - int ret = send(g->srt_sock, &buf, n, 0); - if (ret != n) { - spdlog::error( - "[Group: {}] Failed to forward SRTLA packet, terminating the group", - static_cast(g.get())); - remove_group(g); - } -} - -/* - Freeing resources - - Groups: - * new groups with no connection: created_at < (ts - G_TIMEOUT) - * other groups: when all connections have timed out - Connections: - * GC last_rcvd < (ts - CONN_TIMEOUT) -*/ -void cleanup_groups_connections(time_t ts) { - static time_t last_ran = 0; - if ((last_ran + CLEANUP_PERIOD) > ts) - return; - last_ran = ts; - - if (!conn_groups.size()) - return; - - spdlog::debug("Starting a cleanup run..."); - - int total_groups = conn_groups.size(); - int total_conns = 0; - int removed_groups = 0; - int removed_conns = 0; - - for (std::vector::iterator git = conn_groups.begin(); - git != conn_groups.end();) { - auto group = *git; - - size_t before_conns = group->conns.size(); - total_conns += before_conns; - for (std::vector::iterator cit = group->conns.begin(); - cit != group->conns.end();) { - auto conn = *cit; - - if ((conn->last_rcvd + CONN_TIMEOUT) < ts) { - cit = group->conns.erase(cit); - removed_conns++; - spdlog::info("[{}:{}] [Group: {}] Connection removed (timed out)", - print_addr((struct sockaddr *)&conn->addr), - port_no((struct sockaddr *)&conn->addr), - static_cast(group.get())); - } else { - cit++; - } - } - - if (!group->conns.size() && (group->created_at + GROUP_TIMEOUT) < ts) { - git = conn_groups.erase(git); - removed_groups++; - spdlog::info("[Group: {}] Group removed (no connections)", - static_cast(group.get())); - } else { - if (before_conns != 
group->conns.size()) - group->write_socket_info_file(); - git++; - } - } - - spdlog::debug("Clean up run ended. Counted {} groups and {} connections. " - "Removed {} groups and {} connections", - total_groups, total_conns, removed_groups, removed_conns); -} - -/* -SRT is connection-oriented and it won't reply to our packets at this point -unless we start a handshake, so we do that for each resolved address - -Returns: -1 when an error has been encountered - 0 when the address was resolved but SRT appears unreachable - 1 when the address was resolved and SRT appears reachable -*/ -int resolve_srt_addr(const char *host, const char *port) { - // Let's set up an SRT handshake induction packet - srt_handshake_t hs_packet = {0}; - hs_packet.header.type = htobe16(SRT_TYPE_HANDSHAKE); - hs_packet.version = htobe32(4); - hs_packet.ext_field = htobe16(2); - hs_packet.handshake_type = htobe32(1); - - struct addrinfo hints, *srt_addrs; - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_UNSPEC; // Allow IPv4 or IPv6 - hints.ai_socktype = SOCK_DGRAM; - int ret = getaddrinfo(host, port, &hints, &srt_addrs); - if (ret != 0) { - spdlog::error("Failed to resolve the address: {}:{}: {}", host, port, - gai_strerror(ret)); - return -1; - } - - int tmp_sock = socket(AF_INET, SOCK_DGRAM, 0); - if (tmp_sock < 0) { - spdlog::error("Failed to create a UDP socket"); - return -1; - } - - // Set receive buffer size for tmp_sock - int bufsize = RECV_BUF_SIZE; - ret = setsockopt(tmp_sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("Failed to set a receive buffer size ({} bytes)", bufsize); - return -1; - } - - // Set send buffer size for tmp_sock - bufsize = SEND_BUF_SIZE; - ret = setsockopt(tmp_sock, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("Failed to set a send buffer size ({} bytes)", bufsize); - return -1; - } - - int found = -1; - for (struct addrinfo *addr = srt_addrs; addr != NULL && found == -1; 
- addr = addr->ai_next) { - spdlog::info("Trying to connect to SRT at {}:{}...", - print_addr((struct sockaddr *)addr->ai_addr), port); - if (addr->ai_family == AF_INET) { - ret = connect(tmp_sock, addr->ai_addr, sizeof(struct sockaddr_in)); - } else if (addr->ai_family == AF_INET6) { - ret = connect(tmp_sock, addr->ai_addr, sizeof(struct sockaddr_in6)); - } else { - spdlog::warn("Unsupported address family, skipping"); - continue; - } - if (ret == 0) { - ret = send(tmp_sock, &hs_packet, sizeof(hs_packet), 0); - if (ret == sizeof(hs_packet)) { - char buf[MTU]; - ret = recv(tmp_sock, &buf, MTU, 0); - if (ret == sizeof(hs_packet)) { - spdlog::info("Success"); - // Copy the successful address to srt_addr - if (addr->ai_family == AF_INET) { - memcpy(&srt_addr, addr->ai_addr, sizeof(struct sockaddr_in)); - } else { - // AF_INET6 - memcpy(&srt_addr, addr->ai_addr, sizeof(struct sockaddr_in6)); - } - found = 1; - } - } // ret == sizeof(buf) - } // ret == 0 - - if (found == -1) { - spdlog::info("Error"); - } - } - close(tmp_sock); - - if (found == -1) { - // If no successful connection, default to the first address - if (srt_addrs->ai_family == AF_INET) { - memcpy(&srt_addr, srt_addrs->ai_addr, sizeof(struct sockaddr_in)); - } else if (srt_addrs->ai_family == AF_INET6) { - memcpy(&srt_addr, srt_addrs->ai_addr, sizeof(struct sockaddr_in6)); - } - spdlog::warn("Failed to confirm that a SRT server is reachable at any " - "address. 
Proceeding with the first address: {}", - print_addr((struct sockaddr *)&srt_addr)); - found = 0; - } - - freeaddrinfo(srt_addrs); - - return found; -} - -int main(int argc, char **argv) { - argparse::ArgumentParser args("srtla_rec", VERSION); - - args.add_argument("--srtla_port") - .help("Port to bind the SRTLA socket to") - .default_value((uint16_t)5000) - .scan<'d', uint16_t>(); - args.add_argument("--srt_hostname") - .help("Hostname of the downstream SRT server") - .default_value(std::string{"127.0.0.1"}); - args.add_argument("--srt_port") - .help("Port of the downstream SRT server") - .default_value((uint16_t)5001) - .scan<'d', uint16_t>(); - args.add_argument("--verbose") - .help("Enable verbose logging") - .default_value(false) - .implicit_value(true); - - try { - args.parse_args(argc, argv); - } catch (const std::runtime_error &err) { - std::cerr << err.what() << std::endl; - std::cerr << args; - std::exit(1); - } - - uint16_t srtla_port = args.get("--srtla_port"); - std::string srt_hostname = args.get("--srt_hostname"); - std::string srt_port = std::to_string(args.get("--srt_port")); - - if (args.get("--verbose")) - spdlog::set_level(spdlog::level::debug); - - // Try to detect if the SRT server is reachable. 
- int ret = resolve_srt_addr(srt_hostname.c_str(), srt_port.c_str()); - if (ret < 0) { - exit(EXIT_FAILURE); - } - - // We use epoll for event-driven network I/O - socket_epoll = epoll_create(1000); // the number is ignored since Linux 2.6.8 - if (socket_epoll < 0) { - spdlog::critical("epoll creation failed"); - exit(EXIT_FAILURE); - } - - // Set up the listener socket for incoming SRT connections - srtla_sock = socket(AF_INET6, SOCK_DGRAM, 0); - if (srtla_sock < 0) { - spdlog::critical("SRTLA socket creation failed"); - exit(EXIT_FAILURE); - } - - // Disable IPV6_V6ONLY - int v6only = 0; - ret = setsockopt(srtla_sock, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, - sizeof(v6only)); - if (ret < 0) { - spdlog::critical("Failed to set IPV6_V6ONLY option"); - exit(EXIT_FAILURE); - } - - // Set receive buffer size for srtla_sock - int bufsize = RECV_BUF_SIZE; - ret = setsockopt(srtla_sock, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("failed to set receive buffer size ({})", bufsize); - exit(EXIT_FAILURE); - } - - // Set send buffer size for srtla_sock - bufsize = SEND_BUF_SIZE; - ret = setsockopt(srtla_sock, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("failed to set send buffer size ({})", bufsize); - exit(EXIT_FAILURE); - } - - // Set srtla_sock to non-blocking - int flags = fcntl(srtla_sock, F_GETFL, 0); - if (flags == -1 || fcntl(srtla_sock, F_SETFL, flags | O_NONBLOCK) == -1) { - spdlog::error("failed to set srtla_sock non-blocking"); - exit(EXIT_FAILURE); - } - - struct sockaddr_in6 listen_addr = {}; - listen_addr.sin6_family = AF_INET6; - listen_addr.sin6_addr = in6addr_any; - // Use the original srtla_port - listen_addr.sin6_port = htons(srtla_port); - ret = bind(srtla_sock, (const struct sockaddr *)&listen_addr, - sizeof(listen_addr)); - if (ret < 0) { - spdlog::critical("SRTLA socket bind failed"); - exit(EXIT_FAILURE); - } - - ret = epoll_add(srtla_sock, EPOLLIN, NULL); - if (ret != 0) { 
- spdlog::critical("Failed to add the SRTLA sock to the epoll"); - exit(EXIT_FAILURE); - } - - spdlog::info("srtla_rec is now running"); - - while (true) { - struct epoll_event events[MAX_EPOLL_EVENTS]; - int eventcnt = epoll_wait(socket_epoll, events, MAX_EPOLL_EVENTS, 1000); - - time_t ts = 0; - int ret = get_seconds(&ts); - if (ret != 0) - spdlog::error("Failed to get the current time"); - - size_t group_cnt; - for (int i = 0; i < eventcnt; i++) { - group_cnt = conn_groups.size(); - if (events[i].data.ptr == NULL) { - handle_srtla_data(ts); - } else { - auto g = static_cast(events[i].data.ptr); - handle_srt_data(group_find_by_id(g->id.data())); - } - - /* If we've removed a group due to a socket error, then we might have - pending events already waiting for us in events[], and now pointing - to freed() memory. Get an updated list from epoll_wait() */ - if (conn_groups.size() < group_cnt) - break; - } // for - - cleanup_groups_connections(ts); - } -} diff --git a/src/receiver.h b/src/receiver.h deleted file mode 100644 index 524281e..0000000 --- a/src/receiver.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - srtla_rec - SRT transport proxy with link aggregation, forked by IRLToolkit and IRLServer - Copyright (C) 2020-2021 BELABOX project - Copyright (C) 2024 IRLToolkit Inc. - Copyright (C) 2025 IRLServer.com - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . 
-*/ - -#pragma once - -#include - -#include - -extern "C" { -#include "common.h" -} - -#define MAX_CONNS_PER_GROUP 16 -#define MAX_GROUPS 200 - -#define CLEANUP_PERIOD 3 -#define GROUP_TIMEOUT 10 -#define CONN_TIMEOUT 10 - -#define RECV_ACK_INT 10 - -#define SRT_SOCKET_INFO_PREFIX "/tmp/srtla-group-" - -struct srtla_conn { - struct sockaddr_storage addr; - time_t last_rcvd = 0; - int recv_idx = 0; - std::array recv_log; - - srtla_conn(struct sockaddr_storage &_addr, time_t ts); -}; -typedef std::shared_ptr srtla_conn_ptr; - -struct srtla_conn_group { - std::array id; - std::vector conns; - time_t created_at = 0; - int srt_sock = -1; - struct sockaddr_storage last_addr = {}; - - srtla_conn_group(char *client_id, time_t ts); - ~srtla_conn_group(); - - std::vector get_client_addresses(); - void write_socket_info_file(); - void remove_socket_info_file(); -}; -typedef std::shared_ptr srtla_conn_group_ptr; - -struct srtla_ack_pkt { - uint32_t type; - uint32_t acks[RECV_ACK_INT]; -}; diff --git a/src/receiver_config.h b/src/receiver_config.h new file mode 100644 index 0000000..cd5d199 --- /dev/null +++ b/src/receiver_config.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include + +// ============================================================================ +// COMPARISON MODE: Connection Info Algorithm Comparison +// ============================================================================ +// When enabled (1): Run BOTH algorithms simultaneously on same data and log +// the differences for real-time comparison +// When disabled (0): Run only the connection info algorithm (production mode) +// ============================================================================ +#ifndef ENABLE_ALGO_COMPARISON +#define ENABLE_ALGO_COMPARISON 1 +#endif + +namespace srtla { +inline constexpr int MAX_CONNS_PER_GROUP = 16; +inline constexpr int MAX_GROUPS = 200; + +inline constexpr int CLEANUP_PERIOD = 3; +inline constexpr int GROUP_TIMEOUT = 4; +inline constexpr int CONN_TIMEOUT 
= 4; + +inline constexpr int KEEPALIVE_PERIOD = 1; +inline constexpr int RECOVERY_CHANCE_PERIOD = 5; + +inline constexpr int CONN_QUALITY_EVAL_PERIOD = 5; +inline constexpr int ACK_THROTTLE_INTERVAL = 100; // milliseconds +inline constexpr double MIN_ACK_RATE = 0.2; +inline constexpr double MIN_ACCEPTABLE_TOTAL_BANDWIDTH_KBPS = 1000.0; +inline constexpr int MAX_ERROR_POINTS = 40; +inline constexpr double GOOD_CONNECTION_THRESHOLD = 0.5; +inline constexpr int CONNECTION_GRACE_PERIOD = 10; + +inline constexpr int WEIGHT_FULL = 100; +inline constexpr int WEIGHT_EXCELLENT = 85; +inline constexpr int WEIGHT_DEGRADED = 70; +inline constexpr int WEIGHT_FAIR = 55; +inline constexpr int WEIGHT_POOR = 40; +inline constexpr int WEIGHT_CRITICAL = 10; + +// RTT-based quality assessment thresholds (milliseconds) +inline constexpr uint32_t RTT_THRESHOLD_CRITICAL = 500; // 500ms +inline constexpr uint32_t RTT_THRESHOLD_HIGH = 200; // 200ms +inline constexpr uint32_t RTT_THRESHOLD_MODERATE = 100; // 100ms +inline constexpr uint32_t RTT_VARIANCE_THRESHOLD = 50; // 50ms stddev +inline constexpr int KEEPALIVE_STALENESS_THRESHOLD = 2; // seconds +inline constexpr std::size_t RTT_HISTORY_SIZE = 5; + +// NAK rate thresholds +inline constexpr double NAK_RATE_CRITICAL = 0.20; // 20% +inline constexpr double NAK_RATE_HIGH = 0.10; // 10% +inline constexpr double NAK_RATE_MODERATE = 0.05; // 5% +inline constexpr double NAK_RATE_LOW = 0.01; // 1% + +// Window utilization thresholds +inline constexpr double WINDOW_UTILIZATION_CONGESTED = 0.95; +inline constexpr double WINDOW_UTILIZATION_LOW = 0.30; + +// Bitrate comparison tolerance +inline constexpr double BITRATE_DISCREPANCY_THRESHOLD = 0.20; // 20% + +inline constexpr std::size_t RECV_ACK_INT = 10; +inline constexpr const char *SRT_SOCKET_INFO_PREFIX = "/tmp/srtla-group-"; + +struct srtla_ack_pkt { + uint32_t type; + uint32_t acks[RECV_ACK_INT]; +}; + +struct ConnectionStats { + // Receiver-side metrics (always available) + uint64_t 
bytes_received = 0; + uint64_t packets_received = 0; + uint32_t packets_lost = 0; + uint64_t last_eval_time = 0; + uint64_t last_bytes_received = 0; + uint64_t last_packets_received = 0; + uint32_t last_packets_lost = 0; + uint32_t error_points = 0; + uint8_t weight_percent = WEIGHT_FULL; + uint64_t last_ack_sent_time = 0; + double ack_throttle_factor = 1.0; + uint16_t nack_count = 0; + + // Sender-side telemetry from keepalive packets (when available) + // These are populated when the sender includes connection_info_t in keepalives. + // When not available, the quality algorithm falls back to receiver-only metrics. + uint32_t rtt_ms = 0; + uint32_t rtt_history[RTT_HISTORY_SIZE] = {0}; + uint8_t rtt_history_idx = 0; + time_t last_keepalive = 0; // Timestamp of last keepalive with valid sender telemetry + + int32_t window = 0; + int32_t in_flight = 0; + + uint32_t sender_nak_count = 0; + uint32_t last_sender_nak_count = 0; + + uint32_t sender_bitrate_bps = 0; + + // Sender capability detection + // Once set to true, remains true for the lifetime of the connection. + // This allows us to distinguish senders with extended keepalive support + // from legacy senders, even when the connection is actively transmitting + // (and thus not sending keepalives). + bool sender_supports_extended_keepalives = false; + + // Legacy algorithm parallel tracking (for comparison mode only) + uint32_t legacy_error_points = 0; + uint8_t legacy_weight_percent = WEIGHT_FULL; + double legacy_ack_throttle_factor = 1.0; + + // Returns true if we have recent, valid sender telemetry to use for quality evaluation. + // When false, the algorithm falls back to receiver-only metrics (bandwidth + packet loss). 
+ bool has_valid_sender_telemetry(time_t current_time) const { + // Must have received at least one keepalive with connection info + if (last_keepalive == 0) { + return false; + } + // Telemetry must be recent (within staleness threshold) + if ((current_time - last_keepalive) > KEEPALIVE_STALENESS_THRESHOLD) { + return false; + } + // Must have meaningful data (at least RTT or window info) + return (rtt_ms > 0 || window > 0); + } + + // Returns true if the sender supports extended keepalives (capability detection). + // Unlike has_valid_sender_telemetry(), this persists even when connection is active. + bool supports_extended_keepalives() const { + return sender_supports_extended_keepalives; + } +}; + +} // namespace srtla diff --git a/src/receiver_main.cpp b/src/receiver_main.cpp new file mode 100644 index 0000000..8499452 --- /dev/null +++ b/src/receiver_main.cpp @@ -0,0 +1,212 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "connection/connection_registry.h" +#include "protocol/srt_handler.h" +#include "protocol/srtla_handler.h" +#include "quality/load_balancer.h" +#include "quality/metrics_collector.h" +#include "quality/quality_evaluator.h" +#include "receiver_config.h" +#include "utils/network_utils.h" + +extern "C" { +#include "common.h" +} + +namespace { + +constexpr int MAX_EPOLL_EVENTS = 10; + +void set_socket_buffers(int socket_fd) { + int bufsize = RECV_BUF_SIZE; + if (setsockopt(socket_fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)) != + 0) { + spdlog::error("failed to set receive buffer size ({})", bufsize); + throw std::runtime_error("Failed to set receive buffer size"); + } + + bufsize = SEND_BUF_SIZE; + if (setsockopt(socket_fd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)) != + 0) { + spdlog::error("failed to set send buffer size ({})", bufsize); + throw std::runtime_error("Failed to set send buffer size"); + } +} + +} // namespace + +int main(int argc, char **argv) { 
+ argparse::ArgumentParser args("srtla_rec", VERSION); + args.add_argument("--srtla_port") + .help("Port to bind the SRTLA socket to") + .default_value(static_cast(5000)) + .scan<'d', uint16_t>(); + args.add_argument("--srt_hostname") + .help("Hostname of the downstream SRT server") + .default_value(std::string{"127.0.0.1"}); + args.add_argument("--srt_port") + .help("Port of the downstream SRT server") + .default_value(static_cast(4001)) + .scan<'d', uint16_t>(); + args.add_argument("--log_level") + .help("Set logging level (trace, debug, info, warn, error, critical)") + .default_value(std::string{"info"}); + + try { + args.parse_args(argc, argv); + } catch (const std::runtime_error &err) { + std::cerr << err.what() << std::endl; + std::cerr << args; + std::exit(1); + } + + const uint16_t srtla_port = args.get("--srtla_port"); + const std::string srt_hostname = args.get("--srt_hostname"); + const std::string srt_port = std::to_string(args.get("--srt_port")); + const std::string log_level = args.get("--log_level"); + + if (log_level == "trace") { + spdlog::set_level(spdlog::level::trace); + } else if (log_level == "debug") { + spdlog::set_level(spdlog::level::debug); + } else if (log_level == "info") { + spdlog::set_level(spdlog::level::info); + } else if (log_level == "warn") { + spdlog::set_level(spdlog::level::warn); + } else if (log_level == "error") { + spdlog::set_level(spdlog::level::err); + } else if (log_level == "critical") { + spdlog::set_level(spdlog::level::critical); + } else { + spdlog::warn("Invalid log level '{}' specified, using 'info' as default", + log_level); + spdlog::set_level(spdlog::level::info); + } + + struct sockaddr_storage srt_addr {}; + int resolve_result = srtla::utils::NetworkUtils::resolve_srt_address( + srt_hostname.c_str(), srt_port.c_str(), &srt_addr, RECV_BUF_SIZE, + SEND_BUF_SIZE); + if (resolve_result < 0) { + return EXIT_FAILURE; + } + + int epoll_fd = epoll_create1(0); + if (epoll_fd < 0) { + spdlog::critical("epoll 
creation failed"); + return EXIT_FAILURE; + } + + int srtla_sock = socket(AF_INET6, SOCK_DGRAM, 0); + if (srtla_sock < 0) { + spdlog::critical("SRTLA socket creation failed"); + return EXIT_FAILURE; + } + + int v6only = 0; + if (setsockopt(srtla_sock, IPPROTO_IPV6, IPV6_V6ONLY, &v6only, + sizeof(v6only)) < 0) { + spdlog::critical("Failed to set IPV6_V6ONLY option"); + return EXIT_FAILURE; + } + + try { + set_socket_buffers(srtla_sock); + } catch (const std::exception &) { + return EXIT_FAILURE; + } + + int flags = fcntl(srtla_sock, F_GETFL, 0); + if (flags == -1 || fcntl(srtla_sock, F_SETFL, flags | O_NONBLOCK) == -1) { + spdlog::error("failed to set srtla_sock non-blocking"); + return EXIT_FAILURE; + } + + struct sockaddr_in6 listen_addr {}; + listen_addr.sin6_family = AF_INET6; + listen_addr.sin6_addr = in6addr_any; + listen_addr.sin6_port = htons(srtla_port); + if (bind(srtla_sock, reinterpret_cast(&listen_addr), + sizeof(listen_addr)) < 0) { + spdlog::critical("SRTLA socket bind failed"); + return EXIT_FAILURE; + } + + if (srtla::utils::NetworkUtils::epoll_add(epoll_fd, srtla_sock, EPOLLIN, + nullptr) != 0) { + spdlog::critical("Failed to add the SRTLA sock to the epoll"); + return EXIT_FAILURE; + } + + srtla::connection::ConnectionRegistry registry; + srtla::quality::MetricsCollector metrics_collector; + srtla::protocol::SRTHandler srt_handler(srtla_sock, srt_addr, epoll_fd, + registry); + srtla::protocol::SRTLAHandler srtla_handler(srtla_sock, registry, srt_handler, + metrics_collector); + srtla::quality::QualityEvaluator quality_evaluator; + srtla::quality::LoadBalancer load_balancer; + + spdlog::info("srtla_rec is now running"); + + const auto keepalive_callback = + [&srtla_handler](const srtla::connection::ConnectionPtr &conn, + time_t ts) { srtla_handler.send_keepalive(conn, ts); }; + + while (true) { + struct epoll_event events[MAX_EPOLL_EVENTS]; + int eventcnt = epoll_wait(epoll_fd, events, MAX_EPOLL_EVENTS, 1000); + + time_t ts = 0; + if 
(get_seconds(&ts) != 0) { + spdlog::error("Failed to get the current time"); + continue; + } + + std::size_t group_cnt; + for (int i = 0; i < eventcnt; i++) { + // Snapshot the current group count before processing. Both + // srtla_handler.process_packet() and srt_handler.handle_srt_data() may + // remove ConnectionGroup instances via registry operations (e.g., + // registry.find_group_by_id() returning nullptr after removal). If the + // group count shrinks, events[i].data.ptr pointers from subsequent + // iterations may reference freed memory. We detect this by comparing + // registry.groups().size() with group_cnt and break early to avoid + // iterator/pointer invalidation. + group_cnt = registry.groups().size(); + if (events[i].data.ptr == nullptr) { + srtla_handler.process_packet(ts); + } else { + auto raw_group = static_cast( + events[i].data.ptr); + auto shared_group = registry.find_group_by_id(raw_group->id().data()); + if (shared_group) { + srt_handler.handle_srt_data(shared_group); + } + } + + if (registry.groups().size() < group_cnt) { + break; + } + } + + registry.cleanup_inactive(ts, keepalive_callback); + for (auto &group : registry.groups()) { + quality_evaluator.evaluate_group(group, ts); + load_balancer.adjust_weights(group, ts); + } + } + + return 0; +} diff --git a/src/sender.cpp b/src/sender.cpp index 41cce58..aa11b57 100644 --- a/src/sender.cpp +++ b/src/sender.cpp @@ -1,829 +1,838 @@ -/* - srtla - SRT transport proxy with link aggregation, forked by IRLServer - Copyright (C) 2020-2021 BELABOX project - Copyright (C) 2025 IRLServer.com - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "sender.h" - -#define PKT_LOG_SZ 256 -#define CONN_TIMEOUT 4 -#define REG2_TIMEOUT 4 -#define REG3_TIMEOUT 4 -#define GLOBAL_TIMEOUT 10 -#define IDLE_TIME 1 - -#define min(a, b) ((a < b) ? a : b) -#define max(a, b) ((a > b) ? a : b) -#define min_max(a, l, h) (max(min((a), (h)), (l))) - -#define WINDOW_MIN 1 -#define WINDOW_DEF 20 -#define WINDOW_MAX 60 -#define WINDOW_MULT 1000 -#define WINDOW_DECR 100 -#define WINDOW_INCR 30 - -#define LOG_PKT_INT 20 - -typedef struct conn { - struct conn *next; - int fd; - time_t last_rcvd; - time_t last_sent; - struct sockaddr src; - int removed; - int in_flight_pkts; - int window; - int pkt_idx; - int pkt_log[PKT_LOG_SZ]; -} conn_t; - -char *source_ip_file = NULL; - -int do_update_conns = 0; - -struct addrinfo *addrs; - -struct sockaddr srtla_addr, srt_addr; -const socklen_t addr_len = sizeof(srtla_addr); -conn_t *conns = NULL; -int listenfd; -int active_connections = 0; -int has_connected = 0; - -conn_t *pending_reg2_conn = NULL; -time_t pending_reg_timeout = 0; - -char srtla_id[SRTLA_ID_LEN]; - -/* - -Async I/O support - -*/ -fd_set active_fds; -int max_act_fd = -1; - -int add_active_fd(int fd) { - if (fd < 0) - return -1; - - if (fd > max_act_fd) - max_act_fd = fd; - FD_SET(fd, &active_fds); - - return 0; -} - -int remove_active_fd(int fd) { - if (fd < 0) - return -1; - - FD_CLR(fd, &active_fds); - - return 0; -} - -/* - -srtla registration helpers - -*/ -int send_reg1(conn_t *c) { - if (c->fd < 0) - return 
-1; - - char buf[MTU]; - uint16_t packet_type = htobe16(SRTLA_TYPE_REG1); - memcpy(buf, &packet_type, sizeof(packet_type)); - memcpy(buf + sizeof(packet_type), srtla_id, SRTLA_ID_LEN); - - int ret = sendto(c->fd, buf, SRTLA_TYPE_REG1_LEN, 0, &srtla_addr, addr_len); - if (ret != SRTLA_TYPE_REG1_LEN) - return -1; - - return 0; -} - -int send_reg2(conn_t *c) { - if (c->fd < 0) - return -1; - - char buf[SRTLA_TYPE_REG2_LEN]; - uint16_t packet_type = htobe16(SRTLA_TYPE_REG2); - memcpy(buf, &packet_type, sizeof(packet_type)); - memcpy(buf + sizeof(packet_type), srtla_id, SRTLA_ID_LEN); - - int ret = sendto(c->fd, buf, SRTLA_TYPE_REG2_LEN, 0, &srtla_addr, addr_len); - return (ret == SRTLA_TYPE_REG2_LEN) ? 0 : -1; -} - -/* - -Handling code for packets coming from the SRT caller - -*/ -void reg_pkt(conn_t *c, int32_t packet) { - spdlog::debug("{} ({}) register packet {} at idx {}", print_addr(&c->src), - fmt::ptr(c), packet, c->pkt_idx); - c->pkt_log[c->pkt_idx] = packet; - c->pkt_idx++; - c->pkt_idx %= PKT_LOG_SZ; - - c->in_flight_pkts++; -} - -int conn_timed_out(conn_t *c, time_t ts) { - return (c->last_rcvd + CONN_TIMEOUT) < ts; -} - -conn_t *select_conn() { - conn_t *min_c = NULL; - int max_score = -1; - int max_window = 0; - - for (conn_t *c = conns; c != NULL; c = c->next) { - if (c->window > max_window) { - max_window = c->window; - } - } - - time_t t; - assert(get_seconds(&t) == 0); - - for (conn_t *c = conns; c != NULL; c = c->next) { - /* If we have some very slow links, we may be better off ignoring them - However, we'd probably need to periodically re-probe them, otherwise - a link disabled due to a momentary glitch might not ever get enabled - again unless all the remaining links suffered from high packet loss - at some point. 
*/ - /*if (c->window < max_window / 5) { - c->window++; - continue; - }*/ - - if (conn_timed_out(c, t)) { - spdlog::debug("{} ({}): is timed out, ignoring it", print_addr(&c->src), - fmt::ptr(c)); - continue; - } - - int score = c->window / (c->in_flight_pkts + 1); - if (score > max_score) { - min_c = c; - max_score = score; - } - } - - if (min_c) { - min_c->last_sent = t; - } - - return min_c; -} - -void handle_srt_data(int fd) { - char buf[MTU]; - socklen_t len = sizeof(srt_addr); - int n = recvfrom(fd, &buf, MTU, 0, &srt_addr, &len); - - conn_t *c = select_conn(); - if (c) { - int32_t sn = get_srt_sn(buf, n); - int ret = sendto(c->fd, &buf, n, 0, &srtla_addr, addr_len); - if (ret == n) { - if (sn >= 0) { - reg_pkt(c, sn); - } - } else { - /* If sending the packet fails, adjust the timestamp to disable the link - until a reconnection is confirmed. 1 so connection_housekeeping() - prints its message */ - c->last_rcvd = 1; - spdlog::error("{} ({}): sendto() failed, disabling the connection", - print_addr(&c->src), fmt::ptr(c)); - } - } -} - -/* - -Handling code for packets coming from the receiver - -*/ -int get_pkt_idx(int idx, int increment) { - idx = idx + increment; - if (idx < 0) - idx += PKT_LOG_SZ; - idx %= PKT_LOG_SZ; - assert(idx >= 0 && idx < PKT_LOG_SZ); - return idx; -} - -void register_nak(int32_t packet) { - for (conn_t *c = conns; c != NULL; c = c->next) { - int idx = get_pkt_idx(c->pkt_idx, -1); - for (int i = idx; i != c->pkt_idx; i = get_pkt_idx(i, -1)) { - if (c->pkt_log[i] == packet) { - c->pkt_log[i] = -1; - // It might be better to use exponential decay like this - // c->window = c->window * 998 / 1000; - c->window -= WINDOW_DECR; - c->window = max(c->window, WINDOW_MIN * WINDOW_MULT); - spdlog::debug("{} ({}): found NAKed packet {} in the log", - print_addr(&c->src), fmt::ptr(c), packet); - return; - } - } - } - - spdlog::debug("Didn't find NAKed packet {} in our logs", packet); -} - -void register_srtla_ack(int32_t ack) { - int found = 0; - 
- for (conn_t *c = conns; c != NULL; c = c->next) { - int idx = get_pkt_idx(c->pkt_idx, -1); - for (int i = idx; i != c->pkt_idx && !found; i = get_pkt_idx(i, -1)) { - if (c->pkt_log[i] == ack) { - found = 1; - if (c->in_flight_pkts > 0) { - c->in_flight_pkts--; - } - c->pkt_log[i] = -1; - - if (c->in_flight_pkts * WINDOW_MULT > c->window) { - c->window += WINDOW_INCR - 1; - } - - break; - } - } - - if (c->last_rcvd != 0) { - c->window += 1; - c->window = min(c->window, WINDOW_MAX * WINDOW_MULT); - } - } -} - -/* - TODO after the sequence number overflows, we should probably also mark high - sn packets as received. However, this shouldn't normally be an issue as SRTLA - ACKs acknowledge each packet individually. Also, if the SRTLA ACK is lost, - stale entries will be overwritten soon enough as pkt_log is a circular buffer -*/ -void conn_register_srt_ack(conn_t *c, int32_t ack) { - int count = 0; - int idx = get_pkt_idx(c->pkt_idx, -1); - for (int i = idx; i != c->pkt_idx; i = get_pkt_idx(i, -1)) { - if (c->pkt_log[i] < ack) { - c->pkt_log[i] = -1; - } else { - count++; - } - } - c->in_flight_pkts = count; -} - -void register_srt_ack(int32_t ack) { - for (conn_t *c = conns; c != NULL; c = c->next) { - conn_register_srt_ack(c, ack); - } -} - -void handle_srtla_data(conn_t *c) { - char buf[MTU]; - - int n = recvfrom(c->fd, &buf, MTU, 0, NULL, NULL); - if (n <= 0) - return; - - time_t ts; - get_seconds(&ts); - - uint16_t packet_type = get_srt_type(buf, n); - - /* Handling NGPs separately because we don't want them to update last_rcvd - Otherwise they could be keeping failed connections marked active */ - if (packet_type == SRTLA_TYPE_REG_NGP) { - /* Only process NGPs if: - * we don't have any established connections - * and we don't already have a pending REG1->REG2 exhange in flight - * and we don't have any pending REG2->REG3 exchanges in flight - */ - if (active_connections == 0 && pending_reg2_conn == NULL && - ts > pending_reg_timeout) { - if (send_reg1(c) == 0) { 
- pending_reg2_conn = c; - pending_reg_timeout = ts + REG2_TIMEOUT; - } - } - return; - - } else if (packet_type == SRTLA_TYPE_REG2) { - if (pending_reg2_conn == c) { - char *id = &buf[2]; - if (memcmp(id, srtla_id, SRTLA_ID_LEN / 2) != 0) { - spdlog::error("{} ({}): got a mismatching ID in SRTLA_REG2", - print_addr(&c->src), fmt::ptr(c)); - return; - } - - spdlog::info("{} ({}): connection group registered", print_addr(&c->src), - fmt::ptr(c)); - memcpy(srtla_id, id, SRTLA_ID_LEN); - - /* Broadcast REG2 */ - for (conn_t *i = conns; i != NULL; i = i->next) { - send_reg2(i); - } - - pending_reg2_conn = NULL; - pending_reg_timeout = ts + REG3_TIMEOUT; - } - return; - } - - c->last_rcvd = ts; - - switch (packet_type) { - case SRT_TYPE_ACK: { - uint32_t last_ack = *((uint32_t *)&buf[16]); - last_ack = be32toh(last_ack); - register_srt_ack(last_ack); - break; - } - - case SRT_TYPE_NAK: { - uint32_t *ids = (uint32_t *)buf; - for (int i = 4; i < n / 4; i++) { - uint32_t id = be32toh(ids[i]); - if (id & (1 << 31)) { - id = id & 0x7FFFFFFF; - uint32_t last_id = be32toh(ids[i + 1]); - for (int32_t lost = id; lost <= last_id; lost++) { - register_nak(lost); - } - i++; - } else { - register_nak(id); - } - } - break; - } - - // srtla packets below, don't send to SRT - case SRTLA_TYPE_ACK: { - uint32_t *acks = (uint32_t *)buf; - for (int i = 1; i < n / 4; i++) { - uint32_t id = be32toh(acks[i]); - spdlog::debug("{} ({}): ack {}\n", print_addr(&c->src), fmt::ptr(c), id); - register_srtla_ack(id); - } - return; - } - case SRTLA_TYPE_KEEPALIVE: - spdlog::debug("{} ({}): got a keepalive", print_addr(&c->src), fmt::ptr(c)); - return; // don't send to SRT - - case SRTLA_TYPE_REG3: - has_connected = 1; - active_connections++; - spdlog::info("{} ({}): connection established", print_addr(&c->src), - fmt::ptr(c)); - return; - } // switch - - sendto(listenfd, &buf, n, 0, &srt_addr, addr_len); -} - -/* - -Connection and socket management - -*/ -conn_t *conn_find_by_src(struct sockaddr *src) 
{ - for (conn_t *c = conns; c != NULL; c = c->next) { - if (memcmp(src, &c->src, sizeof(*src)) == 0) { - return c; - } - } - - return NULL; -} - -int setup_conns(char *source_ip_file) { - FILE *config = fopen(source_ip_file, "r"); - if (config == NULL) { - spdlog::critical("Failed to open the source ip file {}", source_ip_file); - exit(EXIT_FAILURE); - } - - int count = 0; - char *line = NULL; - size_t line_len = 0; - while (getline(&line, &line_len, config) >= 0) { - char *nl; - if ((nl = strchr(line, '\n'))) { - *nl = '\0'; - } - - struct sockaddr src; - - int ret = parse_ip((struct sockaddr_in *)&src, line); - if (ret == 0) { - conn_t *c = conn_find_by_src(&src); - if (c == NULL) { - conn_t *c = static_cast(calloc(1, sizeof(conn_t))); - assert(c != NULL); - - c->src = src; - c->fd = -1; - c->window = WINDOW_DEF * WINDOW_MULT; - - c->next = conns; - conns = c; - - count++; - - spdlog::info("Added connection via {} ({})", print_addr(&c->src), - fmt::ptr(c)); - } else { - c->removed = 0; - } - } - } - if (line) - free(line); - - fclose(config); - - return count; -} - -void update_conns(char *source_ip_file) { - for (conn_t *c = conns; c != NULL; c = c->next) { - c->removed = 1; - } - - setup_conns(source_ip_file); - - conn_t **prev = &conns; - conn_t *next; - for (conn_t *c = conns; c != NULL; c = next) { - next = c->next; - if (c->removed) { - spdlog::info("Removed connection via {} ({})", print_addr(&c->src), - fmt::ptr(c)); - - if (c == pending_reg2_conn) { - pending_reg2_conn = NULL; - } - - remove_active_fd(c->fd); - close(c->fd); - *prev = c->next; - free(c); - } else { - prev = &c->next; - } - } -} - -void schedule_update_conns(int signal) { do_update_conns = 1; } - -int open_socket(conn_t *c, int quiet) { - if (c->fd >= 0) { - remove_active_fd(c->fd); - close(c->fd); - c->fd = -1; - } - - // Set up the socket - int fd = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); - if (fd < 0) { - spdlog::error("Failed to open a socket"); - return -1; - } - int bufsize 
= SEND_BUF_SIZE; - int ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); - if (ret != 0) { - spdlog::error("Failed to set send buffer size ({} bytes)", bufsize); - goto err; - } - - // Bind it to the source address - ret = bind(fd, &c->src, sizeof(c->src)); - if (ret != 0) { - if (!quiet) { - spdlog::error("Failed to bind to the source address {}", - print_addr(&c->src)); - } - goto err; - } - - add_active_fd(fd); - c->fd = fd; - - return 0; - -err: - close(fd); - return -1; -} - -int open_conns(const char *host, const char *port) { - // Check that we can actually open & bind at least one socket - int opened = 0; - for (conn_t *c = conns; c != NULL; c = c->next) { - if (open_socket(c, 0) == 0) { - opened++; - } - } - return opened; -} - -/* - -Connection housekeeping - -*/ -void set_srtla_addr(struct addrinfo *addr) { - memcpy(&srtla_addr, addr->ai_addr, addr->ai_addrlen); - spdlog::info("Trying to connect to {}...", print_addr(&srtla_addr)); -} - -void send_keepalive(conn_t *c) { - spdlog::debug("{} ({}): sending keepalive", print_addr(&c->src), fmt::ptr(c)); - uint16_t pkt = htobe16(SRTLA_TYPE_KEEPALIVE); - // ignoring the result on purpose - sendto(c->fd, &pkt, sizeof(pkt), 0, &srtla_addr, addr_len); -} - -#define HOUSEKEEPING_INT 1000 // ms -void connection_housekeeping() { - static uint64_t all_failed_at = 0; - /* We use milliseconds here because with a seconds timer we may be - resending a second REG2 very soon after the first one, depending - on when the first execution happens within the seconds interval */ - static uint64_t last_ran = 0; - uint64_t ms; - assert(get_ms(&ms) == 0); - if ((last_ran + HOUSEKEEPING_INT) > ms) - return; - - time_t time = (time_t)(ms / 1000); - - active_connections = 0; - - if (pending_reg2_conn && time > pending_reg_timeout) { - pending_reg2_conn = NULL; - } - - for (conn_t *c = conns; c != NULL; c = c->next) { - if (c->fd < 0) { - open_socket(c, 1); - continue; - } - - if (conn_timed_out(c, time)) { - /* 
When we first detect the connection having failed, - we reset its status and print a message */ - if (c->last_rcvd > 0) { - spdlog::info("{} ({}): connection failed, attempting to reconnect", - print_addr(&c->src), fmt::ptr(c)); - c->last_rcvd = 0; - c->last_sent = 0; - c->window = WINDOW_MIN * WINDOW_MULT; - c->in_flight_pkts = 0; - for (int i = 0; i < PKT_LOG_SZ; i++) { - c->pkt_log[i] = -1; - } - } - - if (pending_reg2_conn == NULL) { - /* As the connection has timed out on our end, the receiver might have - garbage collected it. Try to re-establish it rather than send a - keepalive */ - send_reg2(c); - } else if (pending_reg2_conn == c) { - send_reg1(c); - } - continue; - } - - /* If a connection has received data in the last CONN_TIMEOUT seconds, - then it's active */ - active_connections++; - - if ((c->last_sent + IDLE_TIME) < time) { - send_keepalive(c); - } - } - - if (active_connections == 0) { - if (all_failed_at == 0) { - all_failed_at = ms; - } - - if (has_connected) { - spdlog::error("warning: no available connections"); - } - - // Timeout when all connections have failed - if (ms > (all_failed_at + (GLOBAL_TIMEOUT * 1000))) { - if (has_connected) { - spdlog::critical("Failed to re-establish any connections to {}", - print_addr(&srtla_addr)); - exit(EXIT_FAILURE); - } - - spdlog::error("Failed to establish any initial connections to {}", - print_addr(&srtla_addr)); - - // Walk through the list of resolved addresses - if (addrs->ai_next) { - addrs = addrs->ai_next; - set_srtla_addr(addrs); - all_failed_at = 0; - } else { - exit(EXIT_FAILURE); - } - } - } else { - all_failed_at = 0; - } - - last_ran = ms; -} - -inline std::vector get_random_bytes(size_t size) { - std::vector ret; - ret.resize(size); - - std::ifstream f("/dev/urandom"); - f.read(ret.data(), size); - assert(f); // Failed to read fully! 
- f.close(); - - return ret; -} - -int main(int argc, char **argv) { - argparse::ArgumentParser args("srtla_send", VERSION); - // SRT_LISTEN_PORT SRTLA_HOST SRTLA_PORT BIND_IPS_FILE - args.add_argument("listen_port") - .help("Port to bind the SRT socket to") - .default_value((uint16_t)5000) - .scan<'d', uint16_t>(); - args.add_argument("srtla_host") - .help("Hostname of the upstream SRTLA server") - .default_value(std::string{"127.0.0.1"}); - args.add_argument("srtla_port") - .help("Port of the upstream SRTLA server") - .default_value((uint16_t)5001) - .scan<'d', uint16_t>(); - args.add_argument("ips_file") - .help("File containing the source IP addresses") - .default_value(std::string{"/tmp/srtla_ips"}); - args.add_argument("--verbose") - .help("Enable verbose logging") - .default_value(false) - .implicit_value(true); - - try { - args.parse_args(argc, argv); - } catch (const std::runtime_error &err) { - std::cerr << err.what() << std::endl; - std::cerr << args; - std::exit(1); - } - if (args.get("--verbose")) - spdlog::set_level(spdlog::level::debug); - - std::string ips_file = args.get("ips_file"); - source_ip_file = (char *)ips_file.c_str(); - int conn_count = setup_conns(source_ip_file); - if (conn_count <= 0) { - spdlog::critical("Failed to parse any IP addresses in {}", source_ip_file); - exit(EXIT_FAILURE); - } - - struct sockaddr_in listen_addr; - - int port = args.get("listen_port"); - - // Read a random connection group id for this session - auto srtla_id = get_random_bytes(SRTLA_ID_LEN / 2); - - FD_ZERO(&active_fds); - - listen_addr.sin_family = AF_INET; - listen_addr.sin_addr.s_addr = INADDR_ANY; - listen_addr.sin_port = htons(port); - listenfd = socket(AF_INET, SOCK_DGRAM, 0); - if (listenfd < 0) { - spdlog::critical("Failed to create a socket"); - exit(EXIT_FAILURE); - } - - int ret = - bind(listenfd, (struct sockaddr *)&listen_addr, sizeof(listen_addr)); - if (ret < 0) { - spdlog::critical("Failed to bind to port {}", port); - exit(EXIT_FAILURE); - } 
- add_active_fd(listenfd); - - std::string srtla_host = args.get("srtla_host"); - std::string srtla_port = std::to_string(args.get("srtla_port")); - int connected = open_conns(srtla_host.c_str(), srtla_port.c_str()); - if (connected < 1) { - spdlog::critical("Failed to open and bind to any of the IP addresses in {}", - source_ip_file); - exit(EXIT_FAILURE); - } - - // Resolve the address of the receiver - struct addrinfo hints; - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_DGRAM; - ret = getaddrinfo(srtla_host.c_str(), srtla_port.c_str(), &hints, &addrs); - if (ret != 0) { - spdlog::critical("Failed to resolve {}: {}", srtla_host, gai_strerror(ret)); - exit(EXIT_FAILURE); - } - - set_srtla_addr(addrs); - - signal(SIGHUP, schedule_update_conns); - - int info_int = LOG_PKT_INT; - - while (1) { - if (do_update_conns) { - update_conns(source_ip_file); - do_update_conns = 0; - } - - connection_housekeeping(); - - fd_set read_fds = active_fds; - struct timeval to = {.tv_sec = 0, .tv_usec = 200 * 1000}; - ret = select(FD_SETSIZE, &read_fds, NULL, NULL, &to); - - if (ret > 0) { - if (FD_ISSET(listenfd, &read_fds)) { - handle_srt_data(listenfd); - } - - for (conn_t *c = conns; c != NULL; c = c->next) { - if (c->fd >= 0 && FD_ISSET(c->fd, &read_fds)) { - handle_srtla_data(c); - } - } - } // ret > 0 - - info_int--; - if (info_int == 0) { - for (conn_t *c = conns; c != NULL; c = c->next) { - spdlog::debug("{} ({}): in flight: {}, window: {}, last_rcvd {}", - print_addr(&c->src), fmt::ptr(c), c->in_flight_pkts, - c->window, c->last_rcvd); - } - info_int = LOG_PKT_INT; - } - } // while(1); +/* + srtla - SRT transport proxy with link aggregation, forked by IRLServer + Copyright (C) 2020-2021 BELABOX project + Copyright (C) 2025 IRLServer.com + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either 
version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sender.h" +#include + +#define PKT_LOG_SZ 256 +#define CONN_TIMEOUT 4 +#define REG2_TIMEOUT 4 +#define REG3_TIMEOUT 4 +#define GLOBAL_TIMEOUT 10 +#define IDLE_TIME 1 + +#define min(a, b) ((a < b) ? a : b) +#define max(a, b) ((a > b) ? a : b) +#define min_max(a, l, h) (max(min((a), (h)), (l))) + +#define WINDOW_MIN 1 +#define WINDOW_DEF 20 +#define WINDOW_MAX 60 +#define WINDOW_MULT 1000 +#define WINDOW_DECR 100 +#define WINDOW_INCR 30 + +#define LOG_PKT_INT 20 + +typedef struct conn { + struct conn *next; + int fd; + time_t last_rcvd; + time_t last_sent; + struct sockaddr src; + int removed; + int in_flight_pkts; + int window; + int pkt_idx; + int pkt_log[PKT_LOG_SZ]; +} conn_t; + +char *source_ip_file = NULL; + +int do_update_conns = 0; + +struct addrinfo *addrs; + +struct sockaddr srtla_addr, srt_addr; +const socklen_t addr_len = sizeof(srtla_addr); +conn_t *conns = NULL; +int listenfd; +int active_connections = 0; +int has_connected = 0; + +conn_t *pending_reg2_conn = NULL; +time_t pending_reg_timeout = 0; + +char srtla_id[SRTLA_ID_LEN]; + +/* + +Async I/O support + +*/ +fd_set active_fds; +int max_act_fd = -1; + +int add_active_fd(int fd) { + if (fd < 0) + return -1; + + if (fd > max_act_fd) + max_act_fd = fd; + FD_SET(fd, &active_fds); + + return 0; +} + +int remove_active_fd(int fd) { + if (fd < 0) + return -1; + + FD_CLR(fd, &active_fds); + + return 0; +} + +/* + +srtla registration 
helpers + +*/ +int send_reg1(conn_t *c) { + if (c->fd < 0) + return -1; + + char buf[MTU]; + uint16_t packet_type = htobe16(SRTLA_TYPE_REG1); + memcpy(buf, &packet_type, sizeof(packet_type)); + memcpy(buf + sizeof(packet_type), srtla_id, SRTLA_ID_LEN); + + int ret = sendto(c->fd, buf, SRTLA_TYPE_REG1_LEN, 0, &srtla_addr, addr_len); + if (ret != SRTLA_TYPE_REG1_LEN) + return -1; + + return 0; +} + +int send_reg2(conn_t *c) { + if (c->fd < 0) + return -1; + + char buf[SRTLA_TYPE_REG2_LEN]; + uint16_t packet_type = htobe16(SRTLA_TYPE_REG2); + memcpy(buf, &packet_type, sizeof(packet_type)); + memcpy(buf + sizeof(packet_type), srtla_id, SRTLA_ID_LEN); + + int ret = sendto(c->fd, buf, SRTLA_TYPE_REG2_LEN, 0, &srtla_addr, addr_len); + return (ret == SRTLA_TYPE_REG2_LEN) ? 0 : -1; +} + +/* + +Handling code for packets coming from the SRT caller + +*/ +void reg_pkt(conn_t *c, int32_t packet) { + spdlog::debug("{} ({}) register packet {} at idx {}", print_addr(&c->src), + fmt::ptr(c), packet, c->pkt_idx); + c->pkt_log[c->pkt_idx] = packet; + c->pkt_idx++; + c->pkt_idx %= PKT_LOG_SZ; + + c->in_flight_pkts++; +} + +int conn_timed_out(conn_t *c, time_t ts) { + return (c->last_rcvd + CONN_TIMEOUT) < ts; +} + +conn_t *select_conn() { + conn_t *min_c = NULL; + int max_score = -1; + int max_window = 0; + + for (conn_t *c = conns; c != NULL; c = c->next) { + if (c->window > max_window) { + max_window = c->window; + } + } + + time_t t; + assert(get_seconds(&t) == 0); + + for (conn_t *c = conns; c != NULL; c = c->next) { + /* If we have some very slow links, we may be better off ignoring them + However, we'd probably need to periodically re-probe them, otherwise + a link disabled due to a momentary glitch might not ever get enabled + again unless all the remaining links suffered from high packet loss + at some point. 
*/ + /*if (c->window < max_window / 5) { + c->window++; + continue; + }*/ + + if (conn_timed_out(c, t)) { + spdlog::debug("{} ({}): is timed out, ignoring it", print_addr(&c->src), + fmt::ptr(c)); + continue; + } + + int score = c->window / (c->in_flight_pkts + 1); + if (score > max_score) { + min_c = c; + max_score = score; + } + } + + if (min_c) { + min_c->last_sent = t; + } + + return min_c; +} + +void handle_srt_data(int fd) { + char buf[MTU]; + socklen_t len = sizeof(srt_addr); + int n = recvfrom(fd, &buf, MTU, 0, &srt_addr, &len); + + conn_t *c = select_conn(); + if (c) { + int32_t sn = get_srt_sn(buf, n); + int ret = sendto(c->fd, &buf, n, 0, &srtla_addr, addr_len); + if (ret == n) { + if (sn >= 0) { + reg_pkt(c, sn); + } + } else { + /* If sending the packet fails, adjust the timestamp to disable the link + until a reconnection is confirmed. 1 so connection_housekeeping() + prints its message */ + c->last_rcvd = 1; + spdlog::error("{} ({}): sendto() failed, disabling the connection", + print_addr(&c->src), fmt::ptr(c)); + } + } +} + +/* + +Handling code for packets coming from the receiver + +*/ +int get_pkt_idx(int idx, int increment) { + idx = idx + increment; + if (idx < 0) + idx += PKT_LOG_SZ; + idx %= PKT_LOG_SZ; + assert(idx >= 0 && idx < PKT_LOG_SZ); + return idx; +} + +void register_nak(int32_t packet) { + for (conn_t *c = conns; c != NULL; c = c->next) { + int idx = get_pkt_idx(c->pkt_idx, -1); + for (int i = idx; i != c->pkt_idx; i = get_pkt_idx(i, -1)) { + if (c->pkt_log[i] == packet) { + c->pkt_log[i] = -1; + // It might be better to use exponential decay like this + // c->window = c->window * 998 / 1000; + c->window -= WINDOW_DECR; + c->window = max(c->window, WINDOW_MIN * WINDOW_MULT); + spdlog::debug("{} ({}): found NAKed packet {} in the log", + print_addr(&c->src), fmt::ptr(c), packet); + return; + } + } + } + + spdlog::debug("Didn't find NAKed packet {} in our logs", packet); +} + +void register_srtla_ack(int32_t ack) { + int found = 0; + 
+ for (conn_t *c = conns; c != NULL; c = c->next) { + int idx = get_pkt_idx(c->pkt_idx, -1); + for (int i = idx; i != c->pkt_idx && !found; i = get_pkt_idx(i, -1)) { + if (c->pkt_log[i] == ack) { + found = 1; + if (c->in_flight_pkts > 0) { + c->in_flight_pkts--; + } + c->pkt_log[i] = -1; + + if (c->in_flight_pkts * WINDOW_MULT > c->window) { + c->window += WINDOW_INCR - 1; + } + + break; + } + } + + if (c->last_rcvd != 0) { + c->window += 1; + c->window = min(c->window, WINDOW_MAX * WINDOW_MULT); + } + } +} + +/* + TODO after the sequence number overflows, we should probably also mark high + sn packets as received. However, this shouldn't normally be an issue as SRTLA + ACKs acknowledge each packet individually. Also, if the SRTLA ACK is lost, + stale entries will be overwritten soon enough as pkt_log is a circular buffer +*/ +void conn_register_srt_ack(conn_t *c, int32_t ack) { + int count = 0; + int idx = get_pkt_idx(c->pkt_idx, -1); + for (int i = idx; i != c->pkt_idx; i = get_pkt_idx(i, -1)) { + if (c->pkt_log[i] < ack) { + c->pkt_log[i] = -1; + } else { + count++; + } + } + c->in_flight_pkts = count; +} + +void register_srt_ack(int32_t ack) { + for (conn_t *c = conns; c != NULL; c = c->next) { + conn_register_srt_ack(c, ack); + } +} + +void handle_srtla_data(conn_t *c) { + char buf[MTU]; + + int n = recvfrom(c->fd, &buf, MTU, 0, NULL, NULL); + if (n <= 0) + return; + + time_t ts; + get_seconds(&ts); + + uint16_t packet_type = get_srt_type(buf, n); + + /* Handling NGPs separately because we don't want them to update last_rcvd + Otherwise they could be keeping failed connections marked active */ + if (packet_type == SRTLA_TYPE_REG_NGP) { + /* Only process NGPs if: + * we don't have any established connections + * and we don't already have a pending REG1->REG2 exhange in flight + * and we don't have any pending REG2->REG3 exchanges in flight + */ + if (active_connections == 0 && pending_reg2_conn == NULL && + ts > pending_reg_timeout) { + if (send_reg1(c) == 0) { 
+ pending_reg2_conn = c; + pending_reg_timeout = ts + REG2_TIMEOUT; + } + } + return; + + } else if (packet_type == SRTLA_TYPE_REG2) { + if (pending_reg2_conn == c) { + char *id = &buf[2]; + if (memcmp(id, srtla_id, SRTLA_ID_LEN / 2) != 0) { + spdlog::error("{} ({}): got a mismatching ID in SRTLA_REG2", + print_addr(&c->src), fmt::ptr(c)); + return; + } + + spdlog::info("{} ({}): connection group registered", print_addr(&c->src), + fmt::ptr(c)); + memcpy(srtla_id, id, SRTLA_ID_LEN); + + /* Broadcast REG2 */ + for (conn_t *i = conns; i != NULL; i = i->next) { + send_reg2(i); + } + + pending_reg2_conn = NULL; + pending_reg_timeout = ts + REG3_TIMEOUT; + } + return; + } + + c->last_rcvd = ts; + + switch (packet_type) { + case SRT_TYPE_ACK: { + uint32_t last_ack = *((uint32_t *)&buf[16]); + last_ack = be32toh(last_ack); + register_srt_ack(last_ack); + break; + } + + case SRT_TYPE_NAK: { + uint32_t *ids = (uint32_t *)buf; + for (int i = 4; i < n / 4; i++) { + uint32_t id = be32toh(ids[i]); + if (id & (1 << 31)) { + id = id & 0x7FFFFFFF; + uint32_t last_id = be32toh(ids[i + 1]); + for (int32_t lost = id; lost <= last_id; lost++) { + register_nak(lost); + } + i++; + } else { + register_nak(id); + } + } + break; + } + + // srtla packets below, don't send to SRT + case SRTLA_TYPE_ACK: { + uint32_t *acks = (uint32_t *)buf; + for (int i = 1; i < n / 4; i++) { + uint32_t id = be32toh(acks[i]); + spdlog::debug("{} ({}): ack {}\n", print_addr(&c->src), fmt::ptr(c), id); + register_srtla_ack(id); + } + return; + } + case SRTLA_TYPE_KEEPALIVE: + spdlog::debug("{} ({}): got a keepalive", print_addr(&c->src), fmt::ptr(c)); + return; // don't send to SRT + + case SRTLA_TYPE_REG3: + has_connected = 1; + active_connections++; + spdlog::info("{} ({}): connection established", print_addr(&c->src), + fmt::ptr(c)); + return; + } // switch + + sendto(listenfd, &buf, n, 0, &srt_addr, addr_len); +} + +/* + +Connection and socket management + +*/ +conn_t *conn_find_by_src(struct sockaddr *src) 
{ + for (conn_t *c = conns; c != NULL; c = c->next) { + if (memcmp(src, &c->src, sizeof(*src)) == 0) { + return c; + } + } + + return NULL; +} + +int setup_conns(char *source_ip_file) { + FILE *config = fopen(source_ip_file, "r"); + if (config == NULL) { + spdlog::critical("Failed to open the source ip file {}", source_ip_file); + exit(EXIT_FAILURE); + } + + int count = 0; + char *line = NULL; + size_t line_len = 0; + while (getline(&line, &line_len, config) >= 0) { + char *nl; + if ((nl = strchr(line, '\n'))) { + *nl = '\0'; + } + + struct sockaddr src; + + int ret = parse_ip((struct sockaddr_in *)&src, line); + if (ret == 0) { + conn_t *c = conn_find_by_src(&src); + if (c == NULL) { + conn_t *c = static_cast(calloc(1, sizeof(conn_t))); + assert(c != NULL); + + c->src = src; + c->fd = -1; + c->window = WINDOW_DEF * WINDOW_MULT; + + c->next = conns; + conns = c; + + count++; + + spdlog::info("Added connection via {} ({})", print_addr(&c->src), + fmt::ptr(c)); + } else { + c->removed = 0; + } + } + } + if (line) + free(line); + + fclose(config); + + return count; +} + +void update_conns(char *source_ip_file) { + for (conn_t *c = conns; c != NULL; c = c->next) { + c->removed = 1; + } + + setup_conns(source_ip_file); + + conn_t **prev = &conns; + conn_t *next; + for (conn_t *c = conns; c != NULL; c = next) { + next = c->next; + if (c->removed) { + spdlog::info("Removed connection via {} ({})", print_addr(&c->src), + fmt::ptr(c)); + + if (c == pending_reg2_conn) { + pending_reg2_conn = NULL; + } + + remove_active_fd(c->fd); + close(c->fd); + *prev = c->next; + free(c); + } else { + prev = &c->next; + } + } +} + +void schedule_update_conns(int signal) { do_update_conns = 1; } + +int open_socket(conn_t *c, int quiet) { + if (c->fd >= 0) { + remove_active_fd(c->fd); + close(c->fd); + c->fd = -1; + } + + // Set up the socket + int fd = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (fd < 0) { + spdlog::error("Failed to open a socket"); + return -1; + } + int bufsize 
= SEND_BUF_SIZE; + int ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)); + if (ret != 0) { + spdlog::error("Failed to set send buffer size ({} bytes)", bufsize); + goto err; + } + + // Bind it to the source address + ret = bind(fd, &c->src, sizeof(c->src)); + if (ret != 0) { + if (!quiet) { + spdlog::error("Failed to bind to the source address {}", + print_addr(&c->src)); + } + goto err; + } + + add_active_fd(fd); + c->fd = fd; + + return 0; + +err: + close(fd); + return -1; +} + +int open_conns(const char *host, const char *port) { + // Check that we can actually open & bind at least one socket + int opened = 0; + for (conn_t *c = conns; c != NULL; c = c->next) { + if (open_socket(c, 0) == 0) { + opened++; + } + } + return opened; +} + +/* + +Connection housekeeping + +*/ +void set_srtla_addr(struct addrinfo *addr) { + memcpy(&srtla_addr, addr->ai_addr, addr->ai_addrlen); + spdlog::info("Trying to connect to {}...", print_addr(&srtla_addr)); +} + +void send_keepalive(conn_t *c) { + spdlog::debug("{} ({}): sending keepalive", print_addr(&c->src), fmt::ptr(c)); + uint16_t pkt = htobe16(SRTLA_TYPE_KEEPALIVE); + // ignoring the result on purpose + sendto(c->fd, &pkt, sizeof(pkt), 0, &srtla_addr, addr_len); +} + +#define HOUSEKEEPING_INT 1000 // ms +void connection_housekeeping() { + static uint64_t all_failed_at = 0; + /* We use milliseconds here because with a seconds timer we may be + resending a second REG2 very soon after the first one, depending + on when the first execution happens within the seconds interval */ + static uint64_t last_ran = 0; + uint64_t ms; + assert(get_ms(&ms) == 0); + if ((last_ran + HOUSEKEEPING_INT) > ms) + return; + + time_t time = (time_t)(ms / 1000); + + active_connections = 0; + + if (pending_reg2_conn && time > pending_reg_timeout) { + pending_reg2_conn = NULL; + } + + for (conn_t *c = conns; c != NULL; c = c->next) { + if (c->fd < 0) { + open_socket(c, 1); + continue; + } + + if (conn_timed_out(c, time)) { + /* 
When we first detect the connection having failed, + we reset its status and print a message */ + if (c->last_rcvd > 0) { + spdlog::info("{} ({}): connection failed, attempting to reconnect", + print_addr(&c->src), fmt::ptr(c)); + c->last_rcvd = 0; + c->last_sent = 0; + c->window = WINDOW_MIN * WINDOW_MULT; + c->in_flight_pkts = 0; + for (int i = 0; i < PKT_LOG_SZ; i++) { + c->pkt_log[i] = -1; + } + } + + if (pending_reg2_conn == NULL) { + /* As the connection has timed out on our end, the receiver might have + garbage collected it. Try to re-establish it rather than send a + keepalive */ + send_reg2(c); + } else if (pending_reg2_conn == c) { + send_reg1(c); + } + continue; + } + + /* If a connection has received data in the last CONN_TIMEOUT seconds, + then it's active */ + active_connections++; + + if ((c->last_sent + IDLE_TIME) < time) { + send_keepalive(c); + } + } + + if (active_connections == 0) { + if (all_failed_at == 0) { + all_failed_at = ms; + } + + if (has_connected) { + spdlog::error("warning: no available connections"); + } + + // Timeout when all connections have failed + if (ms > (all_failed_at + (GLOBAL_TIMEOUT * 1000))) { + if (has_connected) { + spdlog::critical("Failed to re-establish any connections to {}", + print_addr(&srtla_addr)); + exit(EXIT_FAILURE); + } + + spdlog::error("Failed to establish any initial connections to {}", + print_addr(&srtla_addr)); + + // Walk through the list of resolved addresses + if (addrs->ai_next) { + addrs = addrs->ai_next; + set_srtla_addr(addrs); + all_failed_at = 0; + } else { + exit(EXIT_FAILURE); + } + } + } else { + all_failed_at = 0; + } + + last_ran = ms; +} + +inline std::vector get_random_bytes(size_t size) { + std::vector ret; + ret.resize(size); + + std::ifstream f("/dev/urandom"); + if (!f.is_open()) { + throw std::runtime_error("Failed to open /dev/urandom for random bytes"); + } + + f.read(ret.data(), size); + if (f.gcount() != static_cast(size) || f.fail()) { + f.close(); + throw 
std::runtime_error( + "Failed to read sufficient random bytes from /dev/urandom"); + } + f.close(); + + return ret; +} + +int main(int argc, char **argv) { + argparse::ArgumentParser args("srtla_send", VERSION); + // SRT_LISTEN_PORT SRTLA_HOST SRTLA_PORT BIND_IPS_FILE + args.add_argument("listen_port") + .help("Port to bind the SRT socket to") + .default_value((uint16_t)5000) + .scan<'d', uint16_t>(); + args.add_argument("srtla_host") + .help("Hostname of the upstream SRTLA server") + .default_value(std::string{"127.0.0.1"}); + args.add_argument("srtla_port") + .help("Port of the upstream SRTLA server") + .default_value((uint16_t)5001) + .scan<'d', uint16_t>(); + args.add_argument("ips_file") + .help("File containing the source IP addresses") + .default_value(std::string{"/tmp/srtla_ips"}); + args.add_argument("--verbose") + .help("Enable verbose logging") + .default_value(false) + .implicit_value(true); + + try { + args.parse_args(argc, argv); + } catch (const std::runtime_error &err) { + std::cerr << err.what() << std::endl; + std::cerr << args; + std::exit(1); + } + if (args.get("--verbose")) + spdlog::set_level(spdlog::level::debug); + + std::string ips_file = args.get("ips_file"); + source_ip_file = (char *)ips_file.c_str(); + int conn_count = setup_conns(source_ip_file); + if (conn_count <= 0) { + spdlog::critical("Failed to parse any IP addresses in {}", source_ip_file); + exit(EXIT_FAILURE); + } + + struct sockaddr_in listen_addr; + + int port = args.get("listen_port"); + + // Read a random connection group id for this session + auto random_bytes = get_random_bytes(SRTLA_ID_LEN / 2); + std::memcpy(srtla_id, random_bytes.data(), SRTLA_ID_LEN / 2); + + FD_ZERO(&active_fds); + + listen_addr.sin_family = AF_INET; + listen_addr.sin_addr.s_addr = INADDR_ANY; + listen_addr.sin_port = htons(port); + listenfd = socket(AF_INET, SOCK_DGRAM, 0); + if (listenfd < 0) { + spdlog::critical("Failed to create a socket"); + exit(EXIT_FAILURE); + } + + int ret = + 
bind(listenfd, (struct sockaddr *)&listen_addr, sizeof(listen_addr)); + if (ret < 0) { + spdlog::critical("Failed to bind to port {}", port); + exit(EXIT_FAILURE); + } + add_active_fd(listenfd); + + std::string srtla_host = args.get("srtla_host"); + std::string srtla_port = std::to_string(args.get("srtla_port")); + int connected = open_conns(srtla_host.c_str(), srtla_port.c_str()); + if (connected < 1) { + spdlog::critical("Failed to open and bind to any of the IP addresses in {}", + source_ip_file); + exit(EXIT_FAILURE); + } + + // Resolve the address of the receiver + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_DGRAM; + ret = getaddrinfo(srtla_host.c_str(), srtla_port.c_str(), &hints, &addrs); + if (ret != 0) { + spdlog::critical("Failed to resolve {}: {}", srtla_host, gai_strerror(ret)); + exit(EXIT_FAILURE); + } + + set_srtla_addr(addrs); + + signal(SIGHUP, schedule_update_conns); + + int info_int = LOG_PKT_INT; + + while (1) { + if (do_update_conns) { + update_conns(source_ip_file); + do_update_conns = 0; + } + + connection_housekeeping(); + + fd_set read_fds = active_fds; + struct timeval to = {.tv_sec = 0, .tv_usec = 200 * 1000}; + ret = select(FD_SETSIZE, &read_fds, NULL, NULL, &to); + + if (ret > 0) { + if (FD_ISSET(listenfd, &read_fds)) { + handle_srt_data(listenfd); + } + + for (conn_t *c = conns; c != NULL; c = c->next) { + if (c->fd >= 0 && FD_ISSET(c->fd, &read_fds)) { + handle_srtla_data(c); + } + } + } // ret > 0 + + info_int--; + if (info_int == 0) { + for (conn_t *c = conns; c != NULL; c = c->next) { + spdlog::debug("{} ({}): in flight: {}, window: {}, last_rcvd {}", + print_addr(&c->src), fmt::ptr(c), c->in_flight_pkts, + c->window, c->last_rcvd); + } + info_int = LOG_PKT_INT; + } + } // while(1); } \ No newline at end of file diff --git a/src/sender.h b/src/sender.h index dad2966..83db506 100644 --- a/src/sender.h +++ b/src/sender.h @@ -1,28 +1,28 @@ -/* - srtla - SRT 
transport proxy with link aggregation, forked by IRLServer - Copyright (C) 2020-2021 BELABOX project - Copyright (C) 2025 IRLServer.com - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -#pragma once - -#include - -#include - -extern "C" { -#include "common.h" +/* + srtla - SRT transport proxy with link aggregation, forked by IRLServer + Copyright (C) 2020-2021 BELABOX project + Copyright (C) 2025 IRLServer.com + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
+*/ + +#pragma once + +#include + +#include + +extern "C" { +#include "common.h" } \ No newline at end of file diff --git a/src/utils/nak_dedup.cpp b/src/utils/nak_dedup.cpp new file mode 100644 index 0000000..f691e8d --- /dev/null +++ b/src/utils/nak_dedup.cpp @@ -0,0 +1,54 @@ +#include "nak_dedup.h" + +#include + +namespace srtla::utils { + +uint64_t NakDeduplicator::hash_nak_payload(const uint8_t *buffer, int length, int prefix_bytes) { + if (length <= 16) { + return 0; + } + + const uint8_t *payload = buffer + 16; + size_t payload_length = static_cast(length - 16); + if (prefix_bytes >= 0 && static_cast(prefix_bytes) < payload_length) { + payload_length = static_cast(prefix_bytes); + } + + uint64_t hash = FNV_OFFSET_BASIS; + for (size_t i = 0; i < payload_length; ++i) { + hash ^= static_cast(payload[i]); + hash *= FNV_PRIME; + } + + return hash; +} + +bool NakDeduplicator::should_accept_nak(std::unordered_map &cache, + uint64_t hash, + uint64_t current_time_ms) { + auto it = cache.find(hash); + if (it == cache.end()) { + cache.emplace(hash, NakHashEntry{current_time_ms, 0}); + return true; + } + +if (current_time_ms < it->second.timestamp_ms) { + // Clock moved backwards, treat as within suppression window + return false; + } + + if (current_time_ms - it->second.timestamp_ms < SUPPRESS_MS) { + return false; + } + + if (it->second.repeat_count >= MAX_REPEATS) { + return false; + } + + it->second.timestamp_ms = current_time_ms; + ++it->second.repeat_count; + return true; +} + +} // namespace srtla::utils diff --git a/src/utils/nak_dedup.h b/src/utils/nak_dedup.h new file mode 100644 index 0000000..677e268 --- /dev/null +++ b/src/utils/nak_dedup.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace srtla::utils { + +struct NakHashEntry { + uint64_t timestamp_ms = 0; + int repeat_count = 0; +}; + +class NakDeduplicator { +public: + static uint64_t hash_nak_payload(const uint8_t *buffer, int length, int prefix_bytes = -1); + static bool 
should_accept_nak(std::unordered_map &cache, + uint64_t hash, + uint64_t current_time_ms); + +private: + static constexpr uint64_t FNV_OFFSET_BASIS = 1469598103934665603ull; + static constexpr uint64_t FNV_PRIME = 1099511628211ull; + static constexpr uint64_t SUPPRESS_MS = 100; + static constexpr int MAX_REPEATS = 1; +}; + +} // namespace srtla::utils diff --git a/src/utils/network_utils.cpp b/src/utils/network_utils.cpp new file mode 100644 index 0000000..8d49fdd --- /dev/null +++ b/src/utils/network_utils.cpp @@ -0,0 +1,195 @@ +#include "network_utils.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +extern "C" { +#include "../common.h" +} + +namespace srtla::utils { + +int NetworkUtils::epoll_add(int epoll_fd, int socket_fd, uint32_t events, void *priv_data) { + struct epoll_event ev {}; + ev.events = events; + ev.data.ptr = priv_data; + return epoll_ctl(epoll_fd, EPOLL_CTL_ADD, socket_fd, &ev); +} + +int NetworkUtils::epoll_remove(int epoll_fd, int socket_fd) { + struct epoll_event ev {}; + return epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socket_fd, &ev); +} + +uint16_t NetworkUtils::get_local_port(int socket_fd) { + struct sockaddr_storage local_addr {}; + socklen_t len = sizeof(local_addr); + if (getsockname(socket_fd, reinterpret_cast(&local_addr), &len) != 0) { + spdlog::error("getsockname failed for socket {}: {}", socket_fd, strerror(errno)); + return 0; + } + + if (local_addr.ss_family == AF_INET) { + return ntohs(reinterpret_cast(&local_addr)->sin_port); + } else if (local_addr.ss_family == AF_INET6) { + return ntohs(reinterpret_cast(&local_addr)->sin6_port); + } + + spdlog::error("Unknown address family {} for socket {}", local_addr.ss_family, socket_fd); + return 0; +} + +int NetworkUtils::resolve_srt_address(const char *host, + const char *port, + struct sockaddr_storage *out_addr, + int recv_buf_size, + int send_buf_size) { + srt_handshake_t hs_packet {}; + hs_packet.header.type = 
htobe16(SRT_TYPE_HANDSHAKE); + hs_packet.version = htobe32(4); + hs_packet.ext_field = htobe16(2); + hs_packet.handshake_type = htobe32(1); + + struct addrinfo hints {}; + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_DGRAM; + + struct addrinfo *srt_addrs = nullptr; + int ret = getaddrinfo(host, port, &hints, &srt_addrs); + if (ret != 0) { + spdlog::error("Failed to resolve the address: {}:{}: {}", host, port, gai_strerror(ret)); + return -1; + } + +int found = -1; + int tmp_sock = -1; + + for (struct addrinfo *addr = srt_addrs; addr != nullptr && found == -1; addr = addr->ai_next) { + spdlog::info("Trying to connect to SRT at {}:{}...", print_addr(addr->ai_addr), port); + + // Create socket with the appropriate family for this address + tmp_sock = socket(addr->ai_family, SOCK_DGRAM, 0); + if (tmp_sock < 0) { + spdlog::error("Failed to create a UDP socket for family {}", addr->ai_family); + continue; + } + + // Set socket options + bool socket_opts_ok = true; + if (setsockopt(tmp_sock, SOL_SOCKET, SO_RCVBUF, &recv_buf_size, sizeof(recv_buf_size)) != 0) { + spdlog::error("Failed to set a receive buffer size ({})", recv_buf_size); + socket_opts_ok = false; + } + if (socket_opts_ok && setsockopt(tmp_sock, SOL_SOCKET, SO_SNDBUF, &send_buf_size, sizeof(send_buf_size)) != 0) { + spdlog::error("Failed to set a send buffer size ({})", send_buf_size); + socket_opts_ok = false; + } + + // Set receive timeout to prevent indefinite blocking + if (socket_opts_ok) { + struct timeval timeout; + timeout.tv_sec = 2; // 2 seconds timeout + timeout.tv_usec = 0; + if (setsockopt(tmp_sock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) { + spdlog::error("Failed to set receive timeout"); + socket_opts_ok = false; + } + } + + if (!socket_opts_ok) { + close(tmp_sock); + tmp_sock = -1; + continue; + } + + // Connect to the address + ret = connect(tmp_sock, addr->ai_addr, addr->ai_addrlen); + if (ret != 0) { + spdlog::info("Connection failed"); + close(tmp_sock); + 
tmp_sock = -1; + continue; + } + + // Send handshake packet + ret = send(tmp_sock, &hs_packet, sizeof(hs_packet), 0); + if (ret != sizeof(hs_packet)) { + spdlog::info("Failed to send handshake packet"); + close(tmp_sock); + tmp_sock = -1; + continue; + } + + // Receive response + char buffer[MTU]; + ret = recv(tmp_sock, &buffer, MTU, 0); + if (ret == sizeof(hs_packet)) { + std::memcpy(out_addr, addr->ai_addr, addr->ai_addrlen); + spdlog::info("Success"); + found = 1; + } else { + spdlog::info("Failed to receive handshake response"); + close(tmp_sock); + tmp_sock = -1; + } + } + + if (tmp_sock != -1) { + close(tmp_sock); + } + + if (found == -1 && srt_addrs != nullptr) { + if (srt_addrs->ai_family == AF_INET) { + std::memcpy(out_addr, srt_addrs->ai_addr, sizeof(struct sockaddr_in)); + } else if (srt_addrs->ai_family == AF_INET6) { + std::memcpy(out_addr, srt_addrs->ai_addr, sizeof(struct sockaddr_in6)); + } + spdlog::warn("Failed to confirm that a SRT server is reachable at any address. Proceeding with the first address: {}", + print_addr(reinterpret_cast(out_addr))); + found = 0; + } + + freeaddrinfo(srt_addrs); + return found; +} + +int NetworkUtils::constant_time_compare(const void *a, const void *b, int length) { + const auto *ca = static_cast(a); + const auto *cb = static_cast(b); + unsigned char diff = 0; + for (int i = 0; i < length; ++i) { + diff |= ca[i] ^ cb[i]; + } + return diff ? 
-1 : 0; +} + +void NetworkUtils::get_random_bytes(char *buffer, size_t size) { + std::ifstream random("/dev/urandom", std::ios::in | std::ios::binary); + if (!random.is_open()) { + spdlog::error("Failed to open /dev/urandom"); + throw std::runtime_error("Failed to open /dev/urandom"); + } + + size_t total_read = 0; + while (total_read < size) { + random.read(buffer + total_read, static_cast(size - total_read)); + std::streamsize bytes_read = random.gcount(); + if (bytes_read <= 0) { + spdlog::error("Failed to read from /dev/urandom: got {} of {} bytes", total_read, size); + throw std::runtime_error("Failed to read random bytes from /dev/urandom"); + } + total_read += static_cast(bytes_read); + } +} + +} // namespace srtla::utils diff --git a/src/utils/network_utils.h b/src/utils/network_utils.h new file mode 100644 index 0000000..2f8b19a --- /dev/null +++ b/src/utils/network_utils.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace srtla::utils { + +class NetworkUtils { +public: + static int epoll_add(int epoll_fd, int socket_fd, uint32_t events, void *priv_data); + static int epoll_remove(int epoll_fd, int socket_fd); + + static uint16_t get_local_port(int socket_fd); + + static int resolve_srt_address(const char *host, + const char *port, + struct sockaddr_storage *out_addr, + int recv_buf_size, + int send_buf_size); + + static int constant_time_compare(const void *a, const void *b, int length); + static void get_random_bytes(char *buffer, size_t size); +}; + +} // namespace srtla::utils