From 34c818cd838f320f62e4fa3cfa47a693bc0e2a26 Mon Sep 17 00:00:00 2001 From: Tomasz Zawadzki Date: Thu, 20 Jan 2022 16:36:55 +0100 Subject: [PATCH 01/28] isal: compile compress_isal PMD without system-wide libisal SPDK provides isa-l submodule with -I and -L. Signed-off-by: Tomasz Zawadzki Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21092 (spdk-23.11) (cherry picked from commit 68108f1886be82bd8c2daea0e05237292c8fa222) Change-Id: I99924fc161a876ef017b9cdeeee52e2aed30d8ec Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/22689 Reviewed-by: Jim Harris Tested-by: Tomasz Zawadzki Reviewed-by: Tomasz Zawadzki --- drivers/compress/isal/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/compress/isal/meson.build b/drivers/compress/isal/meson.build index 4b3eaa2274e..03294c71e7a 100644 --- a/drivers/compress/isal/meson.build +++ b/drivers/compress/isal/meson.build @@ -3,7 +3,7 @@ dep = dependency('libisal', required: false, method: 'pkg-config') if not dep.found() - build = false + build = true reason = 'missing dependency, "libisal"' endif From 5aec55a1d6d6e942140f7c5120c90f287a57e8d6 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Thu, 15 Jul 2021 18:39:27 +0300 Subject: [PATCH 02/28] meson/mlx5: Suppress -Wunused-value diagnostic mlx5 common library checks if several symbols/definitions are presented in system header files. If some are not presented, they will be enabled by mlx5_glue library. 
The problem appears with clang and '-Werror' - code generated by meson is not compiled due to unused variable: Code: #include int main(void) { /* If it's not defined as a macro, try to use as a symbol */ #ifndef mlx5dv_create_flow_action_packet_reformat mlx5dv_create_flow_action_packet_reformat; #endif return 0; } Compiler stdout: Compiler stderr: /hpc/local/work/alexeymar/repo/spdk/dpdk/build-tmp/meson-private/tmp5obnak86/testfile.c:6:17: error: expression result unused [-Werror,-Wunused-value] mlx5dv_create_flow_action_packet_reformat; ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As result, almost all symbols are enabled in mlx5_glue while they exist is system headers. As result, we get multiple symbols redefenitions when we compile mlx5_common. As a solution for this problem we can suppress -Wunused-vaurable using pragma DPDK 23.11 note: Starting with commit bellow, all cflags are passed to the has_header_symbol(). (33d6694) build: use C11 standard To make sure that the symbol is properly detected, the pedantic flags needs to be removed. 
Signed-off-by: Alexey Marchuk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21093 (spdk-23.11) (cherry picked from commit 02e24e008bf2241f9f01360c2e0580700219c374) Change-Id: I03ba5d03f7e53d8e593a9de1deace5140c67d21d Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/22688 Reviewed-by: Tomasz Zawadzki Tested-by: Tomasz Zawadzki Reviewed-by: Jim Harris --- drivers/common/mlx5/linux/meson.build | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build index cdee40c5538..f3b8e5741ba 100644 --- a/drivers/common/mlx5/linux/meson.build +++ b/drivers/common/mlx5/linux/meson.build @@ -209,7 +209,11 @@ if libmtcr_ul_found endif foreach arg:has_sym_args - mlx5_config.set(arg[0], cc.has_header_symbol(arg[1], arg[2], dependencies: libs, args: cflags)) + file_prefix = '#pragma clang diagnostic ignored "-Wunused-value"' + cflags += [ + '-Wno-pedantic', + ] + mlx5_config.set(arg[0], cc.has_header_symbol(arg[1], arg[2], prefix : file_prefix, dependencies: libs, args: cflags)) endforeach foreach arg:has_member_args file_prefix = '#include <' + arg[1] + '>' From 023fd6c428a08a72509b8429afa678705a3ae63b Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Thu, 24 Aug 2023 15:07:10 +0200 Subject: [PATCH 03/28] malloc: fix allocation for a specific case with ASan Allocation would fail with ASan enabled if the size and alignment was equal to half of the page size, e.g.: size_t pg_sz = 2 * (1 << 20); rte_malloc(NULL, pg_sz / 2, pg_sz / 2); In such case, try_expand_heap_primary() only allocated one page but it is not enough to fit this allocation with such alignment and MALLOC_ELEM_TRAILER_LEN > 0, as correctly checked by malloc_elem_can_hold(). 
Signed-off-by: Artur Paszkiewicz Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21096 (spdk-23.11) (cherry picked from commit 5dd0c0388764244e7d1cafd29784be41bafd97d2) Change-Id: I50e51ed25ad9760260e50599405a0ed766a274c7 Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/23150 Reviewed-by: Tomasz Zawadzki Reviewed-by: Jim Harris Tested-by: Tomasz Zawadzki --- lib/eal/common/malloc_heap.c | 4 ++-- lib/eal/common/malloc_mp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/eal/common/malloc_heap.c b/lib/eal/common/malloc_heap.c index 5ff27548ff4..1cfb3bfa4d7 100644 --- a/lib/eal/common/malloc_heap.c +++ b/lib/eal/common/malloc_heap.c @@ -402,8 +402,8 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, int n_segs; bool callback_triggered = false; - alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) + - MALLOC_ELEM_OVERHEAD, pg_sz); + alloc_sz = RTE_ALIGN_CEIL(RTE_MAX(MALLOC_ELEM_HEADER_LEN, align) + + elt_size + MALLOC_ELEM_TRAILER_LEN, pg_sz); n_segs = alloc_sz / pg_sz; /* we can't know in advance how many pages we'll need, so we malloc */ diff --git a/lib/eal/common/malloc_mp.c b/lib/eal/common/malloc_mp.c index 2d39b0716f5..a188d5cafab 100644 --- a/lib/eal/common/malloc_mp.c +++ b/lib/eal/common/malloc_mp.c @@ -251,8 +251,8 @@ handle_alloc_request(const struct malloc_mp_req *m, return -1; } - alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(ar->elt_size, ar->align) + - MALLOC_ELEM_OVERHEAD, ar->page_sz); + alloc_sz = RTE_ALIGN_CEIL(RTE_MAX(MALLOC_ELEM_HEADER_LEN, ar->align) + + ar->elt_size + MALLOC_ELEM_TRAILER_LEN, ar->page_sz); n_segs = alloc_sz / ar->page_sz; /* we can't know in advance how many pages we'll need, so we malloc */ From 84c8af2440437d208772466da6421a6a2dcf3c1d Mon Sep 17 00:00:00 2001 From: Tomasz Zawadzki Date: Tue, 18 Jan 2022 14:50:37 +0100 Subject: [PATCH 04/28] crypto: increase RTE_CRYPTO_MAX_DEVS to accomodate QAT SYM and ASYM VFs In SPDK Jenkins CI 
the QAT devices only support 16VFs. Per DPDK QAT documentation this could exceed the value of RTE_CRYPTO_MAX_DEVS. Ideally this should be configured by SPDK when building submodule, but for now workaround #2258. Signed-off-by: Tomasz Zawadzki Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21071 (spdk-23.11) (cherry picked from commit a211408a784ec318d1b2e61962343eddce1a7a35) Change-Id: Ic9e22155564b2d1e6f685bac0f958836da4fc13b Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/23184 Reviewed-by: Jim Harris Tested-by: Tomasz Zawadzki Reviewed-by: Tomasz Zawadzki --- config/rte_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rte_config.h b/config/rte_config.h index dd7bb0d35bb..10014538e10 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -70,7 +70,7 @@ #define RTE_MAX_MULTI_HOST_CTRLS 4 /* cryptodev defines */ -#define RTE_CRYPTO_MAX_DEVS 64 +#define RTE_CRYPTO_MAX_DEVS 128 #define RTE_CRYPTODEV_NAME_LEN 64 #define RTE_CRYPTO_CALLBACKS 1 From 530d0d75b1f1cfdf925609d8828ad38aa8af2f67 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Fri, 22 Sep 2023 10:59:31 +0200 Subject: [PATCH 05/28] mem: allow using ASan in multi-process mode Multi-process applications operate on shared hugepage memory but each process has its own ASan shadow region which is not synchronized with the other processes. This causes issues when different processes try to use the same memory because they have their own view of which addresses are valid. Fix it by mapping the shadow regions for memseg lists as shared memory. The primary process is responsible for creating and removing the shared memory objects. Disable ASan instrumentation for triggering the page fault in alloc_seg() because if the segment is already allocated by another process and is marked as free in the shadow, accessing this address will cause an ASan error. 
Signed-off-by: Artur Paszkiewicz Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21097 (spdk-23.11) (cherry picked from commit f2cd1fb8eec58d190af52dac47ff9013ce084a9f) Change-Id: I2bb6ae100d080aad30ee44d7e5d200962f74d1a8 Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/23151 Tested-by: SPDK CI Jenkins Reviewed-by: Tomasz Zawadzki Reviewed-by: Jim Harris --- lib/eal/common/eal_common_memory.c | 7 ++ lib/eal/common/eal_private.h | 35 ++++++++++ lib/eal/linux/eal_memalloc.c | 23 +++++-- lib/eal/linux/eal_memory.c | 101 +++++++++++++++++++++++++++++ lib/eal/linux/meson.build | 4 ++ 5 files changed, 164 insertions(+), 6 deletions(-) diff --git a/lib/eal/common/eal_common_memory.c b/lib/eal/common/eal_common_memory.c index 60ddc30580a..d979c72aa7d 100644 --- a/lib/eal/common/eal_common_memory.c +++ b/lib/eal/common/eal_common_memory.c @@ -263,6 +263,11 @@ eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags) EAL_LOG(DEBUG, "VA reserved for memseg list at %p, size %zx", addr, mem_sz); + if (eal_memseg_list_map_asan_shadow(msl) != 0) { + RTE_LOG(ERR, EAL, "Failed to map ASan shadow region for memseg list"); + return -1; + } + return 0; } @@ -1050,6 +1055,8 @@ rte_eal_memory_detach(void) EAL_LOG(ERR, "Could not unmap memory: %s", rte_strerror(rte_errno)); + eal_memseg_list_unmap_asan_shadow(msl); + /* * we are detaching the fbarray rather than destroying because * other processes might still reference this fbarray, and we diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h index 71523cfdb82..f8a9f574d08 100644 --- a/lib/eal/common/eal_private.h +++ b/lib/eal/common/eal_private.h @@ -302,6 +302,41 @@ eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags); void eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs); +/** + * Map shared memory for MSL ASan shadow region. + * + * @param msl + * Memory segment list. 
+ * @return + * 0 on success, (-1) on failure. + */ +#ifdef RTE_MALLOC_ASAN +int +eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl); +#else +static inline int +eal_memseg_list_map_asan_shadow(__rte_unused struct rte_memseg_list *msl) +{ + return 0; +} +#endif + +/** + * Unmap the MSL ASan shadow region. + * + * @param msl + * Memory segment list. + */ +#ifdef RTE_MALLOC_ASAN +void +eal_memseg_list_unmap_asan_shadow(struct rte_memseg_list *msl); +#else +static inline void +eal_memseg_list_unmap_asan_shadow(__rte_unused struct rte_memseg_list *msl) +{ +} +#endif + /** * Distribute available memory between MSLs. * diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c index 0cc32959942..40b18bee415 100644 --- a/lib/eal/linux/eal_memalloc.c +++ b/lib/eal/linux/eal_memalloc.c @@ -511,6 +511,21 @@ resize_hugefile(int fd, uint64_t fa_offset, uint64_t page_sz, bool grow, grow, dirty); } +__rte_no_asan +static inline void +page_fault(void *addr) +{ + /* We need to trigger a write to the page to enforce page fault but we + * can't overwrite value that is already there, so read the old value + * and write it back. Kernel populates the page with zeroes initially. + * + * Disable ASan instrumentation here because if the segment is already + * allocated by another process and is marked as free in the shadow, + * accessing this address will cause an ASan error. + */ + *(volatile int *)addr = *(volatile int *)addr; +} + static int alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, struct hugepage_info *hi, unsigned int list_idx, @@ -636,12 +651,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, goto mapped; } - /* we need to trigger a write to the page to enforce page fault and - * ensure that page is accessible to us, but we can't overwrite value - * that is already there, so read the old value, and write itback. - * kernel populates the page with zeroes initially. 
- */ - *(volatile int *)addr = *(volatile int *)addr; + /* enforce page fault and ensure that page is accessible to us */ + page_fault(addr); iova = rte_mem_virt2iova(addr); if (iova == RTE_BAD_PHYS_ADDR) { diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c index 45879ca7434..b440b449dc0 100644 --- a/lib/eal/linux/eal_memory.c +++ b/lib/eal/linux/eal_memory.c @@ -41,6 +41,7 @@ #include "eal_filesystem.h" #include "eal_hugepages.h" #include "eal_options.h" +#include "malloc_elem.h" #define PFN_MASK_SIZE 8 @@ -1469,6 +1470,7 @@ eal_legacy_hugepage_init(void) if (msl->memseg_arr.count > 0) continue; /* this is an unused list, deallocate it */ + eal_memseg_list_unmap_asan_shadow(msl); mem_sz = msl->len; munmap(msl->base_va, mem_sz); msl->base_va = NULL; @@ -1956,3 +1958,102 @@ rte_eal_memseg_init(void) #endif memseg_secondary_init(); } + +#ifdef RTE_MALLOC_ASAN +int +eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) +{ + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + void *addr; + void *shadow_addr; + size_t shadow_sz; + int shm_oflag; + char shm_path[PATH_MAX]; + int shm_fd; + int ret = 0; + + if (!msl->heap) + return 0; + + /* these options imply no secondary process support */ + if (internal_conf->hugepage_file.unlink_before_mapping || + internal_conf->no_shconf || internal_conf->no_hugetlbfs) { + RTE_ASSERT(rte_eal_process_type() != RTE_PROC_SECONDARY); + return 0; + } + + shadow_addr = ASAN_MEM_TO_SHADOW(msl->base_va); + shadow_sz = msl->len >> ASAN_SHADOW_SCALE; + + snprintf(shm_path, sizeof(shm_path), "/%s_%s_shadow", + eal_get_hugefile_prefix(), msl->memseg_arr.name); + + shm_oflag = O_RDWR; + if (internal_conf->process_type == RTE_PROC_PRIMARY) + shm_oflag |= O_CREAT | O_TRUNC; + + shm_fd = shm_open(shm_path, shm_oflag, 0600); + if (shm_fd == -1) { + RTE_LOG(DEBUG, EAL, "shadow shm_open() failed: %s\n", + strerror(errno)); + return -1; + } + + if (internal_conf->process_type == RTE_PROC_PRIMARY) 
{ + ret = ftruncate(shm_fd, shadow_sz); + if (ret == -1) { + RTE_LOG(DEBUG, EAL, "shadow ftruncate() failed: %s\n", + strerror(errno)); + goto out; + } + } + + addr = mmap(shadow_addr, shadow_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, shm_fd, 0); + if (addr == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "shadow mmap() failed: %s\n", + strerror(errno)); + ret = -1; + goto out; + } + + if (addr != shadow_addr) { + RTE_LOG(DEBUG, EAL, "wrong shadow mmap() address\n"); + munmap(addr, shadow_sz); + ret = -1; + } +out: + close(shm_fd); + if (ret != 0) { + if (internal_conf->process_type == RTE_PROC_PRIMARY) + shm_unlink(shm_path); + } + + return ret; +} + +void +eal_memseg_list_unmap_asan_shadow(struct rte_memseg_list *msl) +{ + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + + if (!msl->heap || internal_conf->hugepage_file.unlink_before_mapping || + internal_conf->no_shconf || internal_conf->no_hugetlbfs) + return; + + if (munmap(ASAN_MEM_TO_SHADOW(msl->base_va), + msl->len >> ASAN_SHADOW_SCALE) != 0) + RTE_LOG(ERR, EAL, "Could not unmap asan shadow memory: %s\n", + strerror(errno)); + if (internal_conf->process_type == RTE_PROC_PRIMARY) { + char shm_path[PATH_MAX]; + + snprintf(shm_path, sizeof(shm_path), "/%s_%s_shadow", + eal_get_hugefile_prefix(), + msl->memseg_arr.name); + shm_unlink(shm_path); + } +} +#endif diff --git a/lib/eal/linux/meson.build b/lib/eal/linux/meson.build index e99ebed2569..1e8a48c8d32 100644 --- a/lib/eal/linux/meson.build +++ b/lib/eal/linux/meson.build @@ -23,3 +23,7 @@ deps += ['kvargs', 'telemetry'] if has_libnuma dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true) endif + +if dpdk_conf.has('RTE_MALLOC_ASAN') + ext_deps += cc.find_library('rt') +endif From a086ce6b8f202d18250610bb58e14a74875c89a6 Mon Sep 17 00:00:00 2001 From: Tomasz Zawadzki Date: Wed, 1 Dec 2021 11:39:08 +0100 Subject: [PATCH 06/28] meson: remove checks for optional libraries Very few libraries in DPDK are marked as optional. 
For SPDK when most of the drivers are disabled, the requirements are much lower. By removing the check for optional libraries, it is possible to pass a narrow set of actually required libraries. Signed-off-by: Tomasz Zawadzki Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21095 (spdk-23.11) (cherry picked from commit 792dc9824d8d15c9f9fcfbe87c0b05242306a26b) Change-Id: I60fbc7307a4f33482025a3b3c00948c091d236ff Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/22687 Tested-by: SPDK CI Jenkins Reviewed-by: Tomasz Zawadzki Reviewed-by: Jim Harris --- lib/meson.build | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/meson.build b/lib/meson.build index 179a2729326..9c7f0b517c7 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -159,12 +159,8 @@ foreach l:libraries build = false reason = 'not in enabled libraries build config' elif disable_libs.contains(l) - if always_enable.contains(l) - warning('Cannot disable mandatory library "@0@"'.format(l)) - else - build = false - reason = 'explicitly disabled via build config' - endif + build = false + reason = 'explicitly disabled via build config' endif if build From bbbcf016e3c77820fc8dfe15af3dd805bc775172 Mon Sep 17 00:00:00 2001 From: Krishna Kanth Reddy Date: Wed, 14 Jul 2021 13:03:31 +0530 Subject: [PATCH 07/28] ARM64: Cross-Compilation Support Modified the Configuration file to use the latest ARM Cross-Compiler. Fixed the linker errors for the undefined references to the APIs isal_deflate_init, isal_deflate, isal_inflate_init, isal_inflate, isal_inflate_stateless, isal_deflate_stateless, isal_deflate_set_hufftables in the case of ARM Cross-Compilation. 
Signed-off-by: Krishna Kanth Reddy Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21094 (spdk-23.11) (cherry picked from commit 26bb8ea9748890596904a90c6d1df9ff501975e9) Change-Id: I0ba89e5640760276646d6b9211585ad116ebf446 Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/22686 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Tomasz Zawadzki --- config/arm/arm64_armv8_linux_gcc | 10 +++++----- drivers/compress/isal/meson.build | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/config/arm/arm64_armv8_linux_gcc b/config/arm/arm64_armv8_linux_gcc index 529694b49dd..6dfb8aa206e 100644 --- a/config/arm/arm64_armv8_linux_gcc +++ b/config/arm/arm64_armv8_linux_gcc @@ -1,9 +1,9 @@ [binaries] -c = ['ccache', 'aarch64-linux-gnu-gcc'] -cpp = ['ccache', 'aarch64-linux-gnu-g++'] -ar = 'aarch64-linux-gnu-gcc-ar' -strip = 'aarch64-linux-gnu-strip' -pkgconfig = 'aarch64-linux-gnu-pkg-config' +c = ['ccache', 'aarch64-none-linux-gnu-gcc'] +cpp = ['ccache', 'aarch64-none-linux-gnu-cpp'] +ar = 'aarch64-none-linux-gnu-gcc-ar' +strip = 'aarch64-none-linux-gnu-strip' +pkgconfig = 'aarch64-none-linux-gnu-pkg-config' pcap-config = '' [host_machine] diff --git a/drivers/compress/isal/meson.build b/drivers/compress/isal/meson.build index 03294c71e7a..5daf24d1135 100644 --- a/drivers/compress/isal/meson.build +++ b/drivers/compress/isal/meson.build @@ -5,6 +5,10 @@ dep = dependency('libisal', required: false, method: 'pkg-config') if not dep.found() build = true reason = 'missing dependency, "libisal"' + isal_dep = cc.find_library('libisal', required: false) + if isal_dep.found() + ext_deps += isal_dep + endif endif deps += 'bus_vdev' From 08f3a46de70afff49f55d175de690b5ad7e4a44d Mon Sep 17 00:00:00 2001 From: Tomasz Zawadzki Date: Tue, 19 Dec 2023 13:58:40 +0100 Subject: [PATCH 08/28] pmdinfogen: avoid empty string in ELFSymbol() Starting with Clang 17 the list of pmds could contain empty string. 
Please see: https://bugs.dpdk.org/show_bug.cgi?id=1313 This is a fix proposed by alialnu@nvidia.com in the issue above. Signed-off-by: Tomasz Zawadzki Change-Id: Ic797fb39b6676d27aab0acdfdf79056ec03bbb35 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/21135/ (spdk-23.11) Signed-off-by: Sebastian Brzezinka Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/23194 Reviewed-by: Konrad Sztyber Tested-by: SPDK CI Jenkins --- buildtools/pmdinfogen.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/buildtools/pmdinfogen.py b/buildtools/pmdinfogen.py index 2a44f17bdad..2b298726493 100755 --- a/buildtools/pmdinfogen.py +++ b/buildtools/pmdinfogen.py @@ -71,7 +71,9 @@ def find_by_prefix(self, prefix): for i in range(self._symtab.num_symbols()): symbol = self._symtab.get_symbol(i) if symbol.name.startswith(prefix): - yield ELFSymbol(self._image, symbol) + elf_symbol = ELFSymbol(self._image, symbol) + if elf_symbol.string_value: + yield elf_symbol class COFFSymbol: From 9e645c3bcbdcfbefbea6e0f4096b838ecb9fe42f Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Wed, 12 Jun 2024 13:43:57 +0200 Subject: [PATCH 09/28] mem: map ASan shadow shm after mapping the segment Due to a change in ASan behavior[1] the mapped shadow shared memory regions are remapped later, when segments are mapped. So instead of mapping the whole shadow region when reserving the memseg list memory, map only the fragments corresponding to the segments after they are mapped. 
[1] https://github.com/llvm/llvm-project/commit/a34e702aa16fde4cc76e9360d985a64e008e0b23 Signed-off-by: Artur Paszkiewicz Change-Id: Ia9881639ddeb158da6e6590f3fef95e314e2a33d Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/23659 Reviewed-by: Jim Harris Reviewed-by: Tomasz Zawadzki Tested-by: SPDK CI Jenkins --- lib/eal/common/eal_common_memory.c | 3 ++ lib/eal/include/rte_memory.h | 3 ++ lib/eal/linux/eal_memalloc.c | 30 ++++++++++++++++++++ lib/eal/linux/eal_memory.c | 44 ++++++++---------------------- 4 files changed, 47 insertions(+), 33 deletions(-) diff --git a/lib/eal/common/eal_common_memory.c b/lib/eal/common/eal_common_memory.c index d979c72aa7d..d99f2496fc6 100644 --- a/lib/eal/common/eal_common_memory.c +++ b/lib/eal/common/eal_common_memory.c @@ -211,6 +211,9 @@ eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name, msl->socket_id = socket_id; msl->base_va = NULL; msl->heap = heap; +#ifdef RTE_MALLOC_ASAN + msl->shm_fd = -1; +#endif EAL_LOG(DEBUG, "Memseg list allocated at socket %i, page size 0x%"PRIx64"kB", diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h index 842362d5272..1fcd1b02ca1 100644 --- a/lib/eal/include/rte_memory.h +++ b/lib/eal/include/rte_memory.h @@ -78,6 +78,9 @@ struct rte_memseg_list { unsigned int external; /**< 1 if this list points to external memory */ unsigned int heap; /**< 1 if this list points to a heap */ struct rte_fbarray memseg_arr; +#ifdef RTE_MALLOC_ASAN + int shm_fd; +#endif }; /** diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c index 40b18bee415..2ef3732e276 100644 --- a/lib/eal/linux/eal_memalloc.c +++ b/lib/eal/linux/eal_memalloc.c @@ -37,6 +37,7 @@ #include "eal_memalloc.h" #include "eal_memcfg.h" #include "eal_private.h" +#include "malloc_elem.h" const int anonymous_hugepages_supported = #ifdef MAP_HUGE_SHIFT @@ -688,6 +689,35 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, __func__); #endif +#ifdef RTE_MALLOC_ASAN + struct 
rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg_list *msl = &mcfg->memsegs[list_idx]; + + if (msl->shm_fd != -1) { + void *shadow_base_addr, *shadow_addr; + off_t shadow_map_offset; + size_t shadow_sz; + + shadow_base_addr = ASAN_MEM_TO_SHADOW(msl->base_va); + shadow_addr = ASAN_MEM_TO_SHADOW(addr); + shadow_map_offset = (char *)shadow_addr - (char *)shadow_base_addr; + shadow_sz = alloc_sz >> ASAN_SHADOW_SCALE; + + va = mmap(shadow_addr, shadow_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, msl->shm_fd, shadow_map_offset); + if (va == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "shadow mmap() failed: %s\n", + strerror(errno)); + goto mapped; + } + + if (va != shadow_addr) { + RTE_LOG(DEBUG, EAL, "wrong shadow mmap() address\n"); + munmap(addr, shadow_sz); + goto mapped; + } + } +#endif huge_recover_sigbus(); ms->addr = addr; diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c index b440b449dc0..7e3192f6836 100644 --- a/lib/eal/linux/eal_memory.c +++ b/lib/eal/linux/eal_memory.c @@ -1965,13 +1965,9 @@ eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) { const struct internal_config *internal_conf = eal_get_internal_configuration(); - void *addr; - void *shadow_addr; - size_t shadow_sz; int shm_oflag; char shm_path[PATH_MAX]; int shm_fd; - int ret = 0; if (!msl->heap) return 0; @@ -1983,9 +1979,6 @@ eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) return 0; } - shadow_addr = ASAN_MEM_TO_SHADOW(msl->base_va); - shadow_sz = msl->len >> ASAN_SHADOW_SCALE; - snprintf(shm_path, sizeof(shm_path), "/%s_%s_shadow", eal_get_hugefile_prefix(), msl->memseg_arr.name); @@ -2001,36 +1994,19 @@ eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) } if (internal_conf->process_type == RTE_PROC_PRIMARY) { - ret = ftruncate(shm_fd, shadow_sz); - if (ret == -1) { + if (ftruncate(shm_fd, msl->len >> ASAN_SHADOW_SCALE) == -1) { RTE_LOG(DEBUG, EAL, "shadow ftruncate() failed: %s\n", strerror(errno)); - goto 
out; + close(shm_fd); + if (internal_conf->process_type == RTE_PROC_PRIMARY) + shm_unlink(shm_path); + return -1; } } - addr = mmap(shadow_addr, shadow_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, shm_fd, 0); - if (addr == MAP_FAILED) { - RTE_LOG(DEBUG, EAL, "shadow mmap() failed: %s\n", - strerror(errno)); - ret = -1; - goto out; - } - - if (addr != shadow_addr) { - RTE_LOG(DEBUG, EAL, "wrong shadow mmap() address\n"); - munmap(addr, shadow_sz); - ret = -1; - } -out: - close(shm_fd); - if (ret != 0) { - if (internal_conf->process_type == RTE_PROC_PRIMARY) - shm_unlink(shm_path); - } + msl->shm_fd = shm_fd; - return ret; + return 0; } void @@ -2039,10 +2015,12 @@ eal_memseg_list_unmap_asan_shadow(struct rte_memseg_list *msl) const struct internal_config *internal_conf = eal_get_internal_configuration(); - if (!msl->heap || internal_conf->hugepage_file.unlink_before_mapping || - internal_conf->no_shconf || internal_conf->no_hugetlbfs) + if (msl->shm_fd == -1) return; + close(msl->shm_fd); + msl->shm_fd = -1; + if (munmap(ASAN_MEM_TO_SHADOW(msl->base_va), msl->len >> ASAN_SHADOW_SCALE) != 0) RTE_LOG(ERR, EAL, "Could not unmap asan shadow memory: %s\n", From 2af0a473a84ac8e9e253db29996541a059a3d9d1 Mon Sep 17 00:00:00 2001 From: Artur Paszkiewicz Date: Fri, 30 Aug 2024 14:51:30 +0200 Subject: [PATCH 10/28] mem: fix ASan shadow shared mem mapping msl->shm_fd was in shared memory and a secondary process could change it, causing the primary process to map wrong files into the shadow region. Fix it by keeping the file descriptors in a private array in each process. 
Signed-off-by: Artur Paszkiewicz Change-Id: Iae2a13b3f054bdf52b1ff1c3e24ea155972f8caf Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/24763 Reviewed-by: Konrad Sztyber Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins --- lib/eal/common/eal_common_memory.c | 3 --- lib/eal/common/eal_private.h | 19 +++++++++++++++++++ lib/eal/include/rte_memory.h | 3 --- lib/eal/linux/eal_memalloc.c | 8 ++++---- lib/eal/linux/eal_memory.c | 29 +++++++++++++++++++++++++---- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/lib/eal/common/eal_common_memory.c b/lib/eal/common/eal_common_memory.c index d99f2496fc6..d979c72aa7d 100644 --- a/lib/eal/common/eal_common_memory.c +++ b/lib/eal/common/eal_common_memory.c @@ -211,9 +211,6 @@ eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name, msl->socket_id = socket_id; msl->base_va = NULL; msl->heap = heap; -#ifdef RTE_MALLOC_ASAN - msl->shm_fd = -1; -#endif EAL_LOG(DEBUG, "Memseg list allocated at socket %i, page size 0x%"PRIx64"kB", diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h index f8a9f574d08..9b751c8ab4e 100644 --- a/lib/eal/common/eal_private.h +++ b/lib/eal/common/eal_private.h @@ -337,6 +337,25 @@ eal_memseg_list_unmap_asan_shadow(__rte_unused struct rte_memseg_list *msl) } #endif +/** + * Get the MSL ASan shadow shared memory object file descriptor. + * + * @param msl + * Index of the MSL. + * @return + * A file descriptor. + */ +#ifdef RTE_MALLOC_ASAN +int +eal_memseg_list_get_asan_shadow_fd(int msl_idx); +#else +static inline int +eal_memseg_list_get_asan_shadow_fd(__rte_unused int msl_idx) +{ + return -1; +} +#endif + /** * Distribute available memory between MSLs. 
* diff --git a/lib/eal/include/rte_memory.h b/lib/eal/include/rte_memory.h index 1fcd1b02ca1..842362d5272 100644 --- a/lib/eal/include/rte_memory.h +++ b/lib/eal/include/rte_memory.h @@ -78,9 +78,6 @@ struct rte_memseg_list { unsigned int external; /**< 1 if this list points to external memory */ unsigned int heap; /**< 1 if this list points to a heap */ struct rte_fbarray memseg_arr; -#ifdef RTE_MALLOC_ASAN - int shm_fd; -#endif }; /** diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c index 2ef3732e276..10d6c38dc78 100644 --- a/lib/eal/linux/eal_memalloc.c +++ b/lib/eal/linux/eal_memalloc.c @@ -691,20 +691,20 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, #ifdef RTE_MALLOC_ASAN struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct rte_memseg_list *msl = &mcfg->memsegs[list_idx]; + int shadow_shm_fd = eal_memseg_list_get_asan_shadow_fd(list_idx); - if (msl->shm_fd != -1) { + if (shadow_shm_fd != -1) { void *shadow_base_addr, *shadow_addr; off_t shadow_map_offset; size_t shadow_sz; - shadow_base_addr = ASAN_MEM_TO_SHADOW(msl->base_va); + shadow_base_addr = ASAN_MEM_TO_SHADOW(mcfg->memsegs[list_idx].base_va); shadow_addr = ASAN_MEM_TO_SHADOW(addr); shadow_map_offset = (char *)shadow_addr - (char *)shadow_base_addr; shadow_sz = alloc_sz >> ASAN_SHADOW_SCALE; va = mmap(shadow_addr, shadow_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, msl->shm_fd, shadow_map_offset); + MAP_SHARED | MAP_FIXED, shadow_shm_fd, shadow_map_offset); if (va == MAP_FAILED) { RTE_LOG(DEBUG, EAL, "shadow mmap() failed: %s\n", strerror(errno)); diff --git a/lib/eal/linux/eal_memory.c b/lib/eal/linux/eal_memory.c index 7e3192f6836..cbe7649bfcf 100644 --- a/lib/eal/linux/eal_memory.c +++ b/lib/eal/linux/eal_memory.c @@ -1917,6 +1917,10 @@ memseg_secondary_init(void) return 0; } +#ifdef RTE_MALLOC_ASAN +static int msl_asan_shadow_fd[RTE_MAX_MEMSEG_LISTS]; +#endif + int rte_eal_memseg_init(void) { @@ -1926,6 +1930,9 @@ 
rte_eal_memseg_init(void) #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES const struct internal_config *internal_conf = eal_get_internal_configuration(); +#endif +#ifdef RTE_MALLOC_ASAN + int msl_idx; #endif if (getrlimit(RLIMIT_NOFILE, &lim) == 0) { /* set limit to maximum */ @@ -1949,6 +1956,11 @@ rte_eal_memseg_init(void) EAL_LOG(WARNING, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support."); } #endif +#ifdef RTE_MALLOC_ASAN + for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { + msl_asan_shadow_fd[msl_idx] = -1; + } +#endif return rte_eal_process_type() == RTE_PROC_PRIMARY ? #ifndef RTE_ARCH_64 @@ -1965,6 +1977,7 @@ eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) { const struct internal_config *internal_conf = eal_get_internal_configuration(); + int msl_idx = msl - rte_eal_get_configuration()->mem_config->memsegs; int shm_oflag; char shm_path[PATH_MAX]; int shm_fd; @@ -2004,7 +2017,7 @@ eal_memseg_list_map_asan_shadow(struct rte_memseg_list *msl) } } - msl->shm_fd = shm_fd; + msl_asan_shadow_fd[msl_idx] = shm_fd; return 0; } @@ -2014,12 +2027,14 @@ eal_memseg_list_unmap_asan_shadow(struct rte_memseg_list *msl) { const struct internal_config *internal_conf = eal_get_internal_configuration(); + int msl_idx = msl - rte_eal_get_configuration()->mem_config->memsegs; + int *shm_fd = &msl_asan_shadow_fd[msl_idx]; - if (msl->shm_fd == -1) + if (*shm_fd == -1) return; - close(msl->shm_fd); - msl->shm_fd = -1; + close(*shm_fd); + *shm_fd = -1; if (munmap(ASAN_MEM_TO_SHADOW(msl->base_va), msl->len >> ASAN_SHADOW_SCALE) != 0) @@ -2034,4 +2049,10 @@ eal_memseg_list_unmap_asan_shadow(struct rte_memseg_list *msl) shm_unlink(shm_path); } } + +int +eal_memseg_list_get_asan_shadow_fd(int msl_idx) +{ + return msl_asan_shadow_fd[msl_idx]; +} #endif From 8d8db71763ccc7f092e3d66dbc8b32deaaf69fb4 Mon Sep 17 00:00:00 2001 From: Wojciech Panfil Date: Sun, 21 Jul 2024 18:12:10 +0200 Subject: [PATCH 11/28] eal/alarm_cancel: Fix thread starvation Issue: Two 
threads: - A, executing rte_eal_alarm_cancel, - B, executing eal_alarm_callback. Such case can cause starvation of thread B. Please see that there is a small time window between lock and unlock in thread A, so thread B must be switched to within a very small time window, so that it can obtain the lock. Solution to this problem is use sched_yield(), which puts current thread (A) at the end of thread execution priority queue and allows thread B to execute. The issue can be observed e.g. on hot-pluggable device detach path. On such path, rte_alarm can used to check if DPDK has completed the detachment. Waiting for completion, rte_eal_alarm_cancel is called, while another thread periodically calls eal_alarm_callback causing the issue to occur. Change-Id: I00256e0d29fd507443fcc1784bfa916f1af7d213 Signed-off-by: Wojciech Panfil Reviewed-on: https://review.spdk.io/gerrit/c/spdk/dpdk/+/24275 Reviewed-by: Jacek Kalwas Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Konrad Sztyber --- lib/eal/freebsd/eal_alarm.c | 5 +++++ lib/eal/linux/eal_alarm.c | 5 +++++ lib/eal/windows/eal_alarm.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/lib/eal/freebsd/eal_alarm.c b/lib/eal/freebsd/eal_alarm.c index 94cae5f4b67..8425b4f5a21 100644 --- a/lib/eal/freebsd/eal_alarm.c +++ b/lib/eal/freebsd/eal_alarm.c @@ -318,7 +318,12 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) } ap_prev = ap; } + rte_spinlock_unlock(&alarm_list_lk); + + /* Yield control to a second thread executing eal_alarm_callback to avoid its starvation, + * as it is waiting for the lock we have just released. 
*/ + sched_yield(); } while (executing != 0); if (count == 0 && err == 0) diff --git a/lib/eal/linux/eal_alarm.c b/lib/eal/linux/eal_alarm.c index eeb096213b3..5326b1895ff 100644 --- a/lib/eal/linux/eal_alarm.c +++ b/lib/eal/linux/eal_alarm.c @@ -248,7 +248,12 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) } ap_prev = ap; } + rte_spinlock_unlock(&alarm_list_lk); + + /* Yield control to a second thread executing eal_alarm_callback to avoid its starvation, + * as it is waiting for the lock we have just released. */ + sched_yield(); } while (executing != 0); if (count == 0 && err == 0) diff --git a/lib/eal/windows/eal_alarm.c b/lib/eal/windows/eal_alarm.c index 052af4b21b7..43e8d7881f4 100644 --- a/lib/eal/windows/eal_alarm.c +++ b/lib/eal/windows/eal_alarm.c @@ -211,6 +211,10 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) } rte_spinlock_unlock(&alarm_lock); + + /* Yield control to a second thread executing eal_alarm_callback to avoid its starvation, + * as it is waiting for the lock we have just released. */ + SwitchToThread(); } while (executing); rte_eal_trace_alarm_cancel(cb_fn, cb_arg, removed); From 89850ad3213b33717c0775a9a158931e970cc82f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Jan 2026 12:24:41 -0800 Subject: [PATCH 12/28] doc: split pcap and ring drivers guide The documentation had combined to unrelated drivers together. Use AI to split into two separate files: pcap.rst for the pcap PMD and ring.rst for the ring PMD. 
Changes to pcap.rst: - Use "pcap" consistently instead of mixed "libpcap/pcap/PCAP" naming - Remove Linux-specific references; document support for Linux, FreeBSD, and Windows - Add reference to upstream libpcap documentation - Add multi-queue support section explaining queue count determination and file handle limitations - Use ``--vdev=net_pcap0`` format consistently - Remove deprecated rte_eth_from_pcaps() API section - Improve technical documentation style throughout Changes to ring.rst: - Use ``--vdev=net_ring0`` format consistently - Fix inconsistent "Rings-based/Ring-based" naming - Retain rte_eth_from_rings() API section with usage examples - Improve technical documentation style throughout Signed-off-by: Stephen Hemminger Signed-off-by: Thomas Monjalon --- doc/guides/nics/index.rst | 3 +- doc/guides/nics/pcap.rst | 249 ++++++++++++++++++++++++++ doc/guides/nics/pcap_ring.rst | 318 ---------------------------------- doc/guides/nics/ring.rst | 118 +++++++++++++ 4 files changed, 369 insertions(+), 319 deletions(-) create mode 100644 doc/guides/nics/pcap.rst delete mode 100644 doc/guides/nics/pcap_ring.rst create mode 100644 doc/guides/nics/ring.rst diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst index b00ed998c53..cb818284fea 100644 --- a/doc/guides/nics/index.rst +++ b/doc/guides/nics/index.rst @@ -60,10 +60,11 @@ Network Interface Controller Drivers null octeon_ep octeontx - pcap_ring + pcap pfe qede r8169 + ring rnp sfc_efx softnic diff --git a/doc/guides/nics/pcap.rst b/doc/guides/nics/pcap.rst new file mode 100644 index 00000000000..fbfe854bb10 --- /dev/null +++ b/doc/guides/nics/pcap.rst @@ -0,0 +1,249 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + +Pcap Poll Mode Driver +===================== + +The pcap-based PMD (**librte_net_pcap**) reads and writes packets using the pcap library, +both from files on disk and from physical NIC devices using standard kernel drivers. 
+ +For more information about the pcap library, see the +`libpcap documentation `_. + +.. note:: + + The pcap-based PMD requires the libpcap development files to be installed. + This applies to all supported operating systems: Linux, FreeBSD, and Windows. + + +Using the Driver from the EAL Command Line +------------------------------------------ + +DPDK allows pseudo-Ethernet devices, as the pcap driver, +to be created at application startup time during EAL initialization. + +To do so, pass the ``--vdev=net_pcap0`` parameter to the EAL. +This parameter accepts options to allocate and use pcap-based Ethernet +transparently by the application. +This can be used, for example, for testing on a virtual machine +where there are no Ethernet ports. + +The device name must start with the ``net_pcap`` prefix followed by numbers or letters. +The name must be unique for each device. +Each device can have multiple stream options and multiple devices can be used. +Multiple device definitions can be specified using multiple ``--vdev`` arguments. +Device name and stream options must be separated by commas as shown below: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,stream_opt0=..,stream_opt1=..' \ + --vdev='net_pcap1,stream_opt0=..' + +Device Streams +~~~~~~~~~~~~~~ + +Stream definitions can be combined as long as one of the following two rules is met: + +* A device is provided with two different streams - reception and transmission. +* A device is provided with one network interface name used for reading and writing packets. + +The stream types are: + +``rx_pcap`` + + Defines a reception stream based on a pcap file. + The driver reads each packet within the given pcap file + as if it was receiving it from the wire. + The value is a path to a valid pcap file:: + + rx_pcap=/path/to/file.pcap + +``tx_pcap`` + + Defines a transmission stream based on a pcap file. + The driver writes each received packet to the given pcap file. 
+ The file is overwritten if it already exists and it is created if it does not. + The value is a path to a pcap file:: + + tx_pcap=/path/to/file.pcap + +``rx_iface`` + + Defines a reception stream based on a network interface name. + The driver reads packets from the given interface + using the kernel driver for that interface. + The driver captures both the incoming and outgoing packets on that interface. + The value is an interface name:: + + rx_iface=eth0 + +``rx_iface_in`` + + Defines a reception stream based on a network interface name. + The driver reads packets from the given interface + using the kernel driver for that interface. + The driver captures only the incoming packets on that interface. + The value is an interface name:: + + rx_iface_in=eth0 + +``tx_iface`` + + Defines a transmission stream based on a network interface name. + The driver sends packets to the given interface + using the kernel driver for that interface. + The value is an interface name:: + + tx_iface=eth0 + +``iface`` + + Defines a device mapping a network interface. + The driver both reads and writes packets from and to the given interface. + The value is an interface name:: + + iface=eth0 + +Multi-queue Support +~~~~~~~~~~~~~~~~~~~ + +The pcap PMD supports multiple receive and transmit queues. +The number of receive queues is determined +by the number of ``rx_pcap`` or ``rx_iface`` arguments provided. +Similarly, the number of transmit queues is determined +by the number of ``tx_pcap`` or ``tx_iface`` arguments. + +Using the same file for multiple queues is not supported +because the underlying pcap library +does not support concurrent access to a single file handle. + +Runtime Config Options +~~~~~~~~~~~~~~~~~~~~~~ + +* Use pcap interface physical MAC + + When the ``iface=`` configuration is set, + the selected interface's physical MAC address can be used. 
+ This can be done with the ``phy_mac`` devarg, for example:: + + --vdev 'net_pcap0,iface=eth0,phy_mac=1' + +* Use the Rx pcap file to infinitely receive packets + + When the ``rx_pcap=`` configuration is set, + the selected pcap file can be used for basic performance testing. + This can be done with the ``infinite_rx`` devarg, for example:: + + --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1' + + When this mode is used, it is recommended to drop all packets on transmit + by not providing a ``tx_pcap`` or ``tx_iface``. + + This option is device-wide, + so all queues on a device will either have this enabled or disabled. + This option should only be provided once per device. + +* Drop all packets on transmit + + To drop all packets on transmit for a device, + do not provide a ``tx_pcap`` or ``tx_iface``, for example:: + + --vdev 'net_pcap0,rx_pcap=file_rx.pcap' + + In this case, one Tx drop queue is created for each Rx queue on that device. + +* Receive no packets on Rx + + To run without receiving any packets on Rx, + do not provide a ``rx_pcap`` or ``rx_iface``, for example:: + + --vdev 'net_pcap0,tx_pcap=file_tx.pcap' + + In this case, one dummy Rx queue is created for each Tx queue argument passed. + +Examples of Usage +~~~~~~~~~~~~~~~~~ + +Read packets from one pcap file and write them to another: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \ + -- --port-topology=chained + +Read packets from a network interface and write them to a pcap file: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_iface=eth0,tx_pcap=file_tx.pcap' \ + -- --port-topology=chained + +Read packets from a pcap file and write them to a network interface: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_iface=eth1' \ + -- --port-topology=chained + +Forward packets through 2 network interfaces: + +.. 
code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,iface=eth0' --vdev='net_pcap1,iface=eth1' + +Enable 2 Tx queues on a network interface: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_iface=eth1,tx_iface=eth1,tx_iface=eth1' \ + -- --txq 2 + +Read only incoming packets from a network interface +and write them back to the same network interface: + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_iface_in=eth1,tx_iface=eth1' + +Using Pcap-based PMD with the testpmd Application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the first things that testpmd does before starting to forward packets +is to flush the Rx streams by reading the first 512 packets on every Rx stream +and discarding them. +When using a pcap-based PMD, this behavior can be turned off +using the ``--no-flush-rx`` option: + +.. code-block:: console + + --no-flush-rx + +This option is also available in the runtime command line: + +.. code-block:: console + + set flush_rx on/off + +It is useful for the case where the ``rx_pcap`` is being used +and no packets are meant to be discarded. +Otherwise, the first 512 packets from the input pcap file +will be discarded by the Rx flushing operation. + +.. code-block:: console + + dpdk-testpmd -l 0-3 \ + --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \ + -- --port-topology=chained --no-flush-rx + +.. note:: + + The network interface provided to the PMD should be up. + The PMD will return an error if the interface is down, + and the PMD itself won't change the status of the external network interface. diff --git a/doc/guides/nics/pcap_ring.rst b/doc/guides/nics/pcap_ring.rst deleted file mode 100644 index 6955e911301..00000000000 --- a/doc/guides/nics/pcap_ring.rst +++ /dev/null @@ -1,318 +0,0 @@ -.. SPDX-License-Identifier: BSD-3-Clause - Copyright(c) 2010-2015 Intel Corporation. 
- -Libpcap and Ring Based Poll Mode Drivers -======================================== - -In addition to Poll Mode Drivers (PMDs) for physical and virtual hardware, -the DPDK also includes pure-software PMDs, two of these drivers are: - -* A libpcap -based PMD (**librte_net_pcap**) that reads and writes packets using libpcap, - - both from files on disk, as well as from physical NIC devices using standard Linux kernel drivers. - -* A ring-based PMD (**librte_net_ring**) that allows a set of software FIFOs (that is, rte_ring) - to be accessed using the PMD APIs, as though they were physical NICs. - -.. note:: - - The libpcap -based PMD has an external dependency on the libpcap development files which must - be installed on the board. - -Using the Drivers from the EAL Command Line -------------------------------------------- - -For ease of use, the DPDK EAL also has been extended to allow pseudo-Ethernet devices, -using one or more of these drivers, -to be created at application startup time during EAL initialization. - -To do so, the --vdev= parameter must be passed to the EAL. -This takes take options to allow ring and pcap-based Ethernet to be allocated and used transparently by the application. -This can be used, for example, for testing on a virtual machine where there are no Ethernet ports. - -Libpcap-based PMD -~~~~~~~~~~~~~~~~~ - -Pcap-based devices can be created using the virtual device --vdev option. -The device name must start with the net_pcap prefix followed by numbers or letters. -The name is unique for each device. Each device can have multiple stream options and multiple devices can be used. -Multiple device definitions can be arranged using multiple --vdev. -Device name and stream options must be separated by commas as shown below: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,stream_opt0=..,stream_opt1=..' \ - --vdev='net_pcap1,stream_opt0=..' 
- -Device Streams -^^^^^^^^^^^^^^ - -Multiple ways of stream definitions can be assessed and combined as long as the following two rules are respected: - -* A device is provided with two different streams - reception and transmission. - -* A device is provided with one network interface name used for reading and writing packets. - -The different stream types are: - -* rx_pcap: Defines a reception stream based on a pcap file. - The driver reads each packet within the given pcap file as if it was receiving it from the wire. - The value is a path to a valid pcap file. - - rx_pcap=/path/to/file.pcap - -* tx_pcap: Defines a transmission stream based on a pcap file. - The driver writes each received packet to the given pcap file. - The value is a path to a pcap file. - The file is overwritten if it already exists and it is created if it does not. - - tx_pcap=/path/to/file.pcap - -* rx_iface: Defines a reception stream based on a network interface name. - The driver reads packets from the given interface using the Linux kernel driver for that interface. - The driver captures both the incoming and outgoing packets on that interface. - The value is an interface name. - - rx_iface=eth0 - -* rx_iface_in: Defines a reception stream based on a network interface name. - The driver reads packets from the given interface using the Linux kernel driver for that interface. - The driver captures only the incoming packets on that interface. - The value is an interface name. - - rx_iface_in=eth0 - -* tx_iface: Defines a transmission stream based on a network interface name. - The driver sends packets to the given interface using the Linux kernel driver for that interface. - The value is an interface name. - - tx_iface=eth0 - -* iface: Defines a device mapping a network interface. - The driver both reads and writes packets from and to the given interface. - The value is an interface name. 
- - iface=eth0 - -Runtime Config Options -^^^^^^^^^^^^^^^^^^^^^^ - -- Use PCAP interface physical MAC - - In case ``iface=`` configuration is set, user may want to use the selected interface's physical MAC - address. This can be done with a ``devarg`` ``phy_mac``, for example:: - - --vdev 'net_pcap0,iface=eth0,phy_mac=1' - -- Use the RX PCAP file to infinitely receive packets - - In case ``rx_pcap=`` configuration is set, user may want to use the selected PCAP file for rudimental - performance testing. This can be done with a ``devarg`` ``infinite_rx``, for example:: - - --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1' - - When this mode is used, it is recommended to drop all packets on transmit by not providing a tx_pcap or tx_iface. - - This option is device wide, so all queues on a device will either have this enabled or disabled. - This option should only be provided once per device. - -- Drop all packets on transmit - - The user may want to drop all packets on tx for a device. This can be done by not providing a tx_pcap or tx_iface, for example:: - - --vdev 'net_pcap0,rx_pcap=file_rx.pcap' - - In this case, one tx drop queue is created for each rxq on that device. - - - Receive no packets on Rx - - The user may want to run without receiving any packets on Rx. This can be done by not providing a rx_pcap or rx_iface, for example:: - - --vdev 'net_pcap0,tx_pcap=file_tx.pcap' - -In this case, one dummy rx queue is created for each tx queue argument passed - -Examples of Usage -^^^^^^^^^^^^^^^^^ - -Read packets from one pcap file and write them to another: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \ - -- --port-topology=chained - -Read packets from a network interface and write them to a pcap file: - -.. 
code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_iface=eth0,tx_pcap=file_tx.pcap' \ - -- --port-topology=chained - -Read packets from a pcap file and write them to a network interface: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_iface=eth1' \ - -- --port-topology=chained - -Forward packets through two network interfaces: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,iface=eth0' --vdev='net_pcap1,iface=eth1' - -Enable 2 tx queues on a network interface: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_iface=eth1,tx_iface=eth1,tx_iface=eth1' \ - -- --txq 2 - -Read only incoming packets from a network interface and write them back to the same network interface: - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_iface_in=eth1,tx_iface=eth1' - -Using libpcap-based PMD with the testpmd Application -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -One of the first things that testpmd does before starting to forward packets is to flush the RX streams -by reading the first 512 packets on every RX stream and discarding them. -When using a libpcap-based PMD this behavior can be turned off using the following command line option: - -.. code-block:: console - - --no-flush-rx - -It is also available in the runtime command line: - -.. code-block:: console - - set flush_rx on/off - -It is useful for the case where the rx_pcap is being used and no packets are meant to be discarded. -Otherwise, the first 512 packets from the input pcap file will be discarded by the RX flushing operation. - -.. code-block:: console - - .//app/dpdk-testpmd -l 0-3 \ - --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_pcap=file_tx.pcap' \ - -- --port-topology=chained --no-flush-rx - -.. note:: - - The network interface provided to the PMD should be up. 
The PMD will return - an error if interface is down, and the PMD itself won't change the status - of the external network interface. - - -Rings-based PMD -~~~~~~~~~~~~~~~ - -To run a DPDK application on a machine without any Ethernet devices, a pair of ring-based rte_ethdevs can be used as below. -The device names passed to the --vdev option must start with net_ring and take no additional parameters. -Multiple devices may be specified, separated by commas. - -.. code-block:: console - - ./dpdk-testpmd -l 1-3 --vdev=net_ring0 --vdev=net_ring1 -- -i - ... - Interactive-mode selected - Configuring Port 0 (socket 0) - Configuring Port 1 (socket 0) - Checking link statuses... - Port 0 Link Up - speed 10000 Mbps - full-duplex - Port 1 Link Up - speed 10000 Mbps - full-duplex - Done - - testpmd> start tx_first - io packet forwarding - CRC stripping disabled - packets/burst=16 - nb forwarding cores=1 - nb forwarding ports=2 - RX queues=1 - RX desc=128 - RX free threshold=0 - RX threshold registers: pthresh=8 hthresh=8 wthresh=4 - TX queues=1 - TX desc=512 - TX free threshold=0 - TX threshold registers: pthresh=36 hthresh=0 wthresh=0 - TX RS bit threshold=0 - TXQ flags=0x0 - - testpmd> stop - Telling cores to stop... - Waiting for lcores to finish... - -.. image:: img/forward_stats.* - -.. code-block:: console - - +++++++++++++++ Accumulated forward statistics for allports++++++++++ - RX-packets: 462384736 RX-dropped: 0 RX-total: 462384736 - TX-packets: 462384768 TX-dropped: 0 TX-total: 462384768 - +++++++++++++++++++++++++++++++++++++++++++++++++++++ - - Done. - - -Using the Poll Mode Driver from an Application -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Both drivers can provide similar APIs to allow the user to create a PMD, that is, -rte_ethdev structure, instances at run-time in the end-application, -for example, using rte_eth_from_rings() or rte_eth_from_pcaps() APIs. 
-For the rings-based PMD, this functionality could be used, for example, -to allow data exchange between cores using rings to be done in exactly the -same way as sending or receiving packets from an Ethernet device. -For the libpcap-based PMD, it allows an application to open one or more pcap files -and use these as a source of packet input to the application. - -Usage Examples -^^^^^^^^^^^^^^ - -To create two pseudo-Ethernet ports where all traffic sent to a port is looped back -for reception on the same port (error handling omitted for clarity): - -.. code-block:: c - - #define RING_SIZE 256 - #define NUM_RINGS 2 - #define SOCKET0 0 - - struct rte_ring *ring[NUM_RINGS]; - int port0, port1; - - ring[0] = rte_ring_create("R0", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); - ring[1] = rte_ring_create("R1", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); - - /* create two ethdev's */ - - port0 = rte_eth_from_rings("net_ring0", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); - port1 = rte_eth_from_rings("net_ring1", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); - - -To create two pseudo-Ethernet ports where the traffic is switched between them, -that is, traffic sent to port 0 is read back from port 1 and vice-versa, -the final two lines could be changed as below: - -.. code-block:: c - - port0 = rte_eth_from_rings("net_ring0", &ring[0], 1, &ring[1], 1, SOCKET0); - port1 = rte_eth_from_rings("net_ring1", &ring[1], 1, &ring[0], 1, SOCKET0); - -This type of configuration could be useful in a pipeline model, for example, -where one may want to have inter-core communication using pseudo Ethernet devices rather than raw rings, -for reasons of API consistency. - -Enqueuing and dequeuing items from an rte_ring using the rings-based PMD may be slower than using the native rings API. 
-This is because DPDK Ethernet drivers make use of function pointers to call the appropriate enqueue or dequeue functions, -while the rte_ring specific functions are direct function calls in the code and are often inlined by the compiler. - - Once an ethdev has been created, for either a ring or a pcap-based PMD, - it should be configured and started in the same way as a regular Ethernet device, that is, - by calling rte_eth_dev_configure() to set the number of receive and transmit queues, - then calling rte_eth_rx_queue_setup() / tx_queue_setup() for each of those queues and - finally calling rte_eth_dev_start() to allow transmission and reception of packets to begin. diff --git a/doc/guides/nics/ring.rst b/doc/guides/nics/ring.rst new file mode 100644 index 00000000000..a6b2458a7f5 --- /dev/null +++ b/doc/guides/nics/ring.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2015 Intel Corporation. + +Ring Based Poll Mode Driver +=========================== + +The ring-based PMD (``librte_net_ring``) allows software FIFOs (rte_ring) +to be accessed using the PMD API, as though they were physical NICs. + + +Using the Driver from the EAL Command Line +------------------------------------------ + +DPDK allows pseudo-Ethernet devices, as the ring driver, +to be created at application startup time during EAL initialization. + +To do so, pass the ``--vdev=net_ring0`` parameter to the EAL. +This parameter accepts options to allocate and use ring-based Ethernet +transparently by the application. +This can be used, for example, for testing on a virtual machine +where there are no Ethernet ports. + +The device names passed to the ``--vdev`` option must start with ``net_ring`` +and take no additional parameters. +Multiple devices may be specified using multiple ``--vdev`` arguments. + +.. code-block:: console + + ./dpdk-testpmd -l 1-3 --vdev=net_ring0 --vdev=net_ring1 -- -i + ... 
+ Interactive-mode selected + Configuring Port 0 (socket 0) + Configuring Port 1 (socket 0) + Checking link statuses... + Port 0 Link Up - speed 10000 Mbps - full-duplex + Port 1 Link Up - speed 10000 Mbps - full-duplex + Done + + testpmd> start tx_first + io packet forwarding - CRC stripping disabled - packets/burst=16 + nb forwarding cores=1 - nb forwarding ports=2 + RX queues=1 - RX desc=128 - RX free threshold=0 + RX threshold registers: pthresh=8 hthresh=8 wthresh=4 + TX queues=1 - TX desc=512 - TX free threshold=0 + TX threshold registers: pthresh=36 hthresh=0 wthresh=0 + TX RS bit threshold=0 - TXQ flags=0x0 + + testpmd> stop + Telling cores to stop... + Waiting for lcores to finish... + +.. image:: img/forward_stats.* + +.. code-block:: console + + +++++++++++++++ Accumulated forward statistics for allports++++++++++ + RX-packets: 462384736 RX-dropped: 0 RX-total: 462384736 + TX-packets: 462384768 TX-dropped: 0 TX-total: 462384768 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ + + Done. + + +Using the Ring-based PMD from an Application +-------------------------------------------- + +The driver provides an API to create PMD (``rte_ethdev`` structure) instances +at run-time in the end-application using the function ``rte_eth_from_rings()``. +This functionality can be used to allow data exchange between cores using rings +in the same way as sending or receiving packets from an Ethernet device. + +Usage Examples +^^^^^^^^^^^^^^ + +To create two pseudo-Ethernet ports where all traffic sent to a port is looped back +for reception on the same port (error handling omitted for clarity): + +.. 
code-block:: c + + #define RING_SIZE 256 + #define NUM_RINGS 2 + #define SOCKET0 0 + + struct rte_ring *ring[NUM_RINGS]; + int port0, port1; + + ring[0] = rte_ring_create("R0", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); + ring[1] = rte_ring_create("R1", RING_SIZE, SOCKET0, RING_F_SP_ENQ|RING_F_SC_DEQ); + + /* create two ethdev's */ + port0 = rte_eth_from_rings("net_ring0", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); + port1 = rte_eth_from_rings("net_ring1", ring, NUM_RINGS, ring, NUM_RINGS, SOCKET0); + + +To create two pseudo-Ethernet ports where the traffic is switched between them +(traffic sent to port 0 is read back from port 1 and vice-versa), +the final two lines can be changed as follows: + +.. code-block:: c + + port0 = rte_eth_from_rings("net_ring0", &ring[0], 1, &ring[1], 1, SOCKET0); + port1 = rte_eth_from_rings("net_ring1", &ring[1], 1, &ring[0], 1, SOCKET0); + +This type of configuration is useful in a pipeline model where inter-core communication +using pseudo Ethernet devices is preferred over raw rings for API consistency. + +Enqueuing and dequeuing items from an ``rte_ring`` +using the ring-based PMD may be slower than using the native ring API. +DPDK Ethernet drivers use function pointers +to call the appropriate enqueue or dequeue functions, +while the ``rte_ring`` specific functions are direct function calls +and are often inlined by the compiler. + +Once an ethdev has been created for a ring-based PMD, +it should be configured and started in the same way as a regular Ethernet device: +call ``rte_eth_dev_configure()`` to set the number of receive and transmit queues, +then call ``rte_eth_rx_queue_setup()`` / ``tx_queue_setup()`` for each of those queues, +and finally call ``rte_eth_dev_start()`` to allow transmission and reception of packets to begin. 
From ff95e1e24ed1a467dd11e48cdfe1e9d2bb9be8f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Morten=20Br=C3=B8rup?= Date: Thu, 5 Feb 2026 09:28:48 +0000 Subject: [PATCH 13/28] ethdev: fix mbuf fast release requirements description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was unclear if mbuf fast release could support segmented packets, or if mbuf fast release required non-segmented packets. This has now been investigated in detail, and it was concluded that segmented packets can be supported with mbuf fast release still achieving the enhanced performance. So the description of the mbuf fast release Tx offload flag was fixed. Furthermore, the general descriptions of the Rx and Tx offloads were improved, to reflect that they are not only for device capability reporting, but also for device and queue configuration purposes. NB: If a driver does not support segmented packets with mbuf fast release, it can check the multi segment send flag when selecting transmit function. Fixes: 55624173bacb ("mbuf: add raw free and alloc bulk functions") Cc: stable@dpdk.org Signed-off-by: Morten Brørup Acked-by: Stephen Hemminger --- lib/ethdev/rte_ethdev.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index a66c2abbdbf..0d8e2d0236a 100644 --- a/lib/ethdev/rte_ethdev.h +++ b/lib/ethdev/rte_ethdev.h @@ -1578,7 +1578,7 @@ struct rte_eth_conf { }; /** - * Rx offload capabilities of a device. + * Rx offload capabilities/configuration of a device or queue. */ #define RTE_ETH_RX_OFFLOAD_VLAN_STRIP RTE_BIT64(0) #define RTE_ETH_RX_OFFLOAD_IPV4_CKSUM RTE_BIT64(1) @@ -1613,12 +1613,12 @@ struct rte_eth_conf { RTE_ETH_RX_OFFLOAD_QINQ_STRIP) /* - * If new Rx offload capabilities are defined, they also must be + * If new Rx offloads are defined, they also must be * mentioned in rte_rx_offload_names in rte_ethdev.c file. 
*/ /** - * Tx offload capabilities of a device. + * Tx offload capabilities/configuration of a device or queue. */ #define RTE_ETH_TX_OFFLOAD_VLAN_INSERT RTE_BIT64(0) #define RTE_ETH_TX_OFFLOAD_IPV4_CKSUM RTE_BIT64(1) @@ -1639,39 +1639,39 @@ struct rte_eth_conf { * Tx queue without SW lock. */ #define RTE_ETH_TX_OFFLOAD_MT_LOCKFREE RTE_BIT64(14) -/** Device supports multi segment send. */ +/** Multi segment send. */ #define RTE_ETH_TX_OFFLOAD_MULTI_SEGS RTE_BIT64(15) /** - * Device supports optimization for fast release of mbufs. + * Optimization for fast release of mbufs. * When set application must guarantee that per-queue all mbufs come from the same mempool, - * are direct, have refcnt=1, next=NULL and nb_segs=1, as done by rte_pktmbuf_prefree_seg(). + * have refcnt=1, and are direct. * * @see rte_mbuf_raw_free_bulk() */ #define RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE RTE_BIT64(16) #define RTE_ETH_TX_OFFLOAD_SECURITY RTE_BIT64(17) /** - * Device supports generic UDP tunneled packet TSO. + * Generic UDP tunneled packet TSO. * Application must set RTE_MBUF_F_TX_TUNNEL_UDP and other mbuf fields required * for tunnel TSO. */ #define RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO RTE_BIT64(18) /** - * Device supports generic IP tunneled packet TSO. + * Generic IP tunneled packet TSO. * Application must set RTE_MBUF_F_TX_TUNNEL_IP and other mbuf fields required * for tunnel TSO. */ #define RTE_ETH_TX_OFFLOAD_IP_TNL_TSO RTE_BIT64(19) -/** Device supports outer UDP checksum */ +/** Outer UDP checksum. Used for tunneling packet. */ #define RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM RTE_BIT64(20) /** - * Device sends on time read from RTE_MBUF_DYNFIELD_TIMESTAMP_NAME + * Send on time read from RTE_MBUF_DYNFIELD_TIMESTAMP_NAME * if RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME is set in ol_flags. * The mbuf field and flag are registered when the offload is configured. 
*/ #define RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP RTE_BIT64(21) /* - * If new Tx offload capabilities are defined, they also must be + * If new Tx offloads are defined, they also must be * mentioned in rte_tx_offload_names in rte_ethdev.c file. */ From 3ae6853f26f6b49a9172b58a273d0e160ac07c78 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Wed, 19 Nov 2025 09:06:32 +0100 Subject: [PATCH 14/28] crypto/dpaa2_sec: use bus device name dpaa2_sec_dev_init() sets the crypto device name again after it has been set by rte_cryptodev_pmd_create/allocate(). Overwriting its value could end up as a bug if the cryptodev library changes the way it calls cryptodev objects. Besides, there is no need to generate a name for the crypto device different than the bus device, as there is a 1:1 relation between those objects. Reuse the bus device name directly, iow: dpseci.XXX instead of dpsec-XXX. Signed-off-by: David Marchand Acked-by: Hemant Agrawal --- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index 698548e6ead..995e375fb5a 100644 --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -4456,8 +4456,6 @@ dpaa2_sec_dev_init(struct rte_cryptodev *cryptodev) retcode); goto init_error; } - snprintf(cryptodev->data->name, sizeof(cryptodev->data->name), - "dpsec-%u", hw_id); internals->max_nb_queue_pairs = attr.num_tx_queues; cryptodev->data->nb_queue_pairs = internals->max_nb_queue_pairs; @@ -4482,7 +4480,6 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused, struct rte_dpaa2_device *dpaa2_dev) { struct rte_cryptodev *cryptodev; - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; int retval; struct rte_cryptodev_pmd_init_params init_params = { .name = "", @@ -4493,10 +4490,7 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv 
__rte_unused, /* setting default, will be updated in init. */ }; - snprintf(cryptodev_name, sizeof(cryptodev_name), "dpsec-%d", - dpaa2_dev->object_id); - - cryptodev = rte_cryptodev_pmd_create(cryptodev_name, &dpaa2_dev->device, + cryptodev = rte_cryptodev_pmd_create(dpaa2_dev->device.name, &dpaa2_dev->device, &init_params); if (cryptodev == NULL) { DPAA2_SEC_ERR("failed to create cryptodev vdev"); @@ -4524,14 +4518,10 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused, static int cryptodev_dpaa2_sec_remove(struct rte_dpaa2_device *dpaa2_dev) { - char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; struct rte_cryptodev *cryptodev; int ret; - snprintf(cryptodev_name, sizeof(cryptodev_name), "dpsec-%d", - dpaa2_dev->object_id); - - cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); + cryptodev = rte_cryptodev_pmd_get_named_dev(dpaa2_dev->device.name); if (cryptodev == NULL) return -ENODEV; From e1e35f429546ab5c1a094d55ff010daed30c5a0f Mon Sep 17 00:00:00 2001 From: Radu Nicolau Date: Tue, 2 Dec 2025 10:45:50 +0000 Subject: [PATCH 15/28] pdcp: add digest physical address Set the physical address for digest buffer. 
Fixes: a785af14ff79 ("pdcp: add pre and post process for UL") Cc: stable@dpdk.org Signed-off-by: Radu Nicolau Acked-by: Kai Ji Acked-by: Anoob Joseph --- lib/pdcp/pdcp_process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/pdcp/pdcp_process.c b/lib/pdcp/pdcp_process.c index 9b9b881124b..f55ae3bec06 100644 --- a/lib/pdcp/pdcp_process.c +++ b/lib/pdcp/pdcp_process.c @@ -351,6 +351,8 @@ cop_prepare(const struct entity_priv *en_priv, struct rte_mbuf *mb, struct rte_c op->auth.data.length = (pkt_len - RTE_PDCP_MAC_I_LEN) << auth_shift; op->auth.digest.data = rte_pktmbuf_mtod_offset(mb, uint8_t *, (pkt_len - RTE_PDCP_MAC_I_LEN)); + op->auth.digest.phys_addr = rte_pktmbuf_iova_offset(mb, + (pkt_len - RTE_PDCP_MAC_I_LEN)); } __rte_crypto_sym_op_attach_sym_session(op, en_priv->crypto_sess); From ae546c0577ec5fd9f87f40b739be035cbadf2e3c Mon Sep 17 00:00:00 2001 From: Radu Nicolau Date: Tue, 2 Dec 2025 10:57:31 +0000 Subject: [PATCH 16/28] crypto/qat: align vector address Align the vector address rather than computed source address to make sure the alignment is properly propagated. 
Fixes: 253174309ff7 ("crypto/qat: fix source buffer alignment") Cc: stable@dpdk.org Signed-off-by: Radu Nicolau Acked-by: Kai Ji --- drivers/crypto/qat/dev/qat_crypto_pmd_gens.h | 33 +++++++++----------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h b/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h index 67dc889b503..22ee0fe4fef 100644 --- a/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h +++ b/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h @@ -443,6 +443,21 @@ qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req, n_dst > QAT_SYM_SGL_MAX_NUMBER)) return -1; + /* For crypto API only: try to align the in-place buffers*/ + if (op != NULL && likely(n_dst == 0) && likely(!is_sgl)) { + rte_iova_t offset = src_vec[0].iova & RTE_CACHE_LINE_MASK; + if (offset) { + rte_iova_t buff_addr = rte_mbuf_iova_get(op->sym->m_src); + /* make sure src_data_start is still within the buffer */ + if (src_vec[0].iova - offset >= buff_addr) { + src_vec[0].iova -= offset; + src_vec[0].len += offset; + ofs->ofs.auth.head += offset; + ofs->ofs.cipher.head += offset; + } + } + } + if (likely(!is_sgl)) { src_data_start = src_vec[0].iova; tl_src = total_len_src = @@ -503,24 +518,6 @@ qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req, dst_data_start = src_data_start; } - /* For crypto API only try to align the in-place buffers*/ - if (op != NULL && likely(n_dst == 0)) { - uint16_t offset = src_data_start & RTE_CACHE_LINE_MASK; - if (offset) { - rte_iova_t buff_addr = rte_mbuf_iova_get(op->sym->m_src); - /* make sure src_data_start is still within the buffer */ - if (src_data_start - offset >= buff_addr) { - src_data_start -= offset; - dst_data_start = src_data_start; - ofs->ofs.auth.head += offset; - ofs->ofs.cipher.head += offset; - tl_src += offset; - total_len_src = tl_src; - total_len_dst = tl_src; - } - } - } - req->comn_mid.src_data_addr = src_data_start; req->comn_mid.dest_data_addr = dst_data_start; 
req->comn_mid.src_length = total_len_src; From 926a88caa0413b2dc7755474bde6addd073577c1 Mon Sep 17 00:00:00 2001 From: Emma Finn Date: Mon, 15 Dec 2025 10:41:09 +0000 Subject: [PATCH 17/28] doc: update device configuration in qat guide Added note for QAT driver information and device configuration for services. Signed-off-by: Emma Finn Acked-by: Kai Ji --- doc/guides/cryptodevs/qat.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index 68d792e4cc8..0c2b85444e3 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -501,6 +501,32 @@ If you are running on a kernel which includes a driver for your device, see The actual crypto services enabled on the system depend on QAT driver capabilities and hardware slice configuration. +.. note:: + + With the introduction of QAT Generation 4, + the in-tree drivers are available within the mainline Linux kernel. + Out-of-tree (OOT) drivers for QAT Gen 4 are currently in sustaining mode, + meaning they will only receive maintenance updates without new feature development. + Future generations of QAT will be supported with in-tree drivers exclusively. + +.. note:: + + For **in-tree drivers**, when multiple QAT instances are available, + each instance is assigned a different crypto service by default (asym;sym or dc). + The available crypto queue pair request will return zero + if the corresponding service is not enabled. + + To verify the device configuration, run:: + + cat /sys/bus/pci/devices//qat/cfg_services + + For symmetric and asymmetric crypto services, ensure that `"asym;sym"` are enabled. 
+ + For **out-of-tree drivers**, the configuration file for each instance can be found at:: + + /etc/_dev.conf + + Installation using kernel.org driver ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From a45388c7d16e1ea85513f02fe3e0e67c308b5e05 Mon Sep 17 00:00:00 2001 From: Nithinsen Kaithakadan Date: Mon, 12 Jan 2026 17:53:08 +0530 Subject: [PATCH 18/28] common/cnxk: set CPT cache line size per platform Added conditional definition for cache line size: - For CN10K and CN9k platform, set cache line size to 128 bytes. - For others, default to 256 bytes. Signed-off-by: Nithinsen Kaithakadan --- drivers/common/cnxk/roc_cpt.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/common/cnxk/roc_cpt.h b/drivers/common/cnxk/roc_cpt.h index 67956758be8..37873820e20 100644 --- a/drivers/common/cnxk/roc_cpt.h +++ b/drivers/common/cnxk/roc_cpt.h @@ -130,7 +130,12 @@ BITS_PER_LONG_LONG) /* ROC CPTR Cache */ +#if defined(ROC_PLATFORM_CN10K) || defined(ROC_PLATFORM_CN9K) +#define ROC_CPTR_CACHE_LINE_SZ 128 +#else #define ROC_CPTR_CACHE_LINE_SZ 256 +#endif + #define ROC_CPTR_ALIGN ROC_CPTR_CACHE_LINE_SZ #define ROC_CPT_CQ_ENTRY_SIZE_UNIT 32 From c11bbb82dc52ac9f72824366ed5da76afa93711c Mon Sep 17 00:00:00 2001 From: Tejasree Kondoj Date: Mon, 12 Jan 2026 17:53:09 +0530 Subject: [PATCH 19/28] crypto/cnxk: align TLS CPTR to 256B Aligning CPTR to 256B for TLS cases. 
Signed-off-by: Tejasree Kondoj --- drivers/common/cnxk/roc_cpt.c | 4 +-- drivers/crypto/cnxk/cn20k_tls.c | 47 +++++++++++++++++++++++------ drivers/crypto/cnxk/cn20k_tls.h | 15 ++++++--- drivers/crypto/cnxk/cn20k_tls_ops.h | 6 +++- 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c index 83e0c9896b0..0deb0b52d5b 100644 --- a/drivers/common/cnxk/roc_cpt.c +++ b/drivers/common/cnxk/roc_cpt.c @@ -1275,8 +1275,8 @@ roc_cpt_ctx_write(struct roc_cpt_lf *lf, void *sa_dptr, void *sa_cptr, uint8_t egrp; int i; - if (!plt_is_aligned(sa_cptr, 128)) { - plt_err("Context pointer should be 128B aligned"); + if (!plt_is_aligned(sa_cptr, ROC_CPTR_ALIGN)) { + plt_err("Context pointer should be %dB aligned", ROC_CPTR_ALIGN); return -EINVAL; } diff --git a/drivers/crypto/cnxk/cn20k_tls.c b/drivers/crypto/cnxk/cn20k_tls.c index 9f7acefc19f..8556a95ab6c 100644 --- a/drivers/crypto/cnxk/cn20k_tls.c +++ b/drivers/crypto/cnxk/cn20k_tls.c @@ -385,13 +385,20 @@ cn20k_tls_read_sa_create(struct roc_cpt *roc_cpt, struct roc_cpt_lf *lf, int ret = 0; tls = &sec_sess->tls_rec; - read_sa = &tls->read_sa; + + read_sa = rte_zmalloc("cn20k_tls", sizeof(struct roc_ie_ow_tls_read_sa), ROC_CPTR_ALIGN); + if (read_sa == NULL) { + plt_err("Couldn't allocate memory for READ SA"); + return -ENOMEM; + } + tls->read_sa = read_sa; /* Allocate memory to be used as dptr for CPT ucode WRITE_SA op */ sa_dptr = plt_zmalloc(sizeof(struct roc_ie_ow_tls_read_sa), 8); if (sa_dptr == NULL) { plt_err("Could not allocate memory for SA dptr"); - return -ENOMEM; + ret = -ENOMEM; + goto sa_cptr_free; } /* Translate security parameters to SA */ @@ -457,6 +464,11 @@ cn20k_tls_read_sa_create(struct roc_cpt *roc_cpt, struct roc_cpt_lf *lf, sa_dptr_free: plt_free(sa_dptr); +sa_cptr_free: + if (ret != 0) { + rte_free(read_sa); + read_sa = NULL; + } return ret; } @@ -706,13 +718,20 @@ cn20k_tls_write_sa_create(struct roc_cpt *roc_cpt, struct roc_cpt_lf *lf, 
int ret = 0; tls = &sec_sess->tls_rec; - write_sa = &tls->write_sa; + + write_sa = rte_zmalloc("cn20k_tls", sizeof(struct roc_ie_ow_tls_write_sa), ROC_CPTR_ALIGN); + if (write_sa == NULL) { + plt_err("Couldn't allocate memory for WRITE SA"); + return -ENOMEM; + } + tls->write_sa = write_sa; /* Allocate memory to be used as dptr for CPT ucode WRITE_SA op */ sa_dptr = plt_zmalloc(sizeof(struct roc_ie_ow_tls_write_sa), 8); if (sa_dptr == NULL) { plt_err("Could not allocate memory for SA dptr"); - return -ENOMEM; + ret = -ENOMEM; + goto sa_cptr_free; } /* Translate security parameters to SA */ @@ -781,6 +800,11 @@ cn20k_tls_write_sa_create(struct roc_cpt *roc_cpt, struct roc_cpt_lf *lf, sa_dptr_free: plt_free(sa_dptr); +sa_cptr_free: + if (ret != 0) { + rte_free(write_sa); + write_sa = NULL; + } return ret; } @@ -868,15 +892,18 @@ cn20k_sec_tls_session_destroy(struct cnxk_cpt_qp *qp, struct cn20k_sec_session * tls = &sess->tls_rec; + if (tls->sa_ptr == NULL) + return -EINVAL; + /* Trigger CTX flush to write dirty data back to DRAM */ - roc_cpt_lf_ctx_flush(lf, &tls->read_sa, false); + roc_cpt_lf_ctx_flush(lf, tls->read_sa, false); if (sess->tls_opt.is_write) { sa_dptr = plt_zmalloc(sizeof(struct roc_ie_ow_tls_write_sa), 8); if (sa_dptr != NULL) { tls_write_sa_init(sa_dptr); - ret = roc_cpt_ctx_write(lf, sa_dptr, &tls->write_sa, + ret = roc_cpt_ctx_write(lf, sa_dptr, tls->write_sa, sizeof(struct roc_ie_ow_tls_write_sa)); plt_free(sa_dptr); } @@ -889,14 +916,14 @@ cn20k_sec_tls_session_destroy(struct cnxk_cpt_qp *qp, struct cn20k_sec_session * rte_atomic_thread_fence(rte_memory_order_seq_cst); /* Trigger CTX reload to fetch new data from DRAM */ - roc_cpt_lf_ctx_reload(lf, &tls->write_sa); + roc_cpt_lf_ctx_reload(lf, tls->write_sa); } } else { sa_dptr = plt_zmalloc(sizeof(struct roc_ie_ow_tls_read_sa), 8); if (sa_dptr != NULL) { tls_read_sa_init(sa_dptr); - ret = roc_cpt_ctx_write(lf, sa_dptr, &tls->read_sa, + ret = roc_cpt_ctx_write(lf, sa_dptr, tls->read_sa, 
sizeof(struct roc_ie_ow_tls_read_sa)); plt_free(sa_dptr); } @@ -909,9 +936,11 @@ cn20k_sec_tls_session_destroy(struct cnxk_cpt_qp *qp, struct cn20k_sec_session * rte_atomic_thread_fence(rte_memory_order_seq_cst); /* Trigger CTX reload to fetch new data from DRAM */ - roc_cpt_lf_ctx_reload(lf, &tls->read_sa); + roc_cpt_lf_ctx_reload(lf, tls->read_sa); } } + rte_free(tls->sa_ptr); + return 0; } diff --git a/drivers/crypto/cnxk/cn20k_tls.h b/drivers/crypto/cnxk/cn20k_tls.h index 27124602a0f..5fed7495459 100644 --- a/drivers/crypto/cnxk/cn20k_tls.h +++ b/drivers/crypto/cnxk/cn20k_tls.h @@ -16,13 +16,18 @@ /* Forward declaration */ struct cn20k_sec_session; -struct __rte_aligned(ROC_ALIGN) cn20k_tls_record +struct __rte_aligned(ROC_CPTR_ALIGN) cn20k_tls_record { union { - /** Read SA */ - struct roc_ie_ow_tls_read_sa read_sa; - /** Write SA */ - struct roc_ie_ow_tls_write_sa write_sa; + void *sa_ptr; + struct { + union { + /** Read SA */ + struct roc_ie_ow_tls_read_sa *read_sa; + /** Write SA */ + struct roc_ie_ow_tls_write_sa *write_sa; + }; + }; }; }; diff --git a/drivers/crypto/cnxk/cn20k_tls_ops.h b/drivers/crypto/cnxk/cn20k_tls_ops.h index 9f70a1d42d4..e7a8ba34aec 100644 --- a/drivers/crypto/cnxk/cn20k_tls_ops.h +++ b/drivers/crypto/cnxk/cn20k_tls_ops.h @@ -38,7 +38,11 @@ process_tls_write(struct roc_cpt_lf *lf, struct rte_crypto_op *cop, struct cn20k pad_len = (pad_bytes >> tls_opt.pad_shift) * tls_opt.enable_padding; #ifdef LA_IPSEC_DEBUG - write_sa = &sess->tls_rec.write_sa; + write_sa = sess->tls_rec.write_sa; + if (write_sa == NULL) { + return -EINVAL; + } + if (write_sa->w2.s.iv_at_cptr == ROC_IE_OW_TLS_IV_SRC_FROM_SA) { uint8_t *iv = PLT_PTR_ADD(write_sa->cipher_key, 32); From f64f4f0ef9dac0f34b69b9f887ce7317542e82ec Mon Sep 17 00:00:00 2001 From: Nithinsen Kaithakadan Date: Fri, 23 Jan 2026 10:51:23 +0000 Subject: [PATCH 20/28] test/crypto: fix mbuf segment number Fix mbuf sanity check failures by updating nb_segs field after mbuf allocation. 
Without this update, the append function fails due to incorrect segment count. Fixes: dcdd01691f39 ("test/crypto: add GMAC SGL") Fixes: 43220096d66a ("test/crypto: add PDCP cases for scatter gather") Fixes: f3dbf94be60c ("app/test: check SGL on QAT") Cc: stable@dpdk.org Signed-off-by: Nithinsen Kaithakadan Acked-by: Akhil Goyal --- app/test/test_cryptodev.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index b0a4b46743d..a60983c6b73 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -9717,7 +9717,6 @@ test_pdcp_proto_SGL(int i, int oop, int ret = TEST_SUCCESS; int to_trn = 0; int to_trn_tbl[16]; - int segs = 1; unsigned int trn_data = 0; struct rte_cryptodev_info dev_info; uint64_t feat_flags; @@ -9786,7 +9785,6 @@ test_pdcp_proto_SGL(int i, int oop, */ while (trn_data < input_vec_len) { - ++segs; to_trn = (input_vec_len - trn_data < fragsz) ? (input_vec_len - trn_data) : fragsz; @@ -9794,6 +9792,7 @@ test_pdcp_proto_SGL(int i, int oop, buf->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf = buf->next; + ut_params->ibuf->nb_segs++; memset(rte_pktmbuf_mtod(buf, uint8_t *), 0, rte_pktmbuf_tailroom(buf)); @@ -9803,6 +9802,7 @@ test_pdcp_proto_SGL(int i, int oop, buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf_oop = buf_oop->next; + ut_params->obuf->nb_segs++; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); TEST_ASSERT_NOT_NULL(ut_params->obuf, "Output buffer not initialized"); @@ -9817,16 +9817,12 @@ test_pdcp_proto_SGL(int i, int oop, trn_data += to_trn; } - ut_params->ibuf->nb_segs = segs; - - segs = 1; if (fragsz_oop && oop) { to_trn = 0; ecx = 0; trn_data = frag_size_oop; while (trn_data < output_vec_len) { - ++segs; to_trn = (output_vec_len - trn_data < frag_size_oop) ? 
@@ -9838,13 +9834,13 @@ test_pdcp_proto_SGL(int i, int oop, buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf_oop = buf_oop->next; + ut_params->obuf->nb_segs++; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); trn_data += to_trn; } - ut_params->obuf->nb_segs = segs; } /* Setup Cipher Parameters */ @@ -15869,7 +15865,6 @@ test_AES_GMAC_authentication_SGL(const struct gmac_test_data *tdata, uint64_t feature_flags; unsigned int trn_data = 0; void *digest_mem = NULL; - uint32_t segs = 1; unsigned int to_trn = 0; struct rte_mbuf *buf = NULL; uint8_t *auth_tag, *plaintext; @@ -15930,12 +15925,12 @@ test_AES_GMAC_authentication_SGL(const struct gmac_test_data *tdata, */ while (trn_data < tdata->plaintext.len) { - ++segs; to_trn = (tdata->plaintext.len - trn_data < fragsz) ? (tdata->plaintext.len - trn_data) : fragsz; buf->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf = buf->next; + ut_params->ibuf->nb_segs++; memset(rte_pktmbuf_mtod(buf, uint8_t *), 0, rte_pktmbuf_tailroom(buf)); @@ -15953,7 +15948,6 @@ test_AES_GMAC_authentication_SGL(const struct gmac_test_data *tdata, TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append digest data"); } } - ut_params->ibuf->nb_segs = segs; /* * Place digest at the end of the last buffer @@ -17106,7 +17100,6 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, int retval; int to_trn = 0; int to_trn_tbl[SGL_MAX_NO]; - int segs = 1; unsigned int trn_data = 0; uint8_t *plaintext, *ciphertext, *auth_tag; struct rte_cryptodev_info dev_info; @@ -17231,7 +17224,6 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, */ while (trn_data < tdata->plaintext.len) { - ++segs; to_trn = (tdata->plaintext.len - trn_data < fragsz) ? 
(tdata->plaintext.len - trn_data) : fragsz; @@ -17239,6 +17231,7 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, buf->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf = buf->next; + ut_params->ibuf->nb_segs++; memset(rte_pktmbuf_mtod(buf, uint8_t *), 0, rte_pktmbuf_tailroom(buf)); @@ -17248,6 +17241,7 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, buf_last_oop = buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); buf_oop = buf_oop->next; + ut_params->obuf->nb_segs++; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); TEST_ASSERT_NOT_NULL(rte_pktmbuf_append(ut_params->obuf, to_trn), "Failed to append to mbuf"); @@ -17277,9 +17271,6 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, uint64_t digest_phys = 0; - ut_params->ibuf->nb_segs = segs; - - segs = 1; if (fragsz_oop && oop) { to_trn = 0; ecx = 0; @@ -17295,7 +17286,6 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, trn_data = frag_size_oop; while (trn_data < tdata->plaintext.len) { - ++segs; to_trn = (tdata->plaintext.len - trn_data < frag_size_oop) ? @@ -17307,6 +17297,7 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, buf_last_oop = buf_oop->next = rte_pktmbuf_alloc(ts_params->mbuf_pool); TEST_ASSERT_NOT_NULL(buf_oop->next, "Unexpected end of chain"); + ut_params->obuf->nb_segs++; buf_oop = buf_oop->next; memset(rte_pktmbuf_mtod(buf_oop, uint8_t *), 0, rte_pktmbuf_tailroom(buf_oop)); @@ -17320,8 +17311,6 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, TEST_ASSERT_NOT_NULL(digest_mem, "Failed to append auth tag"); } } - - ut_params->obuf->nb_segs = segs; } /* From bf18c92ef932b00b5d65db05948956ea0f634ddc Mon Sep 17 00:00:00 2001 From: Nithinsen Kaithakadan Date: Fri, 23 Jan 2026 10:56:50 +0000 Subject: [PATCH 21/28] test/crypto: fix RSA sign data length This patch fixes RSA sign data length assignment to correct value. 
The length was previously altered during a test scenario and is now restored to the proper value. Fixes: 9682e8246ae2 ("test/crypto: add negative case for RSA verification") Cc: stable@dpdk.org Signed-off-by: Nithinsen Kaithakadan Acked-by: Akhil Goyal --- app/test/test_cryptodev_asym.c | 1 + 1 file changed, 1 insertion(+) diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c index 111f675c948..34c15e26bcd 100644 --- a/app/test/test_cryptodev_asym.c +++ b/app/test/test_cryptodev_asym.c @@ -139,6 +139,7 @@ queue_ops_rsa_sign_verify(void *sess) /* Negative test */ result_op->asym->rsa.sign.data[0] ^= 0xff; + result_op->asym->rsa.sign.length = RTE_DIM(rsa_n); if (rte_cryptodev_enqueue_burst(dev_id, 0, &result_op, 1) != 1) { RTE_LOG(ERR, USER1, "Error sending packet for verify\n"); status = TEST_FAILED; From 2a5052b9343c4414308925aa80de2c9df03f3614 Mon Sep 17 00:00:00 2001 From: Garvit Varshney Date: Tue, 27 Jan 2026 10:04:59 +0530 Subject: [PATCH 22/28] crypto/cnxk: return decrypted data for RSA verify For RSA verify operations with RTE_CRYPTO_RSA_PADDING_NONE, the driver cannot determine which padding algorithm the application is using. As per the API specification in rte_crypto_asym.h, when RTE_CRYPTO_RSA_PADDING_NONE and RTE_CRYPTO_ASYM_OP_VERIFY are selected, the decrypted signature should be returned to the application in the cipher output buffer. 
Fixes: dfd038b97ec3 ("crypto/cnxk: refactor RSA verification") Cc: stable@dpdk.org Signed-off-by: Garvit Varshney Acked-by: Akhil Goyal --- .mailmap | 1 + drivers/crypto/cnxk/cnxk_ae.h | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index cdaa471715a..81499a3337c 100644 --- a/.mailmap +++ b/.mailmap @@ -497,6 +497,7 @@ Ganghui Zeng Gao Feng Gaoxiang Liu Gargi Sau +Garvit Varshney Gary Mussar Gaurav Singh Gautam Dawar diff --git a/drivers/crypto/cnxk/cnxk_ae.h b/drivers/crypto/cnxk/cnxk_ae.h index 912a2a94965..21a0c8068a9 100644 --- a/drivers/crypto/cnxk/cnxk_ae.h +++ b/drivers/crypto/cnxk/cnxk_ae.h @@ -1591,9 +1591,10 @@ cnxk_ae_dequeue_rsa_op(struct rte_crypto_op *cop, uint8_t *rptr, break; case RTE_CRYPTO_ASYM_OP_VERIFY: if (rsa_ctx->padding.type == RTE_CRYPTO_RSA_PADDING_NONE) { - rsa->sign.length = rsa_ctx->n.length; - if (memcmp(rptr, rsa->message.data, rsa->message.length)) - cop->status = RTE_CRYPTO_OP_STATUS_ERROR; + /* Application compares decrypted data with message for SW padding schemes + */ + rsa->cipher.length = rsa_ctx->n.length; + memcpy(rsa->cipher.data, rptr, rsa->cipher.length); } else { /* Get length of signed output */ rsa->sign.length = rte_cpu_to_be_16(*((uint16_t *)rptr)); From 377a3b2393c06dec84688d1e56d8d7f9eeb53980 Mon Sep 17 00:00:00 2001 From: Sameer Vaze Date: Tue, 16 Dec 2025 12:36:16 -0700 Subject: [PATCH 23/28] compress/zlib: fix UDC checksum Fixes bugs with casting and checksum calculation for UDC checksum Fixes: 0dc314debb22 ("compress/zlib: support dictionaries and PDCP checksum") Cc: stable@dpdk.org Signed-off-by: Sameer Vaze Acked-by: Ashish Gupta --- drivers/compress/zlib/zlib_pmd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/compress/zlib/zlib_pmd.c b/drivers/compress/zlib/zlib_pmd.c index 1d7651dd7d4..ed3905484ca 100644 --- a/drivers/compress/zlib/zlib_pmd.c +++ b/drivers/compress/zlib/zlib_pmd.c @@ -48,12 +48,12 @@ 
process_zlib_deflate_chksum(struct rte_comp_op *op, return; } - dictionary_start = (uint32_t)(*dictionary); - dictionary_end = (uint32_t)(*(dictionary + dictionary_len - 4)); + dictionary_start = *(uint32_t *)dictionary; + dictionary_end = *(uint32_t *)(dictionary + dictionary_len - 4); sum = (dictionary_start & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) - + (dictionary_start & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)) + + ((dictionary_start & TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD) >> 4) + (dictionary_end & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) - + (dictionary_end & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)); + + ((dictionary_end & TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD) >> 4); op->output_chksum = ~(sum_bytes[0] + sum_bytes[1] + sum_bytes[2] + sum_bytes[3]) & BOTTOM_NIBBLE_OF_BYTE; @@ -98,12 +98,12 @@ process_zlib_inflate_chksum(struct rte_comp_op *op, return; } - dictionary_start = (uint32_t)(*dictionary); - dictionary_end = (uint32_t)(*(dictionary + dictionary_len - 4)); + dictionary_start = *(uint32_t *)dictionary; + dictionary_end = *(uint32_t *)(dictionary + dictionary_len - 4); sum = (dictionary_start & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) - + (dictionary_start & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)) + + ((dictionary_start & TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD) >> 4) + (dictionary_end & BOTTOM_NIBBLE_OF_BYTES_IN_DOUBLE_WORD) - + (dictionary_end & (TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD >> 4)); + + ((dictionary_end & TOP_NIBBLE_OF_BYTE_IN_DOUBLE_WORD) >> 4); op->output_chksum = ~(sum_bytes[0] + sum_bytes[1] + sum_bytes[2] + sum_bytes[3]) & BOTTOM_NIBBLE_OF_BYTE; From aa12932f602b04bbfca8898e2e8acc8b23c8e292 Mon Sep 17 00:00:00 2001 From: Shaokai Zhang Date: Wed, 19 Nov 2025 20:02:30 +0800 Subject: [PATCH 24/28] crypto/openssl: support AES-XTS operations Extend openssl crypto PMD to support AES XTS operations. 
Signed-off-by: Shaokai Zhang Reviewed-by: Joey Xing Acked-by: Akhil Goyal --- doc/guides/cryptodevs/features/openssl.ini | 2 ++ doc/guides/cryptodevs/openssl.rst | 1 + doc/guides/rel_notes/release_26_03.rst | 4 ++++ drivers/crypto/openssl/openssl_pmd_private.h | 2 +- drivers/crypto/openssl/rte_openssl_pmd.c | 13 +++++++++++++ drivers/crypto/openssl/rte_openssl_pmd_ops.c | 20 ++++++++++++++++++++ 6 files changed, 41 insertions(+), 1 deletion(-) diff --git a/doc/guides/cryptodevs/features/openssl.ini b/doc/guides/cryptodevs/features/openssl.ini index df6e7de3164..1ce6efd3a5a 100644 --- a/doc/guides/cryptodevs/features/openssl.ini +++ b/doc/guides/cryptodevs/features/openssl.ini @@ -24,6 +24,8 @@ AES CBC (256) = Y AES CTR (128) = Y AES CTR (192) = Y AES CTR (256) = Y +AES XTS (128) = Y +AES XTS (256) = Y 3DES CBC = Y 3DES CTR = Y DES DOCSIS BPI = Y diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index d467069cace..e48f425434a 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -22,6 +22,7 @@ Supported cipher algorithms: * ``RTE_CRYPTO_CIPHER_3DES_CBC`` * ``RTE_CRYPTO_CIPHER_AES_CBC`` * ``RTE_CRYPTO_CIPHER_AES_CTR`` +* ``RTE_CRYPTO_CIPHER_AES_XTS`` * ``RTE_CRYPTO_CIPHER_3DES_CTR`` * ``RTE_CRYPTO_CIPHER_DES_DOCSISBPI`` diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index 5c2a4bb32ec..e8b04868e73 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -82,6 +82,10 @@ New Features * NEA5, NIA5, NCA5: AES 256 confidentiality, integrity and AEAD modes. * NEA6, NIA6, NCA6: ZUC 256 confidentiality, integrity and AEAD modes. +* **Updated openssl crypto driver.** + + * Added support for AES-XTS cipher algorithm. 
+ Removed Items ------------- diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h index fe89e522e1b..d5a751600a4 100644 --- a/drivers/crypto/openssl/openssl_pmd_private.h +++ b/drivers/crypto/openssl/openssl_pmd_private.h @@ -118,7 +118,7 @@ struct __rte_cache_aligned openssl_session { /**< cipher algorithm */ struct { - uint8_t data[32]; + uint8_t data[64]; /**< key data */ size_t length; /**< key length in bytes */ diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 4f171f48cc9..2d803912d49 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -211,6 +211,18 @@ get_cipher_algo(enum rte_crypto_cipher_algorithm sess_algo, size_t keylen, res = -EINVAL; } break; + case RTE_CRYPTO_CIPHER_AES_XTS: + switch (keylen) { + case 32: + *algo = EVP_aes_128_xts(); + break; + case 64: + *algo = EVP_aes_256_xts(); + break; + default: + res = -EINVAL; + } + break; case RTE_CRYPTO_CIPHER_AES_CTR: switch (keylen) { case 16: @@ -493,6 +505,7 @@ openssl_set_session_cipher_parameters(struct openssl_session *sess, case RTE_CRYPTO_CIPHER_3DES_CBC: case RTE_CRYPTO_CIPHER_AES_CBC: case RTE_CRYPTO_CIPHER_AES_CTR: + case RTE_CRYPTO_CIPHER_AES_XTS: sess->cipher.mode = OPENSSL_CIPHER_LIB; sess->cipher.algo = xform->cipher.algo; sess->cipher.ctx = EVP_CIPHER_CTX_new(); diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c index 5095e6cbea8..df5c12626d3 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c +++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c @@ -269,6 +269,26 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = { }, } }, } }, + { /* AES XTS */ + .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, + {.sym = { + .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER, + {.cipher = { + .algo = RTE_CRYPTO_CIPHER_AES_XTS, + .block_size = 16, + .key_size = { + .min = 32, + .max = 64, 
+ .increment = 32 + }, + .iv_size = { + .min = 16, + .max = 16, + .increment = 0 + } + }, } + }, } + }, { /* AES CBC */ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, {.sym = { From cce6320d8d11df78a4c504837c981b7a977fa040 Mon Sep 17 00:00:00 2001 From: Emma Finn Date: Tue, 27 Jan 2026 10:07:41 +0000 Subject: [PATCH 25/28] crypto/openssl: support SHAKE algorithms OpenSSL 3.X has support for SHAKE, Hence adding SHAKE-128 and SHAKE-256 support to the OpenSSL PMD. Signed-off-by: Emma Finn Acked-by: Akhil Goyal --- doc/guides/cryptodevs/features/openssl.ini | 2 + doc/guides/cryptodevs/openssl.rst | 2 + doc/guides/rel_notes/release_26_03.rst | 1 + drivers/crypto/openssl/rte_openssl_pmd.c | 36 ++++++++++++++-- drivers/crypto/openssl/rte_openssl_pmd_ops.c | 44 ++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) diff --git a/doc/guides/cryptodevs/features/openssl.ini b/doc/guides/cryptodevs/features/openssl.ini index 1ce6efd3a5a..536557e9e09 100644 --- a/doc/guides/cryptodevs/features/openssl.ini +++ b/doc/guides/cryptodevs/features/openssl.ini @@ -45,6 +45,8 @@ SHA384 = Y SHA384 HMAC = Y SHA512 = Y SHA512 HMAC = Y +SHAKE_128 = Y +SHAKE_256 = Y AES GMAC = Y ; diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index e48f425434a..921592ba2d9 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -41,6 +41,8 @@ Supported authentication algorithms: * ``RTE_CRYPTO_AUTH_SHA256_HMAC`` * ``RTE_CRYPTO_AUTH_SHA384_HMAC`` * ``RTE_CRYPTO_AUTH_SHA512_HMAC`` +* ``RTE_CRYPTO_AUTH_SHAKE_128`` +* ``RTE_CRYPTO_AUTH_SHAKE_256`` Supported AEAD algorithms: diff --git a/doc/guides/rel_notes/release_26_03.rst b/doc/guides/rel_notes/release_26_03.rst index e8b04868e73..afdf1af06c2 100644 --- a/doc/guides/rel_notes/release_26_03.rst +++ b/doc/guides/rel_notes/release_26_03.rst @@ -85,6 +85,7 @@ New Features * **Updated openssl crypto driver.** * Added support for AES-XTS cipher algorithm. 
+ * Added support for SHAKE-128 and SHAKE-256 authentication algorithms. Removed Items diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 2d803912d49..e5fa1a4eeb9 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -282,6 +282,14 @@ get_auth_algo(enum rte_crypto_auth_algorithm sessalgo, case RTE_CRYPTO_AUTH_SHA512_HMAC: *algo = EVP_sha512(); break; +#if (OPENSSL_VERSION_NUMBER >= 0x30000000L) + case RTE_CRYPTO_AUTH_SHAKE_128: + *algo = EVP_shake128(); + break; + case RTE_CRYPTO_AUTH_SHAKE_256: + *algo = EVP_shake256(); + break; +#endif default: res = -EINVAL; break; @@ -672,6 +680,10 @@ openssl_set_session_auth_parameters(struct openssl_session *sess, case RTE_CRYPTO_AUTH_SHA256: case RTE_CRYPTO_AUTH_SHA384: case RTE_CRYPTO_AUTH_SHA512: +#if (OPENSSL_VERSION_NUMBER >= 0x30000000L) + case RTE_CRYPTO_AUTH_SHAKE_128: + case RTE_CRYPTO_AUTH_SHAKE_256: +#endif sess->auth.mode = OPENSSL_AUTH_AS_AUTH; if (get_auth_algo(xform->auth.algo, &sess->auth.auth.evp_algo) != 0) @@ -1410,7 +1422,7 @@ process_openssl_auth_decryption_ccm(struct rte_mbuf *mbuf_src, int offset, static int process_openssl_auth(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset, __rte_unused uint8_t *iv, __rte_unused EVP_PKEY * pkey, - int srclen, EVP_MD_CTX *ctx, const EVP_MD *algo) + int srclen, EVP_MD_CTX *ctx, const EVP_MD *algo, int digest_length) { size_t dstlen; struct rte_mbuf *m; @@ -1450,8 +1462,24 @@ process_openssl_auth(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset, } process_auth_final: - if (EVP_DigestFinal_ex(ctx, dst, (unsigned int *)&dstlen) <= 0) + /* SHAKE algorithms are XOFs and require EVP_DigestFinalXOF */ + if (algo == EVP_shake128() || algo == EVP_shake256()) { +#if (OPENSSL_VERSION_NUMBER >= 0x30000000L) + /* Set XOF output length before calling EVP_DigestFinalXOF */ + if (EVP_MD_CTX_ctrl(ctx, EVP_MD_CTRL_XOF_LEN, digest_length, NULL) <= 0) + goto process_auth_err; + if 
(EVP_DigestFinalXOF(ctx, dst, digest_length) <= 0) + goto process_auth_err; +#else + RTE_SET_USED(digest_length); + OPENSSL_LOG(ERR, "SHAKE algorithms require OpenSSL 3.0+"); goto process_auth_err; +#endif + } else { + if (EVP_DigestFinal_ex(ctx, dst, (unsigned int *)&dstlen) <= 0) + goto process_auth_err; + } + return 0; process_auth_err: @@ -2008,7 +2036,7 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op, ctx_a = get_local_auth_ctx(sess, qp); status = process_openssl_auth(mbuf_src, dst, op->sym->auth.data.offset, NULL, NULL, srclen, - ctx_a, sess->auth.auth.evp_algo); + ctx_a, sess->auth.auth.evp_algo, sess->auth.digest_length); break; case OPENSSL_AUTH_AS_HMAC: ctx_h = get_local_hmac_ctx(sess, qp); @@ -4021,12 +4049,14 @@ mldsa_sign_op_evp(struct rte_crypto_op *cop, case RTE_CRYPTO_AUTH_SHA3_512: check_md = EVP_sha3_512(); break; +#if (OPENSSL_VERSION_NUMBER >= 0x30000000L) case RTE_CRYPTO_AUTH_SHAKE_128: check_md = EVP_shake128(); break; case RTE_CRYPTO_AUTH_SHAKE_256: check_md = EVP_shake256(); break; +#endif default: break; } diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c index df5c12626d3..0f2b82ec00f 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c +++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c @@ -289,6 +289,50 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = { }, } }, } }, +#if (OPENSSL_VERSION_NUMBER >= 0x30000000L) + { /* SHAKE_128 */ + .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, + {.sym = { + .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, + {.auth = { + .algo = RTE_CRYPTO_AUTH_SHAKE_128, + .block_size = 168, + .key_size = { + .min = 0, + .max = 0, + .increment = 0 + }, + .digest_size = { + .min = 1, + .max = 256, + .increment = 1 + }, + .iv_size = { 0 } + }, } + }, } + }, + { /* SHAKE_256 */ + .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, + {.sym = { + .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, + {.auth = { + .algo = RTE_CRYPTO_AUTH_SHAKE_256, + 
.block_size = 136, + .key_size = { + .min = 0, + .max = 0, + .increment = 0 + }, + .digest_size = { + .min = 1, + .max = 256, + .increment = 1 + }, + .iv_size = { 0 } + }, } + }, } + }, +#endif { /* AES CBC */ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, {.sym = { From fe79e9f2f474cff97249849024650ca0aec77cad Mon Sep 17 00:00:00 2001 From: Rahul Bhansali Date: Wed, 28 Jan 2026 19:13:08 +0530 Subject: [PATCH 26/28] test/security: check IPsec rule for out-of-place case For out-of-place (OOP) inline ingress test, some of the hardware supports ESP specific flow rule instead of default flow rule. OOP test case will try first using ESP specific flow rule with SPI specified in flow pattern. If ESP rule is not supported then will retry with default flow rule. Signed-off-by: Rahul Bhansali Acked-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 52 ++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index 09d710eff2f..8b88fce3e99 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -542,6 +542,48 @@ init_mempools(unsigned int nb_mbuf) return 0; } +static int +create_ipsec_flow(uint16_t portid, void *ses, uint32_t spi) +{ + struct rte_flow_item_esp esp_spec; + struct rte_flow_action action[2]; + struct rte_flow_item pattern[2]; + struct rte_flow_attr attr = {0}; + struct rte_flow_error err; + struct rte_flow *flow; + int ret; + + esp_spec.hdr.spi = rte_cpu_to_be_32(spi); + + pattern[0].type = RTE_FLOW_ITEM_TYPE_ESP; + pattern[0].spec = &esp_spec; + pattern[0].mask = &rte_flow_item_esp_mask; + pattern[0].last = NULL; + pattern[1].type = RTE_FLOW_ITEM_TYPE_END; + + action[0].type = RTE_FLOW_ACTION_TYPE_SECURITY; + action[0].conf = ses; + action[1].type = RTE_FLOW_ACTION_TYPE_END; + action[1].conf = NULL; + + attr.ingress = 1; + + ret = rte_flow_validate(portid, &attr, pattern, action, &err); + if (ret) { + 
printf("\nValidate ESP flow failed, ret = %d\n", ret); + return -1; + } + flow = rte_flow_create(portid, &attr, pattern, action, &err); + if (flow == NULL) { + printf("\nESP flow rule create failed\n"); + return -1; + } + + default_flow[portid] = flow; + + return 0; +} + static int create_default_flow(uint16_t portid) { @@ -1373,7 +1415,15 @@ test_ipsec_inline_proto_process(struct ipsec_test_data *td, } if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) { - ret = create_default_flow(port_id); + if (flags->inb_oop) { + ret = create_ipsec_flow(port_id, ses, td->ipsec_xform.spi); + if (ret) { + /* Check with default flow rule */ + printf("\nFailed to create ESP flow, try with default flow"); + ret = create_default_flow(port_id); + } + } else + ret = create_default_flow(port_id); if (ret) goto out; } From 6f3d4ea3f2ec3c2a3ccaa0a16497727ed4763b40 Mon Sep 17 00:00:00 2001 From: Sunyang Wu Date: Mon, 17 Nov 2025 17:08:51 +0800 Subject: [PATCH 27/28] examples/ipsec-secgw: support SM4-CBC and SM3-HMAC This patch adds support for Chinese cryptographic algorithms in the IPsec security gateway example application: - Add SM4-CBC cipher algorithm support with 16-byte IV and key; - Add SM3-HMAC authentication algorithm support with 20-byte key; - Update SA configuration parsing to recognize "sm4-cbc" and "sm3-hmac" keywords; - Implement proper IV handling and authentication offset/length configuration. These additions enable the IPsec security gateway to use Chinese national cryptographic standards for secure communications. 
Signed-off-by: Sunyang Wu Acked-by: Akhil Goyal --- doc/guides/sample_app_ug/ipsec_secgw.rst | 9 +++++++++ examples/ipsec-secgw/esp.c | 5 +++++ examples/ipsec-secgw/sa.c | 17 ++++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/guides/sample_app_ug/ipsec_secgw.rst b/doc/guides/sample_app_ug/ipsec_secgw.rst index 7319505fe9f..7c31c96b7c3 100644 --- a/doc/guides/sample_app_ug/ipsec_secgw.rst +++ b/doc/guides/sample_app_ug/ipsec_secgw.rst @@ -576,6 +576,7 @@ where each options means: * *aes-128-ctr*: AES-CTR 128-bit algorithm * *3des-cbc*: 3DES-CBC 192-bit algorithm * *des-cbc*: DES-CBC 64-bit algorithm + * *sm4-cbc*: SM4-CBC 128-bit algorithm * Syntax: *cipher_algo * @@ -605,6 +606,7 @@ where each options means: * *sha1-hmac*: HMAC SHA1 algorithm * *sha256-hmac*: HMAC SHA256 algorithm * *aes-xcbc-mac*: AES XCBC MAC algorithm + * *sm3-hmac*: HMAC SM3 algorithm ```` @@ -820,6 +822,13 @@ Example SA rules: src 1111:1111:1111:1111:1111:1111:1111:5555 \ dst 2222:2222:2222:2222:2222:2222:2222:5555 + sa out 30 cipher_algo sm4-cbc \ + cipher_key 01:23:45:67:89:ab:cd:ef:fe:dc:ba:98:76:54:32:10 \ + auth_algo sm3-hmac \ + auth_key 01:23:45:67:89:ab:cd:ef:fe:dc:ba:98:76:54:32:10:11:22:33:44 \ + mode ipv4-tunnel \ + src 172.16.1.5 dst 172.16.2.5 + sa in 105 aead_algo aes-128-gcm \ aead_key de:ad:be:ef:de:ad:be:ef:de:ad:be:ef:de:ad:be:ef:de:ad:be:ef \ mode ipv4-tunnel src 172.16.2.5 dst 172.16.1.5 diff --git a/examples/ipsec-secgw/esp.c b/examples/ipsec-secgw/esp.c index b72a5604c80..46c3ad3ec73 100644 --- a/examples/ipsec-secgw/esp.c +++ b/examples/ipsec-secgw/esp.c @@ -103,6 +103,7 @@ esp_inbound(struct rte_mbuf *m, struct ipsec_sa *sa, case RTE_CRYPTO_CIPHER_DES_CBC: case RTE_CRYPTO_CIPHER_3DES_CBC: case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_SM4_CBC: /* Copy IV at the end of crypto operation */ rte_memcpy(iv_ptr, iv, sa->iv_len); break; @@ -123,6 +124,7 @@ esp_inbound(struct rte_mbuf *m, struct ipsec_sa *sa, case 
RTE_CRYPTO_AUTH_SHA1_HMAC: case RTE_CRYPTO_AUTH_SHA256_HMAC: case RTE_CRYPTO_AUTH_AES_XCBC_MAC: + case RTE_CRYPTO_AUTH_SM3_HMAC: sym_cop->auth.data.offset = ip_hdr_len; sym_cop->auth.data.length = sizeof(struct rte_esp_hdr) + sa->iv_len + payload_len; @@ -341,6 +343,7 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa, case RTE_CRYPTO_CIPHER_DES_CBC: case RTE_CRYPTO_CIPHER_3DES_CBC: case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_SM4_CBC: memset(iv, 0, sa->iv_len); break; case RTE_CRYPTO_CIPHER_AES_CTR: @@ -405,6 +408,7 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa, case RTE_CRYPTO_CIPHER_DES_CBC: case RTE_CRYPTO_CIPHER_3DES_CBC: case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_SM4_CBC: sym_cop->cipher.data.offset = ip_hdr_len + sizeof(struct rte_esp_hdr); sym_cop->cipher.data.length = pad_payload_len + sa->iv_len; @@ -436,6 +440,7 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa, case RTE_CRYPTO_AUTH_SHA1_HMAC: case RTE_CRYPTO_AUTH_SHA256_HMAC: case RTE_CRYPTO_AUTH_AES_XCBC_MAC: + case RTE_CRYPTO_AUTH_SM3_HMAC: sym_cop->auth.data.offset = ip_hdr_len; sym_cop->auth.data.length = sizeof(struct rte_esp_hdr) + sa->iv_len + pad_payload_len; diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c index 313919b4b57..86aeb25a498 100644 --- a/examples/ipsec-secgw/sa.c +++ b/examples/ipsec-secgw/sa.c @@ -128,6 +128,13 @@ const struct supported_cipher_algo cipher_algos[] = { .iv_len = 8, .block_size = 8, .key_len = 8 + }, + { + .keyword = "sm4-cbc", + .algo = RTE_CRYPTO_CIPHER_SM4_CBC, + .iv_len = 16, + .block_size = 16, + .key_len = 16 } }; @@ -175,6 +182,12 @@ const struct supported_auth_algo auth_algos[] = { .algo = RTE_CRYPTO_AUTH_AES_XCBC_MAC, .digest_len = 12, .key_len = 16 + }, + { + .keyword = "sm3-hmac", + .algo = RTE_CRYPTO_AUTH_SM3_HMAC, + .digest_len = 12, + .key_len = 20 } }; @@ -502,7 +515,8 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, return; if (algo->algo == RTE_CRYPTO_CIPHER_AES_CBC || - algo->algo 
== RTE_CRYPTO_CIPHER_3DES_CBC) + algo->algo == RTE_CRYPTO_CIPHER_3DES_CBC || + algo->algo == RTE_CRYPTO_CIPHER_SM4_CBC) rule->salt = (uint32_t)rte_rand(); if (algo->algo == RTE_CRYPTO_CIPHER_AES_CTR) { @@ -1319,6 +1333,7 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], case RTE_CRYPTO_CIPHER_DES_CBC: case RTE_CRYPTO_CIPHER_3DES_CBC: case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_SM4_CBC: iv_length = sa->iv_len; break; case RTE_CRYPTO_CIPHER_AES_CTR: From c8b0f9bbcfe38c60f39c7582345415d1970ae990 Mon Sep 17 00:00:00 2001 From: gong-flying Date: Thu, 12 Feb 2026 15:23:06 +0800 Subject: [PATCH 28/28] feat:add riscv support for hash --- config/riscv/meson.build | 2 +- lib/hash/rte_cmp_riscv.h | 93 ++++++++++++++++++++++++++++++++++++++ lib/hash/rte_cuckoo_hash.c | 2 +- lib/hash/rte_cuckoo_hash.h | 6 ++- 4 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 lib/hash/rte_cmp_riscv.h diff --git a/config/riscv/meson.build b/config/riscv/meson.build index 07d7d9da230..712d02ff919 100644 --- a/config/riscv/meson.build +++ b/config/riscv/meson.build @@ -43,7 +43,7 @@ vendor_generic = { ['RTE_MAX_NUMA_NODES', 2] ], 'arch_config': { - 'generic': {'machine_args': ['-march=rv64gc']} + 'generic': {'machine_args': ['-march=rv64gc_zve64x']} } } diff --git a/lib/hash/rte_cmp_riscv.h b/lib/hash/rte_cmp_riscv.h new file mode 100644 index 00000000000..b2ae0ce2875 --- /dev/null +++ b/lib/hash/rte_cmp_riscv.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 Intel Corporation + */ + +#include + +/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ +static inline int +rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const uint8_t *p1 = (const uint8_t *)key1; + const uint8_t *p2 = (const uint8_t *)key2; + size_t offset = 0; + + while (offset < 16) { + size_t vl = __riscv_vsetvl_e8m1(16 - offset); + + vuint8m1_t v1 = __riscv_vle8_v_u8m1(p1 + offset, vl); + 
vuint8m1_t v2 = __riscv_vle8_v_u8m1(p2 + offset, vl); + + /* find != bytes */ + vbool8_t neq = __riscv_vmsne_vv_u8m1_b8(v1, v2, vl); + + /* if any byte mismatches -> not equal */ + if (__riscv_vfirst_m_b8(neq, vl) >= 0) + return 1; + + offset += vl; + } + + /* all bytes equal */ + return 0; +} + +static inline int +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len); +} + +static inline int +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static inline int +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k32_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static inline int +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static inline int +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static inline int +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 96, + (const char *) key2 + 96, key_len); +} + +static inline int +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t 
key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k64_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c index 9cf94645f6b..bca876fc98b 100644 --- a/lib/hash/rte_cuckoo_hash.c +++ b/lib/hash/rte_cuckoo_hash.c @@ -357,7 +357,7 @@ rte_hash_create(const struct rte_hash_parameters *params) * If x86 architecture is used, select appropriate compare function, * which may use x86 intrinsics, otherwise use memcmp */ -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_ARCH_RISCV) /* Select function to compare keys */ switch (params->key_len) { case 16: diff --git a/lib/hash/rte_cuckoo_hash.h b/lib/hash/rte_cuckoo_hash.h index a528f1d1a03..a715be910bb 100644 --- a/lib/hash/rte_cuckoo_hash.h +++ b/lib/hash/rte_cuckoo_hash.h @@ -21,6 +21,10 @@ #include "rte_cmp_arm64.h" #endif +#if defined(RTE_ARCH_RISCV) +#include "rte_cmp_riscv.h" +#endif + /* Macro to enable/disable run-time checking of function parameters */ #if defined(RTE_LIBRTE_HASH_DEBUG) #define RETURN_IF_TRUE(cond, retval) do { \ @@ -34,7 +38,7 @@ #include #include -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) || defined(RTE_ARCH_RISCV) /* * All different options to select a key compare function, * based on the key size and custom function.