Skip to content

Commit 78a4753

Browse files
author
Paolo Abeni
committed
Merge branch 'mlx5-misc-fixes-2025-12-09'
Tariq Toukan says:

====================
mlx5 misc fixes 2025-12-09

This patchset provides misc bug fixes from the team to the mlx5 core and
Eth drivers.
====================

Link: https://patch.msgid.link/1765284977-1363052-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2 parents 71e6b15 + 4198a14 commit 78a4753

File tree

10 files changed

+152
-23
lines changed

10 files changed

+152
-23
lines changed

drivers/net/ethernet/mellanox/mlx5/core/devlink.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
197197
struct pci_dev *pdev = dev->pdev;
198198
int ret = 0;
199199

200+
if (mlx5_fw_reset_in_progress(dev)) {
201+
NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset");
202+
return -EBUSY;
203+
}
204+
200205
if (mlx5_dev_is_lightweight(dev)) {
201206
if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
202207
return -EOPNOTSUPP;

drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "lib/eq.h"
3434
#include "fw_tracer.h"
3535
#include "fw_tracer_tracepoint.h"
36+
#include <linux/ctype.h>
3637

3738
static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
3839
{
@@ -358,6 +359,47 @@ static const char *VAL_PARM = "%llx";
358359
static const char *REPLACE_64_VAL_PARM = "%x%x";
359360
static const char *PARAM_CHAR = "%";
360361

362+
static bool mlx5_is_valid_spec(const char *str)
363+
{
364+
/* Parse format specifiers to find the actual type.
365+
* Structure: %[flags][width][.precision][length]type
366+
* Skip flags, width, precision & length.
367+
*/
368+
while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l')
369+
str++;
370+
371+
/* Check if it's a valid integer/hex specifier or %%:
372+
* Valid formats: %x, %d, %i, %u, etc.
373+
*/
374+
if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' &&
375+
*str != 'u' && *str != 'c' && *str != '%')
376+
return false;
377+
378+
return true;
379+
}
380+
381+
static bool mlx5_tracer_validate_params(const char *str)
382+
{
383+
const char *substr = str;
384+
385+
if (!str)
386+
return false;
387+
388+
substr = strstr(substr, PARAM_CHAR);
389+
while (substr) {
390+
if (!mlx5_is_valid_spec(substr + 1))
391+
return false;
392+
393+
if (*(substr + 1) == '%')
394+
substr = strstr(substr + 2, PARAM_CHAR);
395+
else
396+
substr = strstr(substr + 1, PARAM_CHAR);
397+
398+
}
399+
400+
return true;
401+
}
402+
361403
static int mlx5_tracer_message_hash(u32 message_id)
362404
{
363405
return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
@@ -419,6 +461,10 @@ static int mlx5_tracer_get_num_of_params(char *str)
419461
char *substr, *pstr = str;
420462
int num_of_params = 0;
421463

464+
/* Validate that all parameters are valid before processing */
465+
if (!mlx5_tracer_validate_params(str))
466+
return -EINVAL;
467+
422468
/* replace %llx with %x%x */
423469
substr = strstr(pstr, VAL_PARM);
424470
while (substr) {
@@ -427,11 +473,15 @@ static int mlx5_tracer_get_num_of_params(char *str)
427473
substr = strstr(pstr, VAL_PARM);
428474
}
429475

430-
/* count all the % characters */
476+
/* count all the % characters, but skip %% (escaped percent) */
431477
substr = strstr(str, PARAM_CHAR);
432478
while (substr) {
433-
num_of_params += 1;
434-
str = substr + 1;
479+
if (*(substr + 1) != '%') {
480+
num_of_params += 1;
481+
str = substr + 1;
482+
} else {
483+
str = substr + 2;
484+
}
435485
substr = strstr(str, PARAM_CHAR);
436486
}
437487

@@ -570,14 +620,17 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
570620
{
571621
char tmp[512];
572622

573-
snprintf(tmp, sizeof(tmp), str_frmt->string,
574-
str_frmt->params[0],
575-
str_frmt->params[1],
576-
str_frmt->params[2],
577-
str_frmt->params[3],
578-
str_frmt->params[4],
579-
str_frmt->params[5],
580-
str_frmt->params[6]);
623+
if (str_frmt->invalid_string)
624+
snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string);
625+
else
626+
snprintf(tmp, sizeof(tmp), str_frmt->string,
627+
str_frmt->params[0],
628+
str_frmt->params[1],
629+
str_frmt->params[2],
630+
str_frmt->params[3],
631+
str_frmt->params[4],
632+
str_frmt->params[5],
633+
str_frmt->params[6]);
581634

582635
trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
583636
str_frmt->event_id, tmp);
@@ -609,6 +662,13 @@ static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer,
609662
return 0;
610663
}
611664

665+
static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer,
666+
struct tracer_string_format *cur_string)
667+
{
668+
cur_string->invalid_string = true;
669+
list_add_tail(&cur_string->list, &tracer->ready_strings_list);
670+
}
671+
612672
static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
613673
struct tracer_event *tracer_event)
614674
{
@@ -619,12 +679,18 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
619679
if (!cur_string)
620680
return mlx5_tracer_handle_raw_string(tracer, tracer_event);
621681

622-
cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
623-
cur_string->last_param_num = 0;
624682
cur_string->event_id = tracer_event->event_id;
625683
cur_string->tmsn = tracer_event->string_event.tmsn;
626684
cur_string->timestamp = tracer_event->string_event.timestamp;
627685
cur_string->lost = tracer_event->lost_event;
686+
cur_string->last_param_num = 0;
687+
cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
688+
if (cur_string->num_of_params < 0) {
689+
pr_debug("%s Invalid format string parameters\n",
690+
__func__);
691+
mlx5_tracer_handle_bad_format_string(tracer, cur_string);
692+
return 0;
693+
}
628694
if (cur_string->num_of_params == 0) /* trace with no params */
629695
list_add_tail(&cur_string->list, &tracer->ready_strings_list);
630696
} else {
@@ -634,6 +700,11 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
634700
__func__, tracer_event->string_event.tmsn);
635701
return mlx5_tracer_handle_raw_string(tracer, tracer_event);
636702
}
703+
if (cur_string->num_of_params < 0) {
704+
pr_debug("%s string parameter of invalid string, dumping\n",
705+
__func__);
706+
return 0;
707+
}
637708
cur_string->last_param_num += 1;
638709
if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
639710
pr_debug("%s Number of params exceeds the max (%d)\n",

drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ struct tracer_string_format {
125125
struct list_head list;
126126
u32 timestamp;
127127
bool lost;
128+
bool invalid_string;
128129
};
129130

130131
enum mlx5_fw_tracer_ownership_state {

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ struct page_pool;
6969
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
7070
#define MLX5E_METADATA_ETHER_LEN 8
7171

72-
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN)
72+
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
7373

7474
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
7575
#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,8 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
342342
rt_dst_entry = &rt->dst;
343343
break;
344344
case AF_INET6:
345-
rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow(
346-
dev_net(netdev), NULL, &fl6, NULL);
347-
if (IS_ERR(rt_dst_entry))
345+
if (!IS_ENABLED(CONFIG_IPV6) ||
346+
ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6))
348347
goto neigh;
349348
break;
350349
default:
@@ -359,6 +358,9 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
359358

360359
neigh_ha_snapshot(addr, n, netdev);
361360
ether_addr_copy(dst, addr);
361+
if (attrs->dir == XFRM_DEV_OFFLOAD_OUT &&
362+
is_zero_ether_addr(addr))
363+
neigh_event_send(n, NULL);
362364
dst_release(rt_dst_entry);
363365
neigh_release(n);
364366
return;

drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,11 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
939939
sq->dma_fifo_cc = dma_fifo_cc;
940940
sq->cc = sqcc;
941941

942-
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
942+
/* Do not update BQL for TXQs that got replaced by new active ones, as
943+
* netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq().
944+
*/
945+
if (sq == sq->priv->txq2sq[sq->txq_ix])
946+
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
943947
}
944948

945949
#ifdef CONFIG_MLX5_CORE_IPOIB

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "devlink.h"
5353
#include "lag/lag.h"
5454
#include "en/tc/post_meter.h"
55+
#include "fw_reset.h"
5556

5657
/* There are two match-all miss flows, one for unicast dst mac and
5758
* one for multicast.
@@ -3991,6 +3992,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
39913992
if (IS_ERR(esw))
39923993
return PTR_ERR(esw);
39933994

3995+
if (mlx5_fw_reset_in_progress(esw->dev)) {
3996+
NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset");
3997+
return -EBUSY;
3998+
}
3999+
39944000
if (esw_mode_from_devlink(mode, &mlx5_mode))
39954001
return -EINVAL;
39964002

drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ enum {
1515
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
1616
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED,
1717
MLX5_FW_RESET_FLAGS_UNLOAD_EVENT,
18+
MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS,
1819
};
1920

2021
struct mlx5_fw_reset {
@@ -128,6 +129,16 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
128129
return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
129130
}
130131

132+
bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev)
133+
{
134+
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
135+
136+
if (!fw_reset)
137+
return false;
138+
139+
return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
140+
}
141+
131142
static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
132143
u8 *reset_method)
133144
{
@@ -243,6 +254,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
243254
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
244255
devl_unlock(devlink);
245256
}
257+
258+
clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
246259
}
247260

248261
static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
@@ -462,27 +475,48 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
462475
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
463476
reset_request_work);
464477
struct mlx5_core_dev *dev = fw_reset->dev;
478+
bool nack_request = false;
479+
struct devlink *devlink;
465480
int err;
466481

467482
err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
468-
if (err)
483+
if (err) {
484+
nack_request = true;
469485
mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
486+
} else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) ||
487+
test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
488+
&fw_reset->reset_flags)) {
489+
nack_request = true;
490+
}
470491

471-
if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
472-
!mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) {
492+
devlink = priv_to_devlink(dev);
493+
/* For external resets, try to acquire devl_lock. Skip if devlink reset is
494+
* pending (lock already held)
495+
*/
496+
if (nack_request ||
497+
(!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP,
498+
&fw_reset->reset_flags) &&
499+
!devl_trylock(devlink))) {
473500
err = mlx5_fw_reset_set_reset_sync_nack(dev);
474501
mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
475502
err ? "Failed" : "Sent");
476503
return;
477504
}
505+
478506
if (mlx5_sync_reset_set_reset_requested(dev))
479-
return;
507+
goto unlock;
508+
509+
set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
480510

481511
err = mlx5_fw_reset_set_reset_sync_ack(dev);
482512
if (err)
483513
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
484514
else
485515
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
516+
517+
unlock:
518+
if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags))
519+
devl_unlock(devlink);
486520
}
487521

488522
static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
@@ -722,6 +756,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
722756

723757
if (mlx5_sync_reset_clear_reset_requested(dev, true))
724758
return;
759+
760+
clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
725761
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
726762
}
727763

@@ -758,6 +794,7 @@ static void mlx5_sync_reset_timeout_work(struct work_struct *work)
758794

759795
if (mlx5_sync_reset_clear_reset_requested(dev, true))
760796
return;
797+
clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
761798
mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
762799
}
763800

@@ -844,7 +881,8 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
844881
cancel_work_sync(&fw_reset->reset_reload_work);
845882
cancel_work_sync(&fw_reset->reset_now_work);
846883
cancel_work_sync(&fw_reset->reset_abort_work);
847-
cancel_delayed_work(&fw_reset->reset_timeout_work);
884+
if (test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
885+
mlx5_sync_reset_clear_reset_requested(dev, true);
848886
}
849887

850888
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {

drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
1010
int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
1111
struct netlink_ext_ack *extack);
1212
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
13+
bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev);
1314

1415
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
1516
void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked);

drivers/net/ethernet/mellanox/mlx5/core/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,6 +2232,7 @@ static void shutdown(struct pci_dev *pdev)
22322232

22332233
mlx5_core_info(dev, "Shutdown was called\n");
22342234
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
2235+
mlx5_drain_fw_reset(dev);
22352236
mlx5_drain_health_wq(dev);
22362237
err = mlx5_try_fast_unload(dev);
22372238
if (err)

0 commit comments

Comments
 (0)