From 9282a1e171ad8d2205067e8ec3bbe4e3cef4f29f Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 5 Jan 2026 22:04:38 +0100 Subject: [PATCH 01/96] wifi: ath10k: fix dma_free_coherent() pointer dma_alloc_coherent() allocates a DMA mapped buffer and stores the addresses in XXX_unaligned fields. Those should be reused when freeing the buffer rather than the aligned addresses. Fixes: 2a1e1ad3fd37 ("ath10k: Add support for 64 bit ce descriptor") Cc: stable@vger.kernel.org Signed-off-by: Thomas Fourier Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260105210439.20131-2-fourier.thomas@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/ce.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 7bbda46cfd93..82f120ee1c66 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -1727,8 +1727,8 @@ static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id) (ce_state->src_ring->nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN), - ce_state->src_ring->base_addr_owner_space, - ce_state->src_ring->base_addr_ce_space); + ce_state->src_ring->base_addr_owner_space_unaligned, + ce_state->src_ring->base_addr_ce_space_unaligned); kfree(ce_state->src_ring); } @@ -1737,8 +1737,8 @@ static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id) (ce_state->dest_ring->nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN), - ce_state->dest_ring->base_addr_owner_space, - ce_state->dest_ring->base_addr_ce_space); + ce_state->dest_ring->base_addr_owner_space_unaligned, + ce_state->dest_ring->base_addr_ce_space_unaligned); kfree(ce_state->dest_ring); } @@ -1758,8 +1758,8 @@ static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id) (ce_state->src_ring->nentries * sizeof(struct ce_desc_64) + CE_DESC_RING_ALIGN), - ce_state->src_ring->base_addr_owner_space, - ce_state->src_ring->base_addr_ce_space); + ce_state->src_ring->base_addr_owner_space_unaligned, + ce_state->src_ring->base_addr_ce_space_unaligned); kfree(ce_state->src_ring); } @@ -1768,8 +1768,8 @@ static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id) (ce_state->dest_ring->nentries * sizeof(struct ce_desc_64) + CE_DESC_RING_ALIGN), - ce_state->dest_ring->base_addr_owner_space, - ce_state->dest_ring->base_addr_ce_space); + ce_state->dest_ring->base_addr_owner_space_unaligned, + ce_state->dest_ring->base_addr_ce_space_unaligned); kfree(ce_state->dest_ring); } From bb97131fbf9b708dd9616ac2bdc793ad102b5c48 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Tue, 6 Jan 2026 09:49:04 +0100 Subject: [PATCH 02/96] wifi: ath12k: fix dma_free_coherent() pointer dma_alloc_coherent() allocates a DMA mapped buffer and stores the addresses in XXX_unaligned fields. Those should be reused when freeing the buffer rather than the aligned addresses. Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Cc: stable@vger.kernel.org Signed-off-by: Thomas Fourier Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260106084905.18622-2-fourier.thomas@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/ce.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/ce.c b/drivers/net/wireless/ath/ath12k/ce.c index 9a63608838ac..4aea58446838 100644 --- a/drivers/net/wireless/ath/ath12k/ce.c +++ b/drivers/net/wireless/ath/ath12k/ce.c @@ -984,8 +984,8 @@ void ath12k_ce_free_pipes(struct ath12k_base *ab) dma_free_coherent(ab->dev, pipe->src_ring->nentries * desc_sz + CE_DESC_RING_ALIGN, - pipe->src_ring->base_addr_owner_space, - pipe->src_ring->base_addr_ce_space); + pipe->src_ring->base_addr_owner_space_unaligned, + pipe->src_ring->base_addr_ce_space_unaligned); kfree(pipe->src_ring); pipe->src_ring = NULL; } @@ -995,8 +995,8 @@ void ath12k_ce_free_pipes(struct ath12k_base *ab) dma_free_coherent(ab->dev, pipe->dest_ring->nentries * desc_sz + CE_DESC_RING_ALIGN, - pipe->dest_ring->base_addr_owner_space, - pipe->dest_ring->base_addr_ce_space); + pipe->dest_ring->base_addr_owner_space_unaligned, + pipe->dest_ring->base_addr_ce_space_unaligned); kfree(pipe->dest_ring); pipe->dest_ring = NULL; } @@ -1007,8 +1007,8 @@ void ath12k_ce_free_pipes(struct ath12k_base *ab) dma_free_coherent(ab->dev, pipe->status_ring->nentries * desc_sz + CE_DESC_RING_ALIGN, - pipe->status_ring->base_addr_owner_space, - pipe->status_ring->base_addr_ce_space); + pipe->status_ring->base_addr_owner_space_unaligned, + pipe->status_ring->base_addr_ce_space_unaligned); kfree(pipe->status_ring); pipe->status_ring = NULL; } From 1fed08c5519d2f929457f354d3c06c6a8c33829c Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 8 Jan 2026 11:21:46 +0800 Subject: [PATCH 03/96] wifi: ath12k: don't force radio frequency check in freq_to_idx() freq_to_idx() is used to map a channel to a survey index. Commit acc152f9be20 ("wifi: ath12k: combine channel list for split-phy devices in single-wiphy") adds radio specific frequency range check in this helper to make sure an invalid index is returned if the channel falls outside that range. However, this check introduces a race, resulting in below warnings as reported in [1]. ath12k_pci 0000:08:00.0: chan info: invalid frequency 6455 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6535 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6615 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6695 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6775 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6855 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6935 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 7015 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 7095 (idx 101 out of bounds) ath12k_pci 0000:08:00.0: chan info: invalid frequency 6435 (idx 101 out of bounds) Race scenario: 1) A regdomain covering below frequency range is uploaded to host via WMI_REG_CHAN_LIST_CC_EXT_EVENTID event: Country 00, CFG Regdomain UNSET FW Regdomain 0, num_reg_rules 6 1. (2402 - 2472 @ 40) (0, 20) (0 ms) (FLAGS 360448) (0, 0) 2. (2457 - 2477 @ 20) (0, 20) (0 ms) (FLAGS 360576) (0, 0) 3. (5170 - 5330 @ 160) (0, 20) (0 ms) (FLAGS 264320) (0, 0) 4. (5490 - 5730 @ 160) (0, 20) (0 ms) (FLAGS 264320) (0, 0) 5. (5735 - 5895 @ 160) (0, 20) (0 ms) (FLAGS 264320) (0, 0) 6. (5925 - 7125 @ 320) (0, 24) (0 ms) (FLAGS 2056) (0, 255) As a result, radio frequency range is updated as [2402, 7125] ath12k_pci 0000:08:00.0: mac pdev 0 freq limit updated. New range 2402->7125 MHz If no scan in progress or after scan finished, command WMI_SCAN_CHAN_LIST_CMDID is sent to firmware notifying that firmware is allowed to do scan on all channels within that range. The running path is: /* redomain uploaded */ 1. WMI_REG_CHAN_LIST_CC_EXT_EVENTID 2. ath12k_reg_chan_list_event() 3. ath12k_reg_handle_chan_list() 4. queue_work(..., &ar->regd_update_work) 5. ath12k_regd_update_work() 6. ath12k_regd_update() /* update radio frequency range */ 7. ath12k_mac_update_freq_range() 8. regulatory_set_wiphy_regd() 9. ath12k_reg_notifier() 10. ath12k_reg_update_chan_list() 11. queue_work(..., &ar->regd_channel_update_work) 12. ath12k_regd_update_chan_list_work() /* wait scan finishes */ 13. wait_for_completion_timeout(&ar->scan.completed, ...) /* command notifying list of valid channels */ 14. ath12k_wmi_send_scan_chan_list_cmd() 2) Hardware scan is triggered on all allowed channels. 3) Before scan completed, 11D mechanism detects a new country code ath12k_pci 0000:08:00.0: wmi 11d new cc GB With this code sent to firmware, firmware uploads a new regdomain Country GB, CFG Regdomain ETSI FW Regdomain 2, num_reg_rules 9 1. (2402 - 2482 @ 40) (0, 20) (0 ms) (FLAGS 360448) (0, 0) 2. (5170 - 5250 @ 80) (0, 23) (0 ms) (FLAGS 264192) (0, 0) 3. (5250 - 5330 @ 80) (0, 23) (0 ms) (FLAGS 264216) (0, 0) 4. (5490 - 5590 @ 80) (0, 30) (0 ms) (FLAGS 264208) 5. (5590 - 5650 @ 40) (0, 30) (600000 ms) (FLAGS 264208) 6. (5650 - 5730 @ 80) (0, 30) (0 ms) (FLAGS 264208) 7. (5735 - 5875 @ 80) (0, 14) (0 ms) (FLAGS 264192) (0, 0) 8. (5855 - 5875 @ 20) (0, 14) (0 ms) (FLAGS 264192) (0, 0) 9. (5945 - 6425 @ 320) (0, 24) (0 ms) (FLAGS 2056) (0, 11) Then radio frequency range is updated as [2402, 6425] ath12k_pci 0000:08:00.0: mac pdev 0 freq limit updated. New range 2402->6425 MHz Please note this is a smaller range than the previous one. Later host runs the same path for the purpose of notifying the new channel list. However since scan not completed, host just waits there. Meanwhile, firmware is possibly scanning channels outside the new range. As a result, WMI_CHAN_INFO_EVENTID events for those channels fail freq_to_idx() check and triggers warnings above. Fix this issue by removing radio frequency check in freq_to_idx(). This is valid because channels being scanned do not synchronize with frequency range update. Besides, this won't cause any problem, since freq_to_idx() is only used for survey data. Even out-of-range channels filled in the survey, they won't get delivered to userspace due to the range check already there in ath12k_mac_op_get_survey(). Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: acc152f9be20 ("wifi: ath12k: combine channel list for split-phy devices in single-wiphy") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220871 # 1 Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20260108-ath12k-fix-freq-to-idx-v1-1-b2458cf7aa0d@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index be8b2943094f..3ce5fcb0e460 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6575,16 +6575,9 @@ static int freq_to_idx(struct ath12k *ar, int freq) if (!sband) continue; - for (ch = 0; ch < sband->n_channels; ch++, idx++) { - if (sband->channels[ch].center_freq < - KHZ_TO_MHZ(ar->freq_range.start_freq) || - sband->channels[ch].center_freq > - KHZ_TO_MHZ(ar->freq_range.end_freq)) - continue; - + for (ch = 0; ch < sband->n_channels; ch++, idx++) if (sband->channels[ch].center_freq == freq) goto exit; - } } exit: From 4f431d88ea8093afc7ba55edf4652978c5a68f33 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 10 Jan 2026 00:56:29 +0100 Subject: [PATCH 04/96] wifi: rsi: Fix memory corruption due to not set vif driver data size The struct ieee80211_vif contains trailing space for vif driver data, when struct ieee80211_vif is allocated, the total memory size that is allocated is sizeof(struct ieee80211_vif) + size of vif driver data. The size of vif driver data is set by each WiFi driver as needed. The RSI911x driver does not set vif driver data size, no trailing space for vif driver data is therefore allocated past struct ieee80211_vif . The RSI911x driver does however use the vif driver data to store its vif driver data structure "struct vif_priv". An access to vif->drv_priv leads to access out of struct ieee80211_vif bounds and corruption of some memory. In case of the failure observed locally, rsi_mac80211_add_interface() would write struct vif_priv *vif_info = (struct vif_priv *)vif->drv_priv; vif_info->vap_id = vap_idx. This write corrupts struct fq_tin member struct list_head new_flows . The flow = list_first_entry(head, struct fq_flow, flowchain); in fq_tin_reset() then reports non-NULL bogus address, which when accessed causes a crash. The trigger is very simple, boot the machine with init=/bin/sh , mount devtmpfs, sysfs, procfs, and then do "ip link set wlan0 up", "sleep 1", "ip link set wlan0 down" and the crash occurs. Fix this by setting the correct size of vif driver data, which is the size of "struct vif_priv", so that memory is allocated and the driver can store its driver data in it, instead of corrupting memory around it. Cc: stable@vger.kernel.org Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver") Signed-off-by: Marek Vasut Link: https://patch.msgid.link/20260109235817.150330-1-marex@nabladev.com Signed-off-by: Johannes Berg --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index f3a853edfc11..8c8e074a3a70 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -2035,6 +2035,7 @@ int rsi_mac80211_attach(struct rsi_common *common) hw->queues = MAX_HW_QUEUES; hw->extra_tx_headroom = RSI_NEEDED_HEADROOM; + hw->vif_data_size = sizeof(struct vif_priv); hw->max_rates = 1; hw->max_rate_tries = MAX_RETRIES; From a3034bf0746d88a00cceda9541534a5721445a24 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 9 Jan 2026 20:30:04 +0530 Subject: [PATCH 05/96] wifi: cfg80211: Fix bitrate calculation overflow for HE rates An integer overflow occurs in cfg80211_calculate_bitrate_he() when calculating bitrates for high throughput HE configurations. For example, with 160 MHz bandwidth, HE-MCS 13, HE-NSS 4, and HE-GI 0, the multiplication (result * rate->nss) overflows the 32-bit 'result' variable before division by 8, leading to significantly underestimated bitrate values. The overflow occurs because the NSS multiplication operates on a 32-bit integer that cannot accommodate intermediate values exceeding 4,294,967,295. When overflow happens, the value wraps around, producing incorrect bitrates for high MCS and NSS combinations. Fix this by utilizing the 64-bit 'tmp' variable for the NSS multiplication and subsequent divisions via do_div(). This approach preserves full precision throughout the entire calculation, with the final value assigned to 'result' only after completing all operations. Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20260109-he_bitrate_overflow-v1-1-95575e466b6e@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/wireless/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 27e8a2f52f04..4f581aed45b7 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1561,12 +1561,14 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) tmp = result; tmp *= SCALE; do_div(tmp, mcs_divisors[rate->mcs]); - result = tmp; /* and take NSS, DCM into account */ - result = (result * rate->nss) / 8; + tmp *= rate->nss; + do_div(tmp, 8); if (rate->he_dcm) - result /= 2; + do_div(tmp, 2); + + result = tmp; return result / 10000; } From db1d0b6ab11f612ea8a327663a578c8946efeee9 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 11 Jan 2026 19:19:30 +0200 Subject: [PATCH 06/96] wifi: mac80211: correctly check if CSA is active We are not adding an interface if an existing one is doing CSA. But the check won't work for MLO station interfaces, since for those, vif->bss_conf is zeroed out. Fix this by checking if any link of the vif has an active CSA. Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260111191912.7ceff62fc561.Ia38d27f42684d1cfd82d930d232bd5dea6ab9282@changeid Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7b0aa24c1f97..515384ca2f8f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -350,6 +350,8 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { if (nsdata != sdata && ieee80211_sdata_running(nsdata)) { + struct ieee80211_link_data *link; + /* * Only OCB and monitor mode may coexist */ @@ -376,8 +378,10 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, * will not add another interface while any channel * switch is active. */ - if (nsdata->vif.bss_conf.csa_active) - return -EBUSY; + for_each_link_data(nsdata, link) { + if (link->conf->csa_active) + return -EBUSY; + } /* * The remaining checks are only performed for interfaces From 2120f3a3738a65730c81bf10447b1ff776078915 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Jan 2026 23:00:24 +0300 Subject: [PATCH 07/96] wifi: mwifiex: Fix a loop in mwifiex_update_ampdu_rxwinsize() The "i" iterator variable is used to count two different things but unfortunately we can't store two different numbers in the same variable. Use "i" for the outside loop and "j" for the inside loop. Cc: stable@vger.kernel.org Fixes: d219b7eb3792 ("mwifiex: handle BT coex event to adjust Rx BA window size") Signed-off-by: Dan Carpenter Reviewed-by: Jeff Chen Link: https://patch.msgid.link/aWAM2MGUWRP0zWUd@stanley.mountain Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 354c5ce66045..f3397dc6c422 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -825,7 +825,7 @@ void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags) static void mwifiex_update_ampdu_rxwinsize(struct mwifiex_adapter *adapter, bool coex_flag) { - u8 i; + u8 i, j; u32 rx_win_size; struct mwifiex_private *priv; @@ -863,8 +863,8 @@ static void mwifiex_update_ampdu_rxwinsize(struct mwifiex_adapter *adapter, if (rx_win_size != priv->add_ba_param.rx_win_size) { if (!priv->media_connected) continue; - for (i = 0; i < MAX_NUM_TID; i++) - mwifiex_11n_delba(priv, i); + for (j = 0; j < MAX_NUM_TID; j++) + mwifiex_11n_delba(priv, j); } } } From 39c90b1a1dbe6d7c49d19da6e5aec00980c55d8b Mon Sep 17 00:00:00 2001 From: Manish Dharanenthiran Date: Wed, 7 Jan 2026 11:32:35 +0530 Subject: [PATCH 08/96] wifi: ath12k: cancel scan only on active scan vdev Cancel the scheduled scan request only on the vdev that has an active scan running. Currently, ahvif->links_map is used to obtain the links, but this includes links for which no scan is scheduled. In failure cases where the scan fails due to an invalid channel definition, other links which are not yet brought up (vdev not created) may also be accessed, leading to the following trace: Unable to handle kernel paging request at virtual address 0000000000004c8c pc : _raw_spin_lock_bh+0x1c/0x54 lr : ath12k_scan_abort+0x20/0xc8 [ath12k] Call trace: _raw_spin_lock_bh+0x1c/0x54 (P) ath12k_mac_op_cancel_hw_scan+0xac/0xc4 [ath12k] ieee80211_scan_cancel+0xcc/0x12c [mac80211] ieee80211_do_stop+0x6c4/0x7a8 [mac80211] ieee80211_stop+0x60/0xd8 [mac80211] Skip links that are not created or are not the current scan vdev. This ensures only the scan for the matching links is aborted and avoids aborting unrelated links during cancellation, thus aligning with how start/cleanup manage ar->scan.arvif. Also, remove the redundant arvif->is_started check from ath12k_mac_op_cancel_hw_scan() that was introduced in commit 3863f014ad23 ("wifi: ath12k: symmetrize scan vdev creation and deletion during HW scan") to avoid deleting the scan interface if the scan is triggered on the existing AP vdev as this use case is already handled in ath12k_scan_vdev_clean_work(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: feed05f1526e ("wifi: ath12k: Split scan request for split band device") Signed-off-by: Manish Dharanenthiran Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260107-scan_vdev-v1-1-b600aedc645a@qti.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index f7a2a544bef2..088b5ffd0797 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5495,7 +5495,8 @@ static void ath12k_mac_op_cancel_hw_scan(struct ieee80211_hw *hw, for_each_set_bit(link_id, &links_map, ATH12K_NUM_MAX_LINKS) { arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || arvif->is_started) + if (!arvif || !arvif->is_created || + arvif->ar->scan.arvif != arvif) continue; ar = arvif->ar; From 8b8d6ee53dfdee61b0beff66afe3f712456e707a Mon Sep 17 00:00:00 2001 From: Yingying Tang Date: Mon, 12 Jan 2026 19:55:16 +0800 Subject: [PATCH 09/96] wifi: ath12k: Fix scan state stuck in ABORTING after cancel_remain_on_channel Scan finish workqueue was introduced in __ath12k_mac_scan_finish() by [1]. During ath12k_mac_op_cancel_remain_on_channel(), scan state is set to ABORTING and should be reset to IDLE in the queued work. However, wiphy_work_cancel() is called before exiting ath12k_mac_op_cancel_remain_on_channel(), which prevents the work from running and leaves the state in ABORTING. This blocks all subsequent scan requests. Replace wiphy_work_cancel() with wiphy_work_flush() to ensure the queued work runs and scan state is reset to IDLE. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 3863f014ad23 ("wifi: ath12k: symmetrize scan vdev creation and deletion during HW scan") # [1] Signed-off-by: Yingying Tang Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20260112115516.2144219-1-yingying.tang@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 088b5ffd0797..ed2ac2fe12f0 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -13344,7 +13344,7 @@ static int ath12k_mac_op_cancel_remain_on_channel(struct ieee80211_hw *hw, ath12k_scan_abort(ar); cancel_delayed_work_sync(&ar->scan.timeout); - wiphy_work_cancel(hw->wiphy, &ar->scan.vdev_clean_wk); + wiphy_work_flush(hw->wiphy, &ar->scan.vdev_clean_wk); return 0; } From f88e9fc30a261d63946ddc6cc6a33405e6aa27c3 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Tue, 13 Jan 2026 09:48:11 +0800 Subject: [PATCH 10/96] wifi: ath12k: fix dead lock while flushing management frames Commit [1] converted the management transmission work item into a wiphy work. Since a wiphy work can only run under wiphy lock protection, a race condition happens in below scenario: 1. a management frame is queued for transmission. 2. ath12k_mac_op_flush() gets called to flush pending frames associated with the hardware (i.e, vif being NULL). Then in ath12k_mac_flush() the process waits for the transmission done. 3. Since wiphy lock has been taken by the flush process, the transmission work item has no chance to run, hence the dead lock. >From user view, this dead lock results in below issue: wlp8s0: authenticate with xxxxxx (local address=xxxxxx) wlp8s0: send auth to xxxxxx (try 1/3) wlp8s0: authenticate with xxxxxx (local address=xxxxxx) wlp8s0: send auth to xxxxxx (try 1/3) wlp8s0: authenticated wlp8s0: associate with xxxxxx (try 1/3) wlp8s0: aborting association with xxxxxx by local choice (Reason: 3=DEAUTH_LEAVING) ath12k_pci 0000:08:00.0: failed to flush mgmt transmit queue, mgmt pkts pending 1 The dead lock can be avoided by invoking wiphy_work_flush() to proactively run the queued work item. Note actually it is already present in ath12k_mac_op_flush(), however it does not protect the case where vif being NULL. Hence move it ahead to cover this case as well. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 56dcbf0b5207 ("wifi: ath12k: convert struct ath12k::wmi_mgmt_tx_work to struct wiphy_work") # [1] Reported-by: Stuart Hayhurst Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220959 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20260113-ath12k-fix-dead-lock-while-flushing-v1-1-9713621f3a0f@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ed2ac2fe12f0..8476a1ff308d 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -12143,6 +12143,9 @@ static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *v if (drop) return; + for_each_ar(ah, ar, i) + wiphy_work_flush(hw->wiphy, &ar->wmi_mgmt_tx_work); + /* vif can be NULL when flush() is considered for hw */ if (!vif) { for_each_ar(ah, ar, i) @@ -12150,9 +12153,6 @@ static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *v return; } - for_each_ar(ah, ar, i) - wiphy_work_flush(hw->wiphy, &ar->wmi_mgmt_tx_work); - ahvif = ath12k_vif_to_ahvif(vif); links = ahvif->links_map; for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { From 31707572108da55a005e7fed32cc3869c16b7c16 Mon Sep 17 00:00:00 2001 From: Yingying Tang Date: Tue, 13 Jan 2026 13:46:36 +0800 Subject: [PATCH 11/96] wifi: ath12k: Fix wrong P2P device link id issue Wrong P2P device link id value of 0 was introduced in ath12k_mac_op_tx() by [1]. During the P2P negotiation process, there is only one scan vdev with link ID 15. Currently, the device link ID is incorrectly set to 0 in ath12k_mac_op_tx() during the P2P negotiation process, which leads to TX failures. Set the correct P2P device link ID to 15 to fix the TX failure issue. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00302-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.115823.3 Fixes: 648a121bafa3 ("wifi: ath12k: ath12k_mac_op_tx(): MLO support") # [1] Signed-off-by: Yingying Tang Reviewed-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Cc: linux-next@vger.kernel.org Cc: netdev@vger.kernel.org Link: https://patch.msgid.link/20260113054636.2620035-1-yingying.tang@oss.qualcomm.com Signed-off-by: Jeff Johnson --- Note to linux-next and netdev maintainers: This patch going through the "current" tree conflicts with the following going through the "next" tree: commit 631ee338f04d ("Merge branch 'ath12k-ng' into ath-next") The conflict resolution is to leave the following file unmodified: drivers/net/wireless/ath/ath12k/mac. And to apply the following patch to ath12k_wifi7_mac_op_tx() in the file drivers/net/wireless/ath/ath12k/wifi7/hw.c -705,7 +705,10 return; } } else { - link_id = 0; + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) + link_id = ATH12K_FIRST_SCAN_LINK; + else + link_id = 0; } arvif = rcu_dereference(ahvif->link[link_id]); --- drivers/net/wireless/ath/ath12k/mac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8476a1ff308d..e0e49f782bf8 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9173,7 +9173,10 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, return; } } else { - link_id = 0; + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) + link_id = ATH12K_FIRST_SCAN_LINK; + else + link_id = 0; } arvif = rcu_dereference(ahvif->link[link_id]); From 8439016c3b8b5ab687c2420317b1691585106611 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 20 Nov 2025 12:20:41 -0800 Subject: [PATCH 12/96] ice: initialize ring_stats->syncp The u64_stats_sync structure is empty on 64-bit systems. However, on 32-bit systems it contains a seqcount_t which needs to be initialized. While the memory is zero-initialized, a lack of u64_stats_init means that lockdep won't get initialized properly. Fix this by adding u64_stats_init() calls to the rings just after allocation. Fixes: 2b245cb29421 ("ice: Implement transmit and NAPI support") Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 15621707fbf8..9ebbe1bff214 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -398,6 +398,8 @@ static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi) if (!ring_stats) goto err_out; + u64_stats_init(&ring_stats->syncp); + WRITE_ONCE(tx_ring_stats[i], ring_stats); } @@ -417,6 +419,8 @@ static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi) if (!ring_stats) goto err_out; + u64_stats_init(&ring_stats->syncp); + WRITE_ONCE(rx_ring_stats[i], ring_stats); } From a9d45c22ed120cdd15ff56d0a6e4700c46451901 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Thu, 20 Nov 2025 09:58:26 -0800 Subject: [PATCH 13/96] ice: Avoid detrimental cleanup for bond during interface stop When the user issues an administrative down to an interface that is the primary for an aggregate bond, the prune lists are being purged. This breaks communication to the secondary interface, which shares a prune list on the main switch block while bonded together. For the primary interface of an aggregate, avoid deleting these prune lists during stop, and since they are hardcoded to specific values for the default vlan and QinQ vlans, the attempt to re-add them during the up phase will quietly fail without any additional problem. Fixes: 1e0f9881ef79 ("ice: Flesh out implementation of support for SRIOV on bonded interface") Reviewed-by: Jacob Keller Reviewed-by: Marcin Szycik Signed-off-by: Dave Ertman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 25 ++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 9ebbe1bff214..98010354db15 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3809,22 +3809,31 @@ int ice_vsi_add_vlan_zero(struct ice_vsi *vsi) int ice_vsi_del_vlan_zero(struct ice_vsi *vsi) { struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + struct ice_pf *pf = vsi->back; struct ice_vlan vlan; int err; - vlan = ICE_VLAN(0, 0, 0); - err = vlan_ops->del_vlan(vsi, &vlan); - if (err && err != -EEXIST) - return err; + if (pf->lag && pf->lag->primary) { + dev_dbg(ice_pf_to_dev(pf), "Interface is primary in aggregate - not deleting prune list\n"); + } else { + vlan = ICE_VLAN(0, 0, 0); + err = vlan_ops->del_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + } /* in SVM both VLAN 0 filters are identical */ if (!ice_is_dvm_ena(&vsi->back->hw)) return 0; - vlan = ICE_VLAN(ETH_P_8021Q, 0, 0); - err = vlan_ops->del_vlan(vsi, &vlan); - if (err && err != -EEXIST) - return err; + if (pf->lag && pf->lag->primary) { + dev_dbg(ice_pf_to_dev(pf), "Interface is primary in aggregate - not deleting QinQ prune list\n"); + } else { + vlan = ICE_VLAN(ETH_P_8021Q, 0, 0); + err = vlan_ops->del_vlan(vsi, &vlan); + if (err && err != -EEXIST) + return err; + } /* when deleting the last VLAN filter, make sure to disable the VLAN * promisc mode so the filter isn't left by accident From 01139a2ce532d77379e1593230127caa261a8036 Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Sat, 6 Dec 2025 21:46:09 +0800 Subject: [PATCH 14/96] ice: Fix incorrect timeout ice_release_res() The commit 5f6df173f92e ("ice: implement and use rd32_poll_timeout for ice_sq_done timeout") converted ICE_CTL_Q_SQ_CMD_TIMEOUT from jiffies to microseconds. But the ice_release_res() function was missed, and its logic still treats ICE_CTL_Q_SQ_CMD_TIMEOUT as a jiffies value. So correct the issue by usecs_to_jiffies(). Found by inspection of the DDP downloading process. Compile and modprobe tested only. Fixes: 5f6df173f92e ("ice: implement and use rd32_poll_timeout for ice_sq_done timeout") Signed-off-by: Ding Hui Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Reviewed-by: Jacob Keller Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 046bc9c65c51..785bf5cc1b25 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2251,7 +2251,7 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) /* there are some rare cases when trying to release the resource * results in an admin queue timeout, so handle them correctly */ - timeout = jiffies + 10 * ICE_CTL_Q_SQ_CMD_TIMEOUT; + timeout = jiffies + 10 * usecs_to_jiffies(ICE_CTL_Q_SQ_CMD_TIMEOUT); do { status = ice_aq_release_res(hw, res, 0, NULL); if (status != -EIO) From 41a9a6826f20a524242a6c984845c4855f629841 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Thu, 20 Nov 2025 09:18:29 +0100 Subject: [PATCH 15/96] igc: Restore default Qbv schedule when changing channels The Multi-queue Priority (MQPRIO) and Earliest TxTime First (ETF) offloads utilize the Time Sensitive Networking (TSN) Tx mode. This mode is always coupled to IEEE 802.1Qbv time aware shaper (Qbv). Therefore, the driver sets a default Qbv schedule of all gates opened and a cycle time of 1s. This schedule is set during probe. However, the following sequence of events lead to Tx issues: - Boot a dual core system igc_probe(): igc_tsn_clear_schedule(): -> Default Schedule is set Note: At this point the driver has allocated two Tx/Rx queues, because there are only two CPUs. - ethtool -L enp3s0 combined 4 igc_ethtool_set_channels(): igc_reinit_queues() -> Default schedule is gone, per Tx ring start and end time are zero - tc qdisc replace dev enp3s0 handle 100 parent root mqprio \ num_tc 4 map 3 3 2 2 0 1 1 1 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 hw 1 igc_tsn_offload_apply(): igc_tsn_enable_offload(): -> Writes zeros to IGC_STQT(i) and IGC_ENDQT(i), causing Tx to stall/fail Therefore, restore the default Qbv schedule after changing the number of channels. Furthermore, add a restriction to not allow queue reconfiguration when TSN/Qbv is enabled, because it may lead to inconsistent states. Fixes: c814a2d2d48f ("igc: Use default cycle 'start' and 'end' values for queues") Signed-off-by: Kurt Kanzenbach Reviewed-by: Aleksandr Loktionov Tested-by: Avigail Dahan Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 4 ++-- drivers/net/ethernet/intel/igc/igc_main.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index e94c1922b97a..3172cdbca9cc 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1565,8 +1565,8 @@ static int igc_ethtool_set_channels(struct net_device *netdev, if (ch->other_count != NON_Q_VECTORS) return -EINVAL; - /* Do not allow channel reconfiguration when mqprio is enabled */ - if (adapter->strict_priority_enable) + /* Do not allow channel reconfiguration when any TSN qdisc is enabled */ + if (adapter->flags & IGC_FLAG_TSN_ANY_ENABLED) return -EINVAL; /* Verify the number of channels doesn't exceed hw limits */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7aafa60ba0c8..89a321a344d2 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7759,6 +7759,11 @@ int igc_reinit_queues(struct igc_adapter *adapter) if (netif_running(netdev)) err = igc_open(netdev); + if (!err) { + /* Restore default IEEE 802.1Qbv schedule after queue reinit */ + igc_tsn_clear_schedule(adapter); + } + return err; } From 6990dc392a9ab10e52af37e0bee8c7b753756dc4 Mon Sep 17 00:00:00 2001 From: Chwee-Lin Choong Date: Fri, 28 Nov 2025 18:53:04 +0800 Subject: [PATCH 16/96] igc: fix race condition in TX timestamp read for register 0 The current HW bug workaround checks the TXTT_0 ready bit first, then reads TXSTMPL_0 twice (before and after reading TXSTMPH_0) to detect whether a new timestamp was captured by timestamp register 0 during the workaround. This sequence has a race: if a new timestamp is captured after checking the TXTT_0 bit but before the first TXSTMPL_0 read, the detection fails because both the "old" and "new" values come from the same timestamp. Fix by reading TXSTMPL_0 first to establish a baseline, then checking the TXTT_0 bit. This ensures any timestamp captured during the race window will be detected. Old sequence: 1. Check TXTT_0 ready bit 2. Read TXSTMPL_0 (baseline) 3. Read TXSTMPH_0 (interrupt workaround) 4. Read TXSTMPL_0 (detect changes vs baseline) New sequence: 1. Read TXSTMPL_0 (baseline) 2. Check TXTT_0 ready bit 3. Read TXSTMPH_0 (interrupt workaround) 4. Read TXSTMPL_0 (detect changes vs baseline) Fixes: c789ad7cbebc ("igc: Work around HW bug causing missing timestamps") Suggested-by: Avi Shalev Reviewed-by: Aleksandr Loktionov Co-developed-by: Song Yoong Siang Signed-off-by: Song Yoong Siang Signed-off-by: Chwee-Lin Choong Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 43 ++++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index b7b46d863bee..7aae83c108fd 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -774,36 +774,43 @@ static void igc_ptp_tx_reg_to_stamp(struct igc_adapter *adapter, static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + u32 txstmpl_old; u64 regval; u32 mask; int i; + /* Establish baseline of TXSTMPL_0 before checking TXTT_0. + * This baseline is used to detect if a new timestamp arrives in + * register 0 during the hardware bug workaround below. + */ + txstmpl_old = rd32(IGC_TXSTMPL); + mask = rd32(IGC_TSYNCTXCTL) & IGC_TSYNCTXCTL_TXTT_ANY; if (mask & IGC_TSYNCTXCTL_TXTT_0) { regval = rd32(IGC_TXSTMPL); regval |= (u64)rd32(IGC_TXSTMPH) << 32; } else { - /* There's a bug in the hardware that could cause - * missing interrupts for TX timestamping. The issue - * is that for new interrupts to be triggered, the - * IGC_TXSTMPH_0 register must be read. + /* TXTT_0 not set - register 0 has no new timestamp initially. * - * To avoid discarding a valid timestamp that just - * happened at the "wrong" time, we need to confirm - * that there was no timestamp captured, we do that by - * assuming that no two timestamps in sequence have - * the same nanosecond value. + * Hardware bug: Future timestamp interrupts won't fire unless + * TXSTMPH_0 is read, even if the timestamp was captured in + * registers 1-3. * - * So, we read the "low" register, read the "high" - * register (to latch a new timestamp) and read the - * "low" register again, if "old" and "new" versions - * of the "low" register are different, a valid - * timestamp was captured, we can read the "high" - * register again. + * Workaround: Read TXSTMPH_0 here to enable future interrupts. + * However, this read clears TXTT_0. If a timestamp arrives in + * register 0 after checking TXTT_0 but before this read, it + * would be lost. + * + * To detect this race: We saved a baseline read of TXSTMPL_0 + * before TXTT_0 check. After performing the workaround read of + * TXSTMPH_0, we read TXSTMPL_0 again. Since consecutive + * timestamps never share the same nanosecond value, a change + * between the baseline and new TXSTMPL_0 indicates a timestamp + * arrived during the race window. If so, read the complete + * timestamp. */ - u32 txstmpl_old, txstmpl_new; + u32 txstmpl_new; - txstmpl_old = rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); txstmpl_new = rd32(IGC_TXSTMPL); @@ -818,7 +825,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) done: /* Now that the problematic first register was handled, we can - * use retrieve the timestamps from the other registers + * retrieve the timestamps from the other registers * (starting from '1') with less complications. */ for (i = 1; i < IGC_MAX_TX_TSTAMP_REGS; i++) { From 8ad1b6c1e63d25f5465b7a8aa403bdcee84b86f9 Mon Sep 17 00:00:00 2001 From: Chwee-Lin Choong Date: Thu, 4 Dec 2025 20:21:50 +0800 Subject: [PATCH 17/96] igc: Reduce TSN TX packet buffer from 7KB to 5KB per queue The previous 7 KB per queue caused TX unit hangs under heavy timestamping load. Reducing to 5 KB avoids these hangs and matches the TSN recommendation in I225/I226 SW User Manual Section 7.5.4. The 8 KB "freed" by this change is currently unused. This reduction is not expected to impact throughput, as the i226 is PCIe-limited for small TSN packets rather than TX-buffer-limited. Fixes: 0d58cdc902da ("igc: optimize TX packet buffer utilization for TSN mode") Reported-by: Zdenek Bouska Closes: https://lore.kernel.org/netdev/AS1PR10MB5675DBFE7CE5F2A9336ABFA4EBEAA@AS1PR10MB5675.EURPRD10.PROD.OUTLOOK.COM/ Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Signed-off-by: Chwee-Lin Choong Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 498ba1522ca4..9482ab11f050 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -443,9 +443,10 @@ #define IGC_TXPBSIZE_DEFAULT ( \ IGC_TXPB0SIZE(20) | IGC_TXPB1SIZE(0) | IGC_TXPB2SIZE(0) | \ IGC_TXPB3SIZE(0) | IGC_OS2BMCPBSIZE(4)) +/* TSN value following I225/I226 SW User Manual Section 7.5.4 */ #define IGC_TXPBSIZE_TSN ( \ - IGC_TXPB0SIZE(7) | IGC_TXPB1SIZE(7) | IGC_TXPB2SIZE(7) | \ - IGC_TXPB3SIZE(7) | IGC_OS2BMCPBSIZE(4)) + IGC_TXPB0SIZE(5) | IGC_TXPB1SIZE(5) | IGC_TXPB2SIZE(5) | \ + IGC_TXPB3SIZE(5) | IGC_OS2BMCPBSIZE(4)) #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ From c4efd7a770c54964bf2d8c98b2f4fd10be13dc54 Mon Sep 17 00:00:00 2001 From: Simon Schippers Date: Tue, 13 Jan 2026 08:51:38 +0100 Subject: [PATCH 18/96] usbnet: fix crash due to missing BQL accounting after resume In commit 7ff14c52049e ("usbnet: Add support for Byte Queue Limits (BQL)"), it was missed that usbnet_resume() may enqueue SKBs using __skb_queue_tail() without reporting them to BQL. As a result, the next call to netdev_completed_queue() triggers a BUG_ON() in dql_completed(), since the SKBs queued during resume were never accounted for. This patch fixes the issue by adding a corresponding netdev_sent_queue() call in usbnet_resume() when SKBs are queued after suspend. Because dev->txq.lock is held at this point, no concurrent calls to netdev_sent_queue() from usbnet_start_xmit() can occur. The crash can be reproduced by generating network traffic (e.g. iperf3 -c ... -t 0), suspending the system, and then waking it up (e.g. rtcwake -m mem -s 5). When testing USB2 Android tethering (cdc_ncm), the system crashed within three suspend/resume cycles without this patch. With the patch applied, no crashes were observed after 90 cycles. Testing with an AX88179 USB Ethernet adapter also showed no crashes. Fixes: 7ff14c52049e ("usbnet: Add support for Byte Queue Limits (BQL)") Reported-by: Bard Liao Tested-by: Bard Liao Tested-by: Simon Schippers Signed-off-by: Simon Schippers Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260113075139.6735-1-simon.schippers@tu-dortmund.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 36742e64cff7..35789ff4dd55 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1984,6 +1984,7 @@ int usbnet_resume(struct usb_interface *intf) } else { netif_trans_update(dev->net); __skb_queue_tail(&dev->txq, skb); + netdev_sent_queue(dev->net, skb->len); } } From 0386bd321d0f95d041a7b3d7b07643411b044a96 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 13 Jan 2026 16:08:18 +0100 Subject: [PATCH 19/96] vsock/virtio: Coalesce only linear skb vsock/virtio common tries to coalesce buffers in rx queue: if a linear skb (with a spare tail room) is followed by a small skb (length limited by GOOD_COPY_LEN = 128), an attempt is made to join them. Since the introduction of MSG_ZEROCOPY support, assumption that a small skb will always be linear is incorrect. In the zerocopy case, data is lost and the linear skb is appended with uninitialized kernel memory. Of all 3 supported virtio-based transports, only loopback-transport is affected. G2H virtio-transport rx queue operates on explicitly linear skbs; see virtio_vsock_alloc_linear_skb() in virtio_vsock_rx_fill(). H2G vhost-transport may allocate non-linear skbs, but only for sizes that are not considered for coalescence; see PAGE_ALLOC_COSTLY_ORDER in virtio_vsock_alloc_skb(). Ensure only linear skbs are coalesced. Note that skb_tailroom(last_skb) > 0 guarantees last_skb is linear. Fixes: 581512a6dc93 ("vsock/virtio: MSG_ZEROCOPY flag support") Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260113-vsock-recv-coalescence-v2-1-552b17837cf4@rbox.co Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index dcc8a1d5851e..26b979ad71f0 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1359,9 +1359,11 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, /* Try to copy small packets into the buffer of last packet queued, * to avoid wasting memory queueing the entire buffer with a small - * payload. + * payload. Skip non-linear (e.g. zerocopy) skbs; these carry payload + * in skb_shinfo. */ - if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) { + if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue) && + !skb_is_nonlinear(skb)) { struct virtio_vsock_hdr *last_hdr; struct sk_buff *last_skb; From a63e5fe0959200afcfefa7640db44c491f102c4c Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 13 Jan 2026 16:08:19 +0100 Subject: [PATCH 20/96] vsock/test: Add test for a linear and non-linear skb getting coalesced Loopback transport can mangle data in rx queue when a linear skb is followed by a small MSG_ZEROCOPY packet. To exercise the logic, send out two packets: a weirdly sized one (to ensure some spare tail room in the skb) and a zerocopy one that's small enough to fit in the spare room of its predecessor. Then, wait for both to land in the rx queue, and check the data received. Faulty packets merger manifests itself by corrupting payload of the later packet. Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260113-vsock-recv-coalescence-v2-2-552b17837cf4@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 5 ++ tools/testing/vsock/vsock_test_zerocopy.c | 74 +++++++++++++++++++++++ tools/testing/vsock/vsock_test_zerocopy.h | 3 + 3 files changed, 82 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index bbe3723babdc..27e39354499a 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -2403,6 +2403,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_accepted_setsockopt_client, .run_server = test_stream_accepted_setsockopt_server, }, + { + .name = "SOCK_STREAM virtio MSG_ZEROCOPY coalescence corruption", + .run_client = test_stream_msgzcopy_mangle_client, + .run_server = test_stream_msgzcopy_mangle_server, + }, {}, }; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index 9d9a6cb9614a..a31ddfc1cd0c 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -9,14 +9,18 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include "control.h" +#include "timeout.h" #include "vsock_test_zerocopy.h" #include "msg_zerocopy_common.h" @@ -356,3 +360,73 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) control_expectln("DONE"); close(fd); } + +#define GOOD_COPY_LEN 128 /* net/vmw_vsock/virtio_transport_common.c */ + +void test_stream_msgzcopy_mangle_client(const struct test_opts *opts) +{ + char sbuf1[PAGE_SIZE + 1], sbuf2[GOOD_COPY_LEN]; + unsigned long hash; + struct pollfd fds; + int fd, i; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + memset(sbuf1, 'x', sizeof(sbuf1)); + send_buf(fd, sbuf1, sizeof(sbuf1), 0, sizeof(sbuf1)); + + for (i = 0; i < sizeof(sbuf2); i++) + sbuf2[i] = rand() & 0xff; + + send_buf(fd, sbuf2, sizeof(sbuf2), MSG_ZEROCOPY, sizeof(sbuf2)); + + hash = hash_djb2(sbuf2, sizeof(sbuf2)); + control_writeulong(hash); + + fds.fd = fd; + fds.events = 0; + + if (poll(&fds, 1, TIMEOUT * MSEC_PER_SEC) != 1 || + !(fds.revents & POLLERR)) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +void test_stream_msgzcopy_mangle_server(const struct test_opts *opts) +{ + unsigned long local_hash, remote_hash; + char rbuf[PAGE_SIZE + 1]; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + /* Wait, don't race the (buggy) skbs coalescence. */ + vsock_ioctl_int(fd, SIOCINQ, PAGE_SIZE + 1 + GOOD_COPY_LEN); + + /* Discard the first packet. */ + recv_buf(fd, rbuf, PAGE_SIZE + 1, 0, PAGE_SIZE + 1); + + recv_buf(fd, rbuf, GOOD_COPY_LEN, 0, GOOD_COPY_LEN); + remote_hash = control_readulong(); + local_hash = hash_djb2(rbuf, GOOD_COPY_LEN); + + if (local_hash != remote_hash) { + fprintf(stderr, "Data received corrupted\n"); + exit(EXIT_FAILURE); + } + + close(fd); +} diff --git a/tools/testing/vsock/vsock_test_zerocopy.h b/tools/testing/vsock/vsock_test_zerocopy.h index 3ef2579e024d..d46c91a69f16 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.h +++ b/tools/testing/vsock/vsock_test_zerocopy.h @@ -12,4 +12,7 @@ void test_seqpacket_msgzcopy_server(const struct test_opts *opts); void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts); void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts); +void test_stream_msgzcopy_mangle_client(const struct test_opts *opts); +void test_stream_msgzcopy_mangle_server(const struct test_opts *opts); + #endif /* VSOCK_TEST_ZEROCOPY_H */ From 7d7dbafefbe74f5a25efc4807af093b857a7612e Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Mon, 12 Jan 2026 22:39:24 -0800 Subject: [PATCH 21/96] net: usb: dm9601: remove broken SR9700 support The SR9700 chip sends more than one packet in a USB transaction, like the DM962x chips can optionally do, but the dm9601 driver does not support this mode, and the hardware does not have the DM962x MODE_CTL register to disable it, so this driver drops packets on SR9700 devices. The sr9700 driver correctly handles receiving more than one packet per transaction. While the dm9601 driver could be improved to handle this, the easiest way to fix this issue in the short term is to remove the SR9700 device ID from the dm9601 driver so the sr9700 driver is always used. This device ID should not have been in more than one driver to begin with. The "Fixes" commit was chosen so that the patch is automatically included in all kernels that have the sr9700 driver, even though the issue affects dm9601. Fixes: c9b37458e956 ("USB2NET : SR9700 : One chip USB 1.1 USB2NET SR9700Device Driver Support") Signed-off-by: Ethan Nelson-Moore Acked-by: Peter Korsgaard Link: https://patch.msgid.link/20260113063924.74464-1-enelsonmoore@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/dm9601.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 8b6d6a1b3c2e..2b4716ccf0c5 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -603,10 +603,6 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0fe6, 0x8101), /* DM9601 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, - { - USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */ - .driver_info = (unsigned long)&dm9601_info, - }, { USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ .driver_info = (unsigned long)&dm9601_info, From 220d89df1da6ed95ac74883a72a5fb43abf2a586 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Jan 2026 17:26:21 +0000 Subject: [PATCH 22/96] net: add skb->data_len and (skb>end - skb->tail) to skb_dump() While working on a syzbot report, I found that skb_dump() is lacking two important parts : - skb->data_len. - (skb>end - skb->tail) tailroom is zero if skb is not linear. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20260112172621.4188700-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a56133902c0d..61746c2b95f6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1312,14 +1312,15 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) has_mac = skb_mac_header_was_set(skb); has_trans = skb_transport_header_was_set(skb); - printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" - "mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n" + printk("%sskb len=%u data_len=%u headroom=%u headlen=%u tailroom=%u\n" + "end-tail=%u mac=(%d,%d) mac_len=%u net=(%d,%d) trans=%d\n" "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" "csum(0x%x start=%u offset=%u ip_summed=%u complete_sw=%u valid=%u level=%u)\n" "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n" "priority=0x%x mark=0x%x alloc_cpu=%u vlan_all=0x%x\n" "encapsulation=%d inner(proto=0x%04x, mac=%u, net=%u, trans=%u)\n", - level, skb->len, headroom, skb_headlen(skb), tailroom, + level, skb->len, skb->data_len, headroom, skb_headlen(skb), + tailroom, skb->end - skb->tail, has_mac ? skb->mac_header : -1, has_mac ? skb_mac_header_len(skb) : -1, skb->mac_len, From c84fcb79e5dbde0b8d5aeeaf04282d2149aebcf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Jan 2026 19:12:01 +0000 Subject: [PATCH 23/96] bonding: limit BOND_MODE_8023AD to Ethernet devices BOND_MODE_8023AD makes sense for ARPHRD_ETHER only. syzbot reported: BUG: KASAN: global-out-of-bounds in __hw_addr_create net/core/dev_addr_lists.c:63 [inline] BUG: KASAN: global-out-of-bounds in __hw_addr_add_ex+0x25d/0x760 net/core/dev_addr_lists.c:118 Read of size 16 at addr ffffffff8bf94040 by task syz.1.3580/19497 CPU: 1 UID: 0 PID: 19497 Comm: syz.1.3580 Tainted: G L syzkaller #0 PREEMPT(full) Tainted: [L]=SOFTLOCKUP Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025 Call Trace: dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 check_region_inline mm/kasan/generic.c:-1 [inline] kasan_check_range+0x2b0/0x2c0 mm/kasan/generic.c:200 __asan_memcpy+0x29/0x70 mm/kasan/shadow.c:105 __hw_addr_create net/core/dev_addr_lists.c:63 [inline] __hw_addr_add_ex+0x25d/0x760 net/core/dev_addr_lists.c:118 __dev_mc_add net/core/dev_addr_lists.c:868 [inline] dev_mc_add+0xa1/0x120 net/core/dev_addr_lists.c:886 bond_enslave+0x2b8b/0x3ac0 drivers/net/bonding/bond_main.c:2180 do_set_master+0x533/0x6d0 net/core/rtnetlink.c:2963 do_setlink+0xcf0/0x41c0 net/core/rtnetlink.c:3165 rtnl_changelink net/core/rtnetlink.c:3776 [inline] __rtnl_newlink net/core/rtnetlink.c:3935 [inline] rtnl_newlink+0x161c/0x1c90 net/core/rtnetlink.c:4072 rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6958 netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2550 netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] netlink_unicast+0x82f/0x9e0 net/netlink/af_netlink.c:1344 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:727 [inline] __sock_sendmsg+0x21c/0x270 net/socket.c:742 ____sys_sendmsg+0x505/0x820 net/socket.c:2592 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2646 __sys_sendmsg+0x164/0x220 net/socket.c:2678 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0x1dc/0x560 arch/x86/entry/syscall_32.c:307 do_fast_syscall_32+0x34/0x80 arch/x86/entry/syscall_32.c:332 entry_SYSENTER_compat_after_hwframe+0x84/0x8e The buggy address belongs to the variable: lacpdu_mcast_addr+0x0/0x40 Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER") Reported-by: syzbot+9c081b17773615f24672@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6966946b.a70a0220.245e30.0002.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Andrew Lunn Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20260113191201.3970737-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3d56339a8a10..0aca6c937297 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1862,6 +1862,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, */ if (!bond_has_slaves(bond)) { if (bond_dev->type != slave_dev->type) { + if (slave_dev->type != ARPHRD_ETHER && + BOND_MODE(bond) == BOND_MODE_8023AD) { + SLAVE_NL_ERR(bond_dev, slave_dev, extack, + "8023AD mode requires Ethernet devices"); + return -EINVAL; + } slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n", bond_dev->type, slave_dev->type); From 4d10edfd1475b69dbd4c47f34b61a3772ece83ca Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 13 Jan 2026 18:54:44 +0000 Subject: [PATCH 24/96] l2tp: Fix memleak in l2tp_udp_encap_recv(). syzbot reported memleak of struct l2tp_session, l2tp_tunnel, sock, etc. [0] The cited commit moved down the validation of the protocol version in l2tp_udp_encap_recv(). The new place requires an extra error handling to avoid the memleak. Let's call l2tp_session_put() there. [0]: BUG: memory leak unreferenced object 0xffff88810a290200 (size 512): comm "syz.0.17", pid 6086, jiffies 4294944299 hex dump (first 32 bytes): 7d eb 04 0c 00 00 00 00 01 00 00 00 00 00 00 00 }............... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc babb6a4f): kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline] slab_post_alloc_hook mm/slub.c:4958 [inline] slab_alloc_node mm/slub.c:5263 [inline] __do_kmalloc_node mm/slub.c:5656 [inline] __kmalloc_noprof+0x3e0/0x660 mm/slub.c:5669 kmalloc_noprof include/linux/slab.h:961 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] l2tp_session_create+0x3a/0x3b0 net/l2tp/l2tp_core.c:1778 pppol2tp_connect+0x48b/0x920 net/l2tp/l2tp_ppp.c:755 __sys_connect_file+0x7a/0xb0 net/socket.c:2089 __sys_connect+0xde/0x110 net/socket.c:2108 __do_sys_connect net/socket.c:2114 [inline] __se_sys_connect net/socket.c:2111 [inline] __x64_sys_connect+0x1c/0x30 net/socket.c:2111 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xa4/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 364798056f518 ("l2tp: Support different protocol versions with same IP/port quadruple") Reported-by: syzbot+2c42ea4485b29beb0643@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/696693f2.a70a0220.245e30.0001.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20260113185446.2533333-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 687c1366a4d0..70335667ef03 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1086,8 +1086,10 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) tunnel = session->tunnel; /* Check protocol version */ - if (version != tunnel->version) + if (version != tunnel->version) { + l2tp_session_put(session); goto invalid; + } if (version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { From 4f5f148dd7c0459229d2ab9a769b2e820f9ee6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20B=2E=20Marli=C3=A8re?= Date: Tue, 13 Jan 2026 12:37:44 -0300 Subject: [PATCH 25/96] selftests: net: fib-onlink-tests: Convert to use namespaces by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the test breaks if the SUT already has a default route configured for IPv6. Fix by avoiding the use of the default namespace. Fixes: 4ed591c8ab44 ("net/ipv6: Allow onlink routes to have a device mismatch if it is the default route") Suggested-by: Fernando Fernandez Mancera Signed-off-by: Ricardo B. Marlière Reviewed-by: Ido Schimmel Reviewed-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20260113-selftests-net-fib-onlink-v2-1-89de2b931389@suse.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/fib-onlink-tests.sh | 71 ++++++++----------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index ec2d6ceb1f08..c01be076b210 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -120,7 +120,7 @@ log_subsection() run_cmd() { - local cmd="$*" + local cmd="$1" local out local rc @@ -145,7 +145,7 @@ get_linklocal() local pfx local addr - addr=$(${pfx} ip -6 -br addr show dev ${dev} | \ + addr=$(${pfx} ${IP} -6 -br addr show dev ${dev} | \ awk '{ for (i = 3; i <= NF; ++i) { if ($i ~ /^fe80/) @@ -173,58 +173,48 @@ setup() set -e - # create namespace - setup_ns PEER_NS + # create namespaces + setup_ns ns1 + IP="ip -netns $ns1" + setup_ns ns2 # add vrf table - ip li add ${VRF} type vrf table ${VRF_TABLE} - ip li set ${VRF} up - ip ro add table ${VRF_TABLE} unreachable default metric 8192 - ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192 + ${IP} li add ${VRF} type vrf table ${VRF_TABLE} + ${IP} li set ${VRF} up + ${IP} ro add table ${VRF_TABLE} unreachable default metric 8192 + ${IP} -6 ro add table ${VRF_TABLE} unreachable default metric 8192 # create test interfaces - ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} - ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} - ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} - ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} + ${IP} li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} + ${IP} li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} + ${IP} li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} + ${IP} li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} # enslave vrf interfaces for n in 5 7; do - ip li set ${NETIFS[p${n}]} vrf ${VRF} + ${IP} li set ${NETIFS[p${n}]} vrf ${VRF} done # add addresses for n in 1 3 5 7; do - ip li set ${NETIFS[p${n}]} up - ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + ${IP} li set ${NETIFS[p${n}]} up + ${IP} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ${IP} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done # move peer interfaces to namespace and add addresses for n in 2 4 6 8; do - ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up - ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + ${IP} li set ${NETIFS[p${n}]} netns ${ns2} up + ip -netns $ns2 addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip -netns $ns2 addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done - ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} - ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} + ${IP} -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} + ${IP} -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} set +e } -cleanup() -{ - # make sure we start from a clean slate - cleanup_ns ${PEER_NS} 2>/dev/null - for n in 1 3 5 7; do - ip link del ${NETIFS[p${n}]} 2>/dev/null - done - ip link del ${VRF} 2>/dev/null - ip ro flush table ${VRF_TABLE} - ip -6 ro flush table ${VRF_TABLE} -} - ################################################################################ # IPv4 tests # @@ -241,7 +231,7 @@ run_ip() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink + run_cmd "${IP} ro add table ${table} ${prefix}/32 via ${gw} ${dev} onlink" log_test $? ${exp_rc} "${desc}" } @@ -257,8 +247,8 @@ run_ip_mpath() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip ro add table "${table}" "${prefix}"/32 \ - nexthop via ${nh1} nexthop via ${nh2} + run_cmd "${IP} ro add table ${table} ${prefix}/32 \ + nexthop via ${nh1} nexthop via ${nh2}" log_test $? ${exp_rc} "${desc}" } @@ -339,7 +329,7 @@ run_ip6() # dev arg may be empty [ -n "${dev}" ] && dev="dev ${dev}" - run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink + run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 via ${gw} ${dev} onlink" log_test $? ${exp_rc} "${desc}" } @@ -353,8 +343,8 @@ run_ip6_mpath() local exp_rc="$6" local desc="$7" - run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \ - nexthop via ${nh1} nexthop via ${nh2} + run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 ${opts} \ + nexthop via ${nh1} nexthop via ${nh2}" log_test $? ${exp_rc} "${desc}" } @@ -491,10 +481,9 @@ do esac done -cleanup setup run_onlink_tests -cleanup +cleanup_ns ${ns1} ${ns2} if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} From a74c7a58ca2ca1cbb93f4c01421cf24b8642b962 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 14 Jan 2026 09:02:46 +0100 Subject: [PATCH 26/96] net: freescale: ucc_geth: Return early when TBI PHY can't be found In ucc_geth's .mac_config(), we configure the TBI Serdes block represented by a struct phy_device that we get from firmware. While porting to phylink, a check was missed to make sure we don't try to access the TBI PHY if we can't get it. Let's add it and return early in case of error Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202601130843.rFGNXA5a-lkp@intel.com/ Fixes: 53036aa8d031 ("net: freescale: ucc_geth: phylink conversion") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20260114080247.366252-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index affd5a6c44e7..131d1210dc4a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1602,8 +1602,10 @@ static void ugeth_mac_config(struct phylink_config *config, unsigned int mode, pr_warn("TBI mode requires that the device tree specify a tbi-handle\n"); tbiphy = of_phy_find_device(ug_info->tbi_node); - if (!tbiphy) + if (!tbiphy) { pr_warn("Could not get TBI device\n"); + return; + } value = phy_read(tbiphy, ENET_TBI_MII_CR); value &= ~0x1000; /* Turn off autonegotiation */ From 375629c92fd842bc2a229bb34c4453f62e097169 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 16 Jan 2026 13:13:45 +0100 Subject: [PATCH 27/96] can: dev: alloc_candev_mqs(): add missing default CAN capabilities The idea behind series 6c1f5146b214 ("Merge patch series "can: raw: better approach to instantly reject unsupported CAN frames"") is to set the capabilities of a CAN device (CAN-CC, CAN-FD, CAN-XL, and listen only) [1] and, based on these capabilities, reject unsupported CAN frames in the CAN-RAW protocol [2]. This works perfectly for CAN devices configured in CAN-FD or CAN-XL mode. CAN devices with static CAN control modes define their capabilities via can_set_static_ctrlmode() -> can_set_cap_info(). CAN devices configured by the user space for CAN-FD or CAN-XL set their capabilities via can_changelink() -> can_ctrlmode_changelink() -> can_set_cap_info(). However, in commit 166e87329ce6 ("can: propagate CAN device capabilities via ml_priv"), the capabilities of CAN devices are not initialized. This results in CAN-RAW rejecting all CAN frames on devices directly after ifup if the user space has not changed the CAN control mode. Fix this problem by setting the default capabilities to CAN-CC in alloc_candev_mqs() as soon as the CAN specific ml_priv is allocated. [1] commit 166e87329ce6 ("can: propagate CAN device capabilities via ml_priv") [2] commit faba5860fcf9 ("can: raw: instantly reject disabled CAN frames") Fixes: 166e87329ce6 ("can: propagate CAN device capabilities via ml_priv") Acked-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Link: https://patch.msgid.link/20260116-can_add_missing_set_caps-v1-1-7525126d8b20@pengutronix.de [mkl: fix typo in subject] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 7ab9578f5b89..769745e22a3c 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -332,6 +332,7 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, can_ml = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); can_set_ml_priv(dev, can_ml); + can_set_cap(dev, CAN_CAP_CC); if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; From 79a6d1bfe1148bc921b8d7f3371a7fbce44e30f7 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 16 Jan 2026 14:10:10 +0100 Subject: [PATCH 28/96] can: gs_usb: gs_usb_receive_bulk_callback(): unanchor URL on usb_submit_urb() error In commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"), the URB was re-anchored before usb_submit_urb() in gs_usb_receive_bulk_callback() to prevent a leak of this URB during cleanup. However, this patch did not take into account that usb_submit_urb() could fail. The URB remains anchored and usb_kill_anchored_urbs(&parent->rx_submitted) in gs_can_close() loops infinitely since the anchor list never becomes empty. To fix the bug, unanchor the URB when an usb_submit_urb() error occurs, also print an info message. Fixes: 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/all/20260110223836.3890248-1-kuba@kernel.org/ Link: https://patch.msgid.link/20260116-can_usb-fix-reanchor-v1-1-9d74e7289225@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index d093babbc320..192338b481f2 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -754,6 +754,10 @@ resubmit_urb: usb_anchor_urb(urb, &parent->rx_submitted); rc = usb_submit_urb(urb, GFP_ATOMIC); + if (!rc) + return; + + usb_unanchor_urb(urb); /* USB failure take down all interfaces */ if (rc == -ENODEV) { @@ -762,6 +766,9 @@ device_detach: if (parent->canch[rc]) netif_device_detach(parent->canch[rc]->netdev); } + } else if (rc != -ESHUTDOWN && net_ratelimit()) { + netdev_info(netdev, "failed to re-submit IN URB: %pe\n", + ERR_PTR(urb->status)); } } From 0ce73a0eb5a27070957b67fd74059b6da89cc516 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 10 Jan 2026 12:52:27 +0100 Subject: [PATCH 29/96] can: ems_usb: ems_usb_read_bulk_callback(): fix URB memory leak Fix similar memory leak as in commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"). In ems_usb_open(), the URBs for USB-in transfers are allocated, added to the dev->rx_submitted anchor and submitted. In the complete callback ems_usb_read_bulk_callback(), the URBs are processed and resubmitted. In ems_usb_close() the URBs are freed by calling usb_kill_anchored_urbs(&dev->rx_submitted). However, this does not take into account that the USB framework unanchors the URB before the complete function is called. This means that once an in-URB has been completed, it is no longer anchored and is ultimately not released in ems_usb_close(). Fix the memory leak by anchoring the URB in the ems_usb_read_bulk_callback() to the dev->rx_submitted anchor. Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-can_usb-fix-memory-leak-v2-1-4b8cb2915571@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index de8e212a1366..4c219a5b139b 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -486,11 +486,17 @@ resubmit_urb: urb->transfer_buffer, RX_BUFFER_SIZE, ems_usb_read_bulk_callback, dev); + usb_anchor_urb(urb, &dev->rx_submitted); + retval = usb_submit_urb(urb, GFP_ATOMIC); + if (!retval) + return; + + usb_unanchor_urb(urb); if (retval == -ENODEV) netif_device_detach(netdev); - else if (retval) + else netdev_err(netdev, "failed resubmitting read bulk urb: %d\n", retval); } From 5a4391bdc6c8357242f62f22069c865b792406b3 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 10 Jan 2026 12:52:27 +0100 Subject: [PATCH 30/96] can: esd_usb: esd_usb_read_bulk_callback(): fix URB memory leak Fix similar memory leak as in commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"). In esd_usb_open(), the URBs for USB-in transfers are allocated, added to the dev->rx_submitted anchor and submitted. In the complete callback esd_usb_read_bulk_callback(), the URBs are processed and resubmitted. In esd_usb_close() the URBs are freed by calling usb_kill_anchored_urbs(&dev->rx_submitted). However, this does not take into account that the USB framework unanchors the URB before the complete function is called. This means that once an in-URB has been completed, it is no longer anchored and is ultimately not released in esd_usb_close(). Fix the memory leak by anchoring the URB in the esd_usb_read_bulk_callback() to the dev->rx_submitted anchor. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-can_usb-fix-memory-leak-v2-2-4b8cb2915571@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 08da507faef4..8cc924c47042 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -541,13 +541,20 @@ resubmit_urb: urb->transfer_buffer, ESD_USB_RX_BUFFER_SIZE, esd_usb_read_bulk_callback, dev); + usb_anchor_urb(urb, &dev->rx_submitted); + err = usb_submit_urb(urb, GFP_ATOMIC); + if (!err) + return; + + usb_unanchor_urb(urb); + if (err == -ENODEV) { for (i = 0; i < dev->net_count; i++) { if (dev->nets[i]) netif_device_detach(dev->nets[i]->netdev); } - } else if (err) { + } else { dev_err(dev->udev->dev.parent, "failed resubmitting read bulk urb: %pe\n", ERR_PTR(err)); } From 248e8e1a125fa875158df521b30f2cc7e27eeeaa Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 10 Jan 2026 12:52:27 +0100 Subject: [PATCH 31/96] can: kvaser_usb: kvaser_usb_read_bulk_callback(): fix URB memory leak Fix similar memory leak as in commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"). In kvaser_usb_set_{,data_}bittiming() -> kvaser_usb_setup_rx_urbs(), the URBs for USB-in transfers are allocated, added to the dev->rx_submitted anchor and submitted. In the complete callback kvaser_usb_read_bulk_callback(), the URBs are processed and resubmitted. In kvaser_usb_remove_interfaces() the URBs are freed by calling usb_kill_anchored_urbs(&dev->rx_submitted). However, this does not take into account that the USB framework unanchors the URB before the complete function is called. This means that once an in-URB has been completed, it is no longer anchored and is ultimately not released in usb_kill_anchored_urbs(). Fix the memory leak by anchoring the URB in the kvaser_usb_read_bulk_callback() to the dev->rx_submitted anchor. Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-can_usb-fix-memory-leak-v2-3-4b8cb2915571@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 62701ec34272..d0a2a2a33c1c 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -361,7 +361,14 @@ resubmit_urb: urb->transfer_buffer, KVASER_USB_RX_BUFFER_SIZE, kvaser_usb_read_bulk_callback, dev); + usb_anchor_urb(urb, &dev->rx_submitted); + err = usb_submit_urb(urb, GFP_ATOMIC); + if (!err) + return; + + usb_unanchor_urb(urb); + if (err == -ENODEV) { for (i = 0; i < dev->nchannels; i++) { struct kvaser_usb_net_priv *priv; @@ -372,7 +379,7 @@ resubmit_urb: netif_device_detach(priv->netdev); } - } else if (err) { + } else { dev_err(&dev->intf->dev, "Failed resubmitting read bulk urb: %d\n", err); } From 710a7529fb13c5a470258ff5508ed3c498d54729 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 10 Jan 2026 12:52:27 +0100 Subject: [PATCH 32/96] can: mcba_usb: mcba_usb_read_bulk_callback(): fix URB memory leak Fix similar memory leak as in commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"). In mcba_usb_probe() -> mcba_usb_start(), the URBs for USB-in transfers are allocated, added to the priv->rx_submitted anchor and submitted. In the complete callback mcba_usb_read_bulk_callback(), the URBs are processed and resubmitted. In mcba_usb_close() -> mcba_urb_unlink() the URBs are freed by calling usb_kill_anchored_urbs(&priv->rx_submitted). However, this does not take into account that the USB framework unanchors the URB before the complete function is called. This means that once an in-URB has been completed, it is no longer anchored and is ultimately not released in usb_kill_anchored_urbs(). Fix the memory leak by anchoring the URB in the mcba_usb_read_bulk_callback()to the priv->rx_submitted anchor. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-can_usb-fix-memory-leak-v2-4-4b8cb2915571@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 41c0a1c399bf..04170326dc7e 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -608,11 +608,17 @@ resubmit_urb: urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); + usb_anchor_urb(urb, &priv->rx_submitted); + retval = usb_submit_urb(urb, GFP_ATOMIC); + if (!retval) + return; + + usb_unanchor_urb(urb); if (retval == -ENODEV) netif_device_detach(netdev); - else if (retval) + else netdev_err(netdev, "failed resubmitting read bulk urb: %d\n", retval); } From f7a980b3b8f80fe367f679da376cf76e800f9480 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 10 Jan 2026 12:52:27 +0100 Subject: [PATCH 33/96] can: usb_8dev: usb_8dev_read_bulk_callback(): fix URB memory leak Fix similar memory leak as in commit 7352e1d5932a ("can: gs_usb: gs_usb_receive_bulk_callback(): fix URB memory leak"). In usb_8dev_open() -> usb_8dev_start(), the URBs for USB-in transfers are allocated, added to the priv->rx_submitted anchor and submitted. In the complete callback usb_8dev_read_bulk_callback(), the URBs are processed and resubmitted. In usb_8dev_close() -> unlink_all_urbs() the URBs are freed by calling usb_kill_anchored_urbs(&priv->rx_submitted). However, this does not take into account that the USB framework unanchors the URB before the complete function is called. This means that once an in-URB has been completed, it is no longer anchored and is ultimately not released in usb_kill_anchored_urbs(). Fix the memory leak by anchoring the URB in the usb_8dev_read_bulk_callback() to the priv->rx_submitted anchor. Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20260116-can_usb-fix-memory-leak-v2-5-4b8cb2915571@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 7449328f7cd7..3125cf59d002 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -541,11 +541,17 @@ resubmit_urb: urb->transfer_buffer, RX_BUFFER_SIZE, usb_8dev_read_bulk_callback, priv); + usb_anchor_urb(urb, &priv->rx_submitted); + retval = usb_submit_urb(urb, GFP_ATOMIC); + if (!retval) + return; + + usb_unanchor_urb(urb); if (retval == -ENODEV) netif_device_detach(netdev); - else if (retval) + else netdev_err(netdev, "failed resubmitting read bulk urb: %d\n", retval); } From a80c9d945aef55b23b54838334345f20251dad83 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 13 Jan 2026 12:10:26 -0500 Subject: [PATCH 34/96] sctp: move SCTP_CMD_ASSOC_SHKEY right after SCTP_CMD_PEER_INIT A null-ptr-deref was reported in the SCTP transmit path when SCTP-AUTH key initialization fails: ================================================================== KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 0 PID: 16 Comm: ksoftirqd/0 Tainted: G W 6.6.0 #2 RIP: 0010:sctp_packet_bundle_auth net/sctp/output.c:264 [inline] RIP: 0010:sctp_packet_append_chunk+0xb36/0x1260 net/sctp/output.c:401 Call Trace: sctp_packet_transmit_chunk+0x31/0x250 net/sctp/output.c:189 sctp_outq_flush_data+0xa29/0x26d0 net/sctp/outqueue.c:1111 sctp_outq_flush+0xc80/0x1240 net/sctp/outqueue.c:1217 sctp_cmd_interpreter.isra.0+0x19a5/0x62c0 net/sctp/sm_sideeffect.c:1787 sctp_side_effects net/sctp/sm_sideeffect.c:1198 [inline] sctp_do_sm+0x1a3/0x670 net/sctp/sm_sideeffect.c:1169 sctp_assoc_bh_rcv+0x33e/0x640 net/sctp/associola.c:1052 sctp_inq_push+0x1dd/0x280 net/sctp/inqueue.c:88 sctp_rcv+0x11ae/0x3100 net/sctp/input.c:243 sctp6_rcv+0x3d/0x60 net/sctp/ipv6.c:1127 The issue is triggered when sctp_auth_asoc_init_active_key() fails in sctp_sf_do_5_1C_ack() while processing an INIT_ACK. In this case, the command sequence is currently: - SCTP_CMD_PEER_INIT - SCTP_CMD_TIMER_STOP (T1_INIT) - SCTP_CMD_TIMER_START (T1_COOKIE) - SCTP_CMD_NEW_STATE (COOKIE_ECHOED) - SCTP_CMD_ASSOC_SHKEY - SCTP_CMD_GEN_COOKIE_ECHO If SCTP_CMD_ASSOC_SHKEY fails, asoc->shkey remains NULL, while asoc->peer.auth_capable and asoc->peer.peer_chunks have already been set by SCTP_CMD_PEER_INIT. This allows a DATA chunk with auth = 1 and shkey = NULL to be queued by sctp_datamsg_from_user(). Since command interpretation stops on failure, no COOKIE_ECHO should been sent via SCTP_CMD_GEN_COOKIE_ECHO. However, the T1_COOKIE timer has already been started, and it may enqueue a COOKIE_ECHO into the outqueue later. As a result, the DATA chunk can be transmitted together with the COOKIE_ECHO in sctp_outq_flush_data(), leading to the observed issue. Similar to the other places where it calls sctp_auth_asoc_init_active_key() right after sctp_process_init(), this patch moves the SCTP_CMD_ASSOC_SHKEY immediately after SCTP_CMD_PEER_INIT, before stopping T1_INIT and starting T1_COOKIE. This ensures that if shared key generation fails, authenticated DATA cannot be sent. It also allows the T1_INIT timer to retransmit INIT, giving the client another chance to process INIT_ACK and retry key setup. Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing") Reported-by: Zhen Chen Tested-by: Zhen Chen Signed-off-by: Xin Long Link: https://patch.msgid.link/44881224b375aa8853f5e19b4055a1a56d895813.1768324226.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3755ba079d07..7b823d759141 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -603,6 +603,11 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, SCTP_PEER_INIT(initchunk)); + /* SCTP-AUTH: generate the association shared keys so that + * we can potentially sign the COOKIE-ECHO. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL()); + /* Reset init error count upon receipt of INIT-ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL()); @@ -617,11 +622,6 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_COOKIE_ECHOED)); - /* SCTP-AUTH: generate the association shared keys so that - * we can potentially sign the COOKIE-ECHO. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL()); - /* 5.1 C) "A" shall then send the State Cookie received in the * INIT ACK chunk in a COOKIE ECHO chunk, ... */ From d998b0e5afffa90d0f03770bad31083767079858 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Wed, 14 Jan 2026 13:31:06 +0100 Subject: [PATCH 35/96] octeontx2: Fix otx2_dma_map_page() error return code 0 is a valid DMA address [1] so using it as the error value can lead to errors. The error value of dma_map_XXX() functions is DMA_MAPPING_ERROR which is ~0. The callers of otx2_dma_map_page() use dma_mapping_error() to test the return value of otx2_dma_map_page(). This means that they would not detect an error in otx2_dma_map_page(). Make otx2_dma_map_page() return the raw value of dma_map_page_attrs(). [1] https://lore.kernel.org/all/f977f68b-cec5-4ab7-b4bd-2cf6aca46267@intel.com Fixes: caa2da34fd25 ("octeontx2-pf: Initialize and config queues") Cc: Signed-off-by: Thomas Fourier Link: https://patch.msgid.link/20260114123107.42387-2-fourier.thomas@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e616a727a3a9..8cdfc36d79d2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -940,13 +940,8 @@ static inline dma_addr_t otx2_dma_map_page(struct otx2_nic *pfvf, size_t offset, size_t size, enum dma_data_direction dir) { - dma_addr_t iova; - - iova = dma_map_page_attrs(pfvf->dev, page, + return dma_map_page_attrs(pfvf->dev, page, offset, size, dir, DMA_ATTR_SKIP_CPU_SYNC); - if (unlikely(dma_mapping_error(pfvf->dev, iova))) - return (dma_addr_t)NULL; - return iova; } static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, From c158f985cf6c2c36c99c4f67af2ff3f5ebe09f8f Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 14 Jan 2026 22:00:37 +0530 Subject: [PATCH 36/96] amd-xgbe: avoid misleading per-packet error log On the receive path, packet can be damaged because of buffer overflow in Rx FIFO. Avoid misleading per-packet error log when packet->errors is set, this can flood the log. Instead, rely on the standard rtnl_link_stats64 stats. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20260114163037.2062606-1-Raju.Rangoju@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 3ddd896d6987..b5a60a048896 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1837,7 +1837,7 @@ static void xgbe_get_stats64(struct net_device *netdev, s->multicast = pstats->rxmulticastframes_g; s->rx_length_errors = pstats->rxlengtherror; s->rx_crc_errors = pstats->rxcrcerror; - s->rx_fifo_errors = pstats->rxfifooverflow; + s->rx_over_errors = pstats->rxfifooverflow; s->tx_packets = pstats->txframecount_gb; s->tx_bytes = pstats->txoctetcount_gb; @@ -2292,9 +2292,6 @@ read_again: goto read_again; if (error || packet->errors) { - if (packet->errors) - netif_err(pdata, rx_err, netdev, - "error in received packet\n"); dev_kfree_skb(skb); goto next_packet; } From ff7737946812eb59faad70d497b803c4f59200b9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 15 Jan 2026 13:54:00 +0000 Subject: [PATCH 37/96] docs: netdev: refine 15-patch limit The 15 patch limit is intended by the maintainers to cover all outstanding patches on the mailing list on a per-tree basis. Not just those in a single patchset. Document this practice accordingly. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20260115-15-minutes-of-fame-v2-1-70cbf0883aff@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/process/maintainer-netdev.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 989192421cc9..6bce4507d5d3 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -363,6 +363,18 @@ just do it. As a result, a sequence of smaller series gets merged quicker and with better review coverage. Re-posting large series also increases the mailing list traffic. +Limit patches outstanding on mailing list +----------------------------------------- + +Avoid having more than 15 patches, across all series, outstanding for +review on the mailing list for a single tree. In other words, a maximum of +15 patches under review on net, and a maximum of 15 patches under review on +net-next. + +This limit is intended to focus developer effort on testing patches before +upstream review. Aiding the quality of upstream submissions, and easing the +load on reviewers. + .. _rcs: Local variable ordering ("reverse xmas tree", "RCS") From 9a56796ad258786d3624eef5aefba394fc9bdded Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Jan 2026 17:24:46 +0000 Subject: [PATCH 38/96] gue: Fix skb memleak with inner IP protocol 0. syzbot reported skb memleak below. [0] The repro generated a GUE packet with its inner protocol 0. gue_udp_recv() returns -guehdr->proto_ctype for "resubmit" in ip_protocol_deliver_rcu(), but this only works with non-zero protocol number. Let's drop such packets. Note that 0 is a valid number (IPv6 Hop-by-Hop Option). I think it is not practical to encap HOPOPT in GUE, so once someone starts to complain, we could pass down a resubmit flag pointer to distinguish two zeros from the upper layer: * no error * resubmit HOPOPT [0] BUG: memory leak unreferenced object 0xffff888109695a00 (size 240): comm "syz.0.17", pid 6088, jiffies 4294943096 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 40 c2 10 81 88 ff ff 00 00 00 00 00 00 00 00 .@.............. backtrace (crc a84b336f): kmemleak_alloc_recursive include/linux/kmemleak.h:44 [inline] slab_post_alloc_hook mm/slub.c:4958 [inline] slab_alloc_node mm/slub.c:5263 [inline] kmem_cache_alloc_noprof+0x3b4/0x590 mm/slub.c:5270 __build_skb+0x23/0x60 net/core/skbuff.c:474 build_skb+0x20/0x190 net/core/skbuff.c:490 __tun_build_skb drivers/net/tun.c:1541 [inline] tun_build_skb+0x4a1/0xa40 drivers/net/tun.c:1636 tun_get_user+0xc12/0x2030 drivers/net/tun.c:1770 tun_chr_write_iter+0x71/0x120 drivers/net/tun.c:1999 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x45d/0x710 fs/read_write.c:686 ksys_write+0xa7/0x170 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xa4/0xf80 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 37dd0247797b1 ("gue: Receive side for Generic UDP Encapsulation") Reported-by: syzbot+4d8c7d16b0e95c0d0f0d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6965534b.050a0220.38aacd.0001.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260115172533.693652-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fou_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 3970b6b7ace5..ab8f309f8925 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -215,6 +215,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) return gue_control_message(skb, guehdr); proto_ctype = guehdr->proto_ctype; + if (unlikely(!proto_ctype)) + goto drop; + __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); From 68578370f9b3a2aba5964b273312d51c581b6aad Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Jan 2026 17:24:47 +0000 Subject: [PATCH 39/96] tools: ynl: Specify --no-line-number in ynl-regen.sh. If grep.lineNumber is enabled in .gitconfig, [grep] lineNumber = true ynl-regen.sh fails with the following error: $ ./tools/net/ynl/ynl-regen.sh -f ... ynl_gen_c.py: error: argument --mode: invalid choice: '4:' (choose from user, kernel, uapi) GEN 4: net/ipv4/fou_nl.c Let's specify --no-line-number explicitly. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Suggested-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260115172533.693652-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-regen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index 81b4ecd89100..d9809276db98 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -21,7 +21,7 @@ files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\|user\)') for f in $files; do # params: 0 1 2 3 # $YAML YNL-GEN kernel $mode - params=( $(git grep -B1 -h '/\* YNL-GEN' $f | sed 's@/\*\(.*\)\*/@\1@') ) + params=( $(git grep --no-line-number -B1 -h '/\* YNL-GEN' $f | sed 's@/\*\(.*\)\*/@\1@') ) args=$(sed -n 's@/\* YNL-ARG \(.*\) \*/@\1@p' $f) if [ $f -nt ${params[0]} -a -z "$force" ]; then From 7a9bc9e3f42391e4c187e099263cf7a1c4b69ff5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Jan 2026 17:24:48 +0000 Subject: [PATCH 40/96] fou: Don't allow 0 for FOU_ATTR_IPPROTO. fou_udp_recv() has the same problem mentioned in the previous patch. If FOU_ATTR_IPPROTO is set to 0, skb is not freed by fou_udp_recv() nor "resubmit"-ted in ip_protocol_deliver_rcu(). Let's forbid 0 for FOU_ATTR_IPPROTO. Fixes: 23461551c0062 ("fou: Support for foo-over-udp RX path") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260115172533.693652-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/fou.yaml | 2 ++ net/ipv4/fou_nl.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 8e7974ec453f..331f1b342b3a 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -39,6 +39,8 @@ attribute-sets: - name: ipproto type: u8 + checks: + min: 1 - name: type type: u8 diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 7a99639204b1..309d5ba983d0 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -15,7 +15,7 @@ const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_BE16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, - [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, + [FOU_ATTR_IPPROTO] = NLA_POLICY_MIN(NLA_U8, 1), [FOU_ATTR_TYPE] = { .type = NLA_U8, }, [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, From b47adaab8b3d443868096bac08fdbb3d403194ba Mon Sep 17 00:00:00 2001 From: David Yang Date: Wed, 14 Jan 2026 20:24:45 +0800 Subject: [PATCH 41/96] veth: fix data race in veth_get_ethtool_stats In veth_get_ethtool_stats(), some statistics protected by u64_stats_sync, are read and accumulated in ignorance of possible u64_stats_fetch_retry() events. These statistics, peer_tq_xdp_xmit and peer_tq_xdp_xmit_err, are already accumulated by veth_xdp_xmit(). Fix this by reading them into a temporary buffer first. Fixes: 5fe6e56776ba ("veth: rely on peer veth_rq for ndo_xdp_xmit accounting") Signed-off-by: David Yang Link: https://patch.msgid.link/20260114122450.227982-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 14e6f2a2fb77..9982412fd7f2 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -228,16 +228,20 @@ static void veth_get_ethtool_stats(struct net_device *dev, const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; const void *base = (void *)&rq_stats->vs; unsigned int start, tx_idx = idx; + u64 buf[VETH_TQ_STATS_LEN]; size_t offset; - tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; do { start = u64_stats_fetch_begin(&rq_stats->syncp); for (j = 0; j < VETH_TQ_STATS_LEN; j++) { offset = veth_tq_stats_desc[j].offset; - data[tx_idx + j] += *(u64 *)(base + offset); + buf[j] = *(u64 *)(base + offset); } } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); + + tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; + for (j = 0; j < VETH_TQ_STATS_LEN; j++) + data[tx_idx + j] += buf[j]; } pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN; From a92a6c50e35b75a8021265507f3c2a9084df0b94 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 13 Jan 2026 18:29:57 -0500 Subject: [PATCH 42/96] net: sfp: add potron quirk to the H-COM SPP425H-GAB4 SFP+ Stick This is another one of those XGSPON ONU sticks that's using the X-ONU-SFPP internally, thus it also requires the potron quirk to avoid tx faults. So, add an entry for it in sfp_quirks[]. Cc: stable@vger.kernel.org Signed-off-by: Hamza Mahfooz Link: https://patch.msgid.link/20260113232957.609642-1-someguy@effective-light.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 84bef5099dda..47f095bd91ce 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -519,6 +519,8 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp), + SFP_QUIRK_F("H-COM", "SPP425H-GAB4", sfp_fixup_potron), + // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports // 2600MBd in their EERPOM SFP_QUIRK_S("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex), From f40ddcc0c0ca1a0122a7f4440b429f97d5832bdf Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 13 Jan 2026 17:24:58 -0300 Subject: [PATCH 43/96] Revert "nfc/nci: Add the inconsistency check between the input data length and count" This reverts commit 068648aab72c9ba7b0597354ef4d81ffaac7b979. NFC packets may have NUL-bytes. Checking for string length is not a correct assumption here. As long as there is a check for the length copied from copy_from_user, all should be fine. The fix only prevented the syzbot reproducer from triggering the bug because the packet is not enqueued anymore and the code that triggers the bug is not exercised. The fix even broke testing/selftests/nci/nci_dev, making all tests there fail. After the revert, 6 out of 8 tests pass. Fixes: 068648aab72c ("nfc/nci: Add the inconsistency check between the input data length and count") Cc: stable@vger.kernel.org Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://patch.msgid.link/20260113202458.449455-1-cascardo@igalia.com Signed-off-by: Jakub Kicinski --- drivers/nfc/virtual_ncidev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index 9ef8ef2d4363..b957fce83b7c 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -125,10 +125,6 @@ static ssize_t virtual_ncidev_write(struct file *file, kfree_skb(skb); return -EFAULT; } - if (strnlen(skb->data, count) != count) { - kfree_skb(skb); - return -EINVAL; - } nci_recv_frame(vdev->ndev, skb); return count; From 7a29f6bf60f2590fe5e9c4decb451e19afad2bcf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Jan 2026 09:21:39 +0000 Subject: [PATCH 44/96] l2tp: avoid one data-race in l2tp_tunnel_del_work() We should read sk->sk_socket only when dealing with kernel sockets. syzbot reported the following data-race: BUG: KCSAN: data-race in l2tp_tunnel_del_work / sk_common_release write to 0xffff88811c182b20 of 8 bytes by task 5365 on cpu 0: sk_set_socket include/net/sock.h:2092 [inline] sock_orphan include/net/sock.h:2118 [inline] sk_common_release+0xae/0x230 net/core/sock.c:4003 udp_lib_close+0x15/0x20 include/net/udp.h:325 inet_release+0xce/0xf0 net/ipv4/af_inet.c:437 __sock_release net/socket.c:662 [inline] sock_close+0x6b/0x150 net/socket.c:1455 __fput+0x29b/0x650 fs/file_table.c:468 ____fput+0x1c/0x30 fs/file_table.c:496 task_work_run+0x131/0x1a0 kernel/task_work.c:233 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] __exit_to_user_mode_loop kernel/entry/common.c:44 [inline] exit_to_user_mode_loop+0x1fe/0x740 kernel/entry/common.c:75 __exit_to_user_mode_prepare include/linux/irq-entry-common.h:226 [inline] syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:256 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:159 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:194 [inline] do_syscall_64+0x1e1/0x2b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f read to 0xffff88811c182b20 of 8 bytes by task 827 on cpu 1: l2tp_tunnel_del_work+0x2f/0x1a0 net/l2tp/l2tp_core.c:1418 process_one_work kernel/workqueue.c:3257 [inline] process_scheduled_works+0x4ce/0x9d0 kernel/workqueue.c:3340 worker_thread+0x582/0x770 kernel/workqueue.c:3421 kthread+0x489/0x510 kernel/kthread.c:463 ret_from_fork+0x149/0x290 arch/x86/kernel/process.c:158 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246 value changed: 0xffff88811b818000 -> 0x0000000000000000 Fixes: d00fa9adc528 ("l2tp: fix races with tunnel socket close") Reported-by: syzbot+7312e82745f7fa2526db@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6968b029.050a0220.58bed.0016.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: James Chapman Reviewed-by: Guillaume Nault Link: https://patch.msgid.link/20260115092139.3066180-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 70335667ef03..f9b0f666600f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1416,8 +1416,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work) { struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel, del_work); - struct sock *sk = tunnel->sock; - struct socket *sock = sk->sk_socket; l2tp_tunnel_closeall(tunnel); @@ -1425,6 +1423,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) * the sk API to release it here. */ if (tunnel->fd < 0) { + struct socket *sock = tunnel->sock->sk_socket; + if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); From d3ba32162488283c0a4c5bedd8817aec91748802 Mon Sep 17 00:00:00 2001 From: Dmitry Skorodumov Date: Mon, 12 Jan 2026 17:24:06 +0300 Subject: [PATCH 45/96] ipvlan: Make the addrs_lock be per port Make the addrs_lock be per port, not per ipvlan dev. Initial code seems to be written in the assumption, that any address change must occur under RTNL. But it is not so for the case of IPv6. So 1) Introduce per-port addrs_lock. 2) It was needed to fix places where it was forgotten to take lock (ipvlan_open/ipvlan_close) This appears to be a very minor problem though. Since it's highly unlikely that ipvlan_add_addr() will be called on 2 CPU simultaneously. But nevertheless, this could cause: 1) False-negative of ipvlan_addr_busy(): one interface iterated through all port->ipvlans + ipvlan->addrs under some ipvlan spinlock, and another added IP under its own lock. Though this is only possible for IPv6, since looks like only ipvlan_addr6_event() can be called without rtnl_lock. 2) Race since ipvlan_ht_addr_add(port) is called under different ipvlan->addrs_lock locks This should not affect performance, since add/remove IP is a rare situation and spinlock is not taken on fast paths. Fixes: 8230819494b3 ("ipvlan: use per device spinlock to protect addrs list updates") Signed-off-by: Dmitry Skorodumov Reviewed-by: Paolo Abeni Link: https://patch.msgid.link/20260112142417.4039566-2-skorodumov.dmitry@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan.h | 2 +- drivers/net/ipvlan/ipvlan_core.c | 16 +++++------ drivers/net/ipvlan/ipvlan_main.c | 49 +++++++++++++++++++------------- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 50de3ee204db..80f84fc87008 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -69,7 +69,6 @@ struct ipvl_dev { DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; - spinlock_t addrs_lock; }; struct ipvl_addr { @@ -90,6 +89,7 @@ struct ipvl_port { struct net_device *dev; possible_net_t pnet; struct hlist_head hlhead[IPVLAN_HASH_SIZE]; + spinlock_t addrs_lock; /* guards hash-table and addrs */ struct list_head ipvlans; u16 mode; u16 flags; diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 2efa3ba148aa..bdb3a46b327c 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -107,17 +107,15 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr) struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6) { - struct ipvl_addr *addr, *ret = NULL; + struct ipvl_addr *addr; - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { - if (addr_equal(is_v6, addr, iaddr)) { - ret = addr; - break; - } + assert_spin_locked(&ipvlan->port->addrs_lock); + + list_for_each_entry(addr, &ipvlan->addrs, anode) { + if (addr_equal(is_v6, addr, iaddr)) + return addr; } - rcu_read_unlock(); - return ret; + return NULL; } bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 660f3db11766..baccdad695fd 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -75,6 +75,7 @@ static int ipvlan_port_create(struct net_device *dev) for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) INIT_HLIST_HEAD(&port->hlhead[idx]); + spin_lock_init(&port->addrs_lock); skb_queue_head_init(&port->backlog); INIT_WORK(&port->wq, ipvlan_process_multicast); ida_init(&port->ida); @@ -181,6 +182,7 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan->port; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -189,10 +191,10 @@ static int ipvlan_open(struct net_device *dev) else dev->flags &= ~IFF_NOARP; - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) + spin_lock_bh(&port->addrs_lock); + list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - rcu_read_unlock(); + spin_unlock_bh(&port->addrs_lock); return 0; } @@ -206,10 +208,10 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) + spin_lock_bh(&ipvlan->port->addrs_lock); + list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); - rcu_read_unlock(); + spin_unlock_bh(&ipvlan->port->addrs_lock); return 0; } @@ -579,7 +581,6 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params, if (!tb[IFLA_MTU]) ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); - spin_lock_init(&ipvlan->addrs_lock); /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -657,13 +658,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); kfree_rcu(addr, rcu); } - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); ida_free(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); @@ -817,6 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; + assert_spin_locked(&ipvlan->port->addrs_lock); + addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); if (!addr) return -ENOMEM; @@ -847,16 +850,16 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); if (!addr) { - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return; } ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); kfree_rcu(addr, rcu); } @@ -878,14 +881,14 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { int ret = -EINVAL; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip6_addr, true); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return ret; } @@ -924,21 +927,24 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + int ret = NOTIFY_OK; if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { NL_SET_ERR_MSG(i6vi->extack, "Address already assigned to an ipvlan device"); - return notifier_from_errno(-EADDRINUSE); + ret = notifier_from_errno(-EADDRINUSE); } + spin_unlock_bh(&ipvlan->port->addrs_lock); break; } - return NOTIFY_OK; + return ret; } #endif @@ -946,14 +952,14 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { int ret = -EINVAL; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip4_addr, false); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return ret; } @@ -995,21 +1001,24 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused, struct in_validator_info *ivi = (struct in_validator_info *)ptr; struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + int ret = NOTIFY_OK; if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { NL_SET_ERR_MSG(ivi->extack, "Address already assigned to an ipvlan device"); - return notifier_from_errno(-EADDRINUSE); + ret = notifier_from_errno(-EADDRINUSE); } + spin_unlock_bh(&ipvlan->port->addrs_lock); break; } - return NOTIFY_OK; + return ret; } static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { From 8becfe16e4a12218c703a98f5bfc15b6f0fbd99c Mon Sep 17 00:00:00 2001 From: Dmitry Skorodumov Date: Mon, 12 Jan 2026 17:24:07 +0300 Subject: [PATCH 46/96] selftests: net: simple selftest for ipvtap This is a simple ipvtap test to test handling IP-address add/remove on ipvlan interface. It creates a veth-interface and then creates several network-namespace with ipvlan0 interface in it linked to veth. Then it starts to add/remove addresses on ipvlan0 interfaces in several threads. At finish, it checks that there is no duplicated addresses. Signed-off-by: Dmitry Skorodumov Link: https://patch.msgid.link/20260112142417.4039566-3-skorodumov.dmitry@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/config | 2 + tools/testing/selftests/net/ipvtap_test.sh | 168 +++++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100755 tools/testing/selftests/net/ipvtap_test.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b66ba04f19d9..45c4ea381bc3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -48,6 +48,7 @@ TEST_PROGS := \ ipv6_flowlabel.sh \ ipv6_force_forwarding.sh \ ipv6_route_update_soft_lockup.sh \ + ipvtap_test.sh \ l2_tos_ttl_inherit.sh \ l2tp.sh \ link_netns.py \ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 1e1f253118f5..b84362b9b508 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -48,6 +48,7 @@ CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SIT=y CONFIG_IPV6_VTI=y CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_KALLSYMS=y CONFIG_L2TP=m CONFIG_L2TP_ETH=m @@ -116,6 +117,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_PSAMPLE=m CONFIG_RPS=y CONFIG_SYSFS=y +CONFIG_TAP=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_TEST_BPF=m diff --git a/tools/testing/selftests/net/ipvtap_test.sh b/tools/testing/selftests/net/ipvtap_test.sh new file mode 100755 index 000000000000..354ca7ce8584 --- /dev/null +++ b/tools/testing/selftests/net/ipvtap_test.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Simple tests for ipvtap + + +# +# The testing environment looks this way: +# +# |------HNS-------| |------PHY-------| +# | veth<----------------->veth | +# |------|--|------| |----------------| +# | | +# | | |-----TST0-------| +# | |------------|----ipvlan | +# | |----------------| +# | +# | |-----TST1-------| +# |---------------|----ipvlan | +# |----------------| +# + +ALL_TESTS=" + test_ip_set +" + +source lib.sh + +DEBUG=0 + +VETH_HOST=vethtst.h +VETH_PHY=vethtst.p + +NS_COUNT=32 +IP_ITERATIONS=1024 +IPSET_TIMEOUT="60s" + +ns_run() { + ns=$1 + shift + if [[ "$ns" == "global" ]]; then + "$@" >/dev/null + else + ip netns exec "$ns" "$@" >/dev/null + fi +} + +test_ip_setup_env() { + setup_ns NS_PHY + setup_ns HST_NS + + # setup simulated other-host (phy) and host itself + ns_run "$HST_NS" ip link add $VETH_HOST type veth peer name $VETH_PHY \ + netns "$NS_PHY" >/dev/null + ns_run "$HST_NS" ip link set $VETH_HOST up + ns_run "$NS_PHY" ip link set $VETH_PHY up + + for ((i=0; i/dev/null + ip a a "fc00::$v/64" dev ipvlan0 2>/dev/null + v=$(rnd) + ip a d "172.25.0.$v/24" dev ipvlan0 2>/dev/null + ip a d "fc00::$v/64" dev ipvlan0 2>/dev/null + done +} + +test_ip_set() { + RET=0 + + trap test_ip_cleanup_env EXIT + + test_ip_setup_env + + declare -A ns_pids + for ((i=0; i Date: Wed, 14 Jan 2026 22:03:23 +0000 Subject: [PATCH 47/96] rxrpc: Fix recvmsg() unconditional requeue If rxrpc_recvmsg() fails because MSG_DONTWAIT was specified but the call at the front of the recvmsg queue already has its mutex locked, it requeues the call - whether or not the call is already queued. The call may be on the queue because MSG_PEEK was also passed and so the call was not dequeued or because the I/O thread requeued it. The unconditional requeue may then corrupt the recvmsg queue, leading to things like UAFs or refcount underruns. Fix this by only requeuing the call if it isn't already on the queue - and moving it to the front if it is already queued. If we don't queue it, we have to put the ref we obtained by dequeuing it. Also, MSG_PEEK doesn't dequeue the call so shouldn't call rxrpc_notify_socket() for the call if we didn't use up all the data on the queue, so fix that also. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Faith Reported-by: Pumpkin Chang Signed-off-by: David Howells Acked-by: Marc Dionne cc: Nir Ohfeld cc: Willy Tarreau cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/95163.1768428203@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 4 ++++ net/rxrpc/recvmsg.c | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index de6f6d25767c..869f97c9bf73 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -322,6 +322,7 @@ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_recvmsg_peek_nowait, "PUT peek-nwt") \ EM(rxrpc_call_put_release_recvmsg_q, "PUT rls-rcmq") \ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ @@ -340,6 +341,9 @@ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \ + EM(rxrpc_call_see_recvmsg_requeue, "SEE recv-rqu") \ + EM(rxrpc_call_see_recvmsg_requeue_first, "SEE recv-rqF") \ + EM(rxrpc_call_see_recvmsg_requeue_move, "SEE recv-rqM") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 7fa7e77f6bb9..e1f7513a46db 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -518,7 +518,8 @@ try_again: if (rxrpc_call_has_failed(call)) goto call_failed; - if (!skb_queue_empty(&call->recvmsg_queue)) + if (!(flags & MSG_PEEK) && + !skb_queue_empty(&call->recvmsg_queue)) rxrpc_notify_socket(call); goto not_yet_complete; @@ -549,11 +550,21 @@ error_unlock_call: error_requeue_call: if (!(flags & MSG_PEEK)) { spin_lock_irq(&rx->recvmsg_lock); - list_add(&call->recvmsg_link, &rx->recvmsg_q); - spin_unlock_irq(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + list_add(&call->recvmsg_link, &rx->recvmsg_q); + rxrpc_see_call(call, rxrpc_call_see_recvmsg_requeue); + spin_unlock_irq(&rx->recvmsg_lock); + } else if (list_is_first(&call->recvmsg_link, &rx->recvmsg_q)) { + spin_unlock_irq(&rx->recvmsg_lock); + rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_first); + } else { + list_move(&call->recvmsg_link, &rx->recvmsg_q); + spin_unlock_irq(&rx->recvmsg_lock); + rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_move); + } trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0); } else { - rxrpc_put_call(call, rxrpc_call_put_recvmsg); + rxrpc_put_call(call, rxrpc_call_put_recvmsg_peek_nowait); } error_no_call: release_sock(&rx->sk); From ab9b218a1521133a4410722907fa7189566be9bc Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 16 Jan 2026 08:47:12 -0800 Subject: [PATCH 48/96] octeontx2: cn10k: fix RX flowid TCAM mask handling The RX flowid programming initializes the TCAM mask to all ones, but then overwrites it when clearing the MAC DA mask bits. This results in losing the intended initialization and may affect other match fields. Update the code to clear the MAC DA bits using an AND operation, making the handling of mask[0] consistent with mask[1], where the field-specific bits are cleared after initializing the mask to ~0ULL. Fixes: 57d00d4364f3 ("octeontx2-pf: mcs: Match macsec ethertype along with DMAC") Signed-off-by: Alok Tiwari Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/20260116164724.2733511-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 4c7e0f345cb5..060c715ebad0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -328,7 +328,7 @@ static int cn10k_mcs_write_rx_flowid(struct otx2_nic *pfvf, req->data[0] = FIELD_PREP(MCS_TCAM0_MAC_DA_MASK, mac_da); req->mask[0] = ~0ULL; - req->mask[0] = ~MCS_TCAM0_MAC_DA_MASK; + req->mask[0] &= ~MCS_TCAM0_MAC_DA_MASK; req->data[1] = FIELD_PREP(MCS_TCAM1_ETYPE_MASK, ETH_P_MACSEC); req->mask[1] = ~0ULL; From 50da4b9d07a7a463e2cfb738f3ad4cff6b2c9c3b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 14 Jan 2026 11:02:41 -0500 Subject: [PATCH 49/96] net/sched: Enforce that teql can only be used as root qdisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design intent of teql is that it is only supposed to be used as root qdisc. We need to check for that constraint. Although not important, I will describe the scenario that unearthed this issue for the curious. GangMin Kim managed to concot a scenario as follows: ROOT qdisc 1:0 (QFQ) ├── class 1:1 (weight=15, lmax=16384) netem with delay 6.4s └── class 1:2 (weight=1, lmax=1514) teql GangMin sends a packet which is enqueued to 1:1 (netem). Any invocation of dequeue by QFQ from this class will not return a packet until after 6.4s. In the meantime, a second packet is sent and it lands on 1:2. teql's enqueue will return success and this will activate class 1:2. Main issue is that teql only updates the parent visible qlen (sch->q.qlen) at dequeue. Since QFQ will only call dequeue if peek succeeds (and teql's peek always returns NULL), dequeue will never be called and thus the qlen will remain as 0. With that in mind, when GangMin updates 1:2's lmax value, the qfq_change_class calls qfq_deact_rm_from_agg. Since the child qdisc's qlen was not incremented, qfq fails to deactivate the class, but still frees its pointers from the aggregate. So when the first packet is rescheduled after 6.4 seconds (netem's delay), a dangling pointer is accessed causing GangMin's causing a UAF. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: GangMin Kim Tested-by: Victor Nogueira Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260114160243.913069-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_teql.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 8badec6d82a2..6e4bdaa876ed 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -178,6 +178,11 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt, if (m->dev == dev) return -ELOOP; + if (sch->parent != TC_H_ROOT) { + NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root"); + return -EOPNOTSUPP; + } + q->m = m; skb_queue_head_init(&q->q); From d837fbee92453fbb829f950c8e7cf76207d73f33 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 14 Jan 2026 11:02:42 -0500 Subject: [PATCH 50/96] net/sched: qfq: Use cl_is_active to determine whether class is active in qfq_rm_from_ag This is more of a preventive patch to make the code more consistent and to prevent possible exploits that employ child qlen manipulations on qfq. use cl_is_active instead of relying on the child qdisc's qlen to determine class activation. Fixes: 462dbc9101acd ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") Signed-off-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260114160243.913069-3-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_qfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9d59090bbe93..e7778413e72f 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -373,7 +373,7 @@ static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl) /* Deschedule class and remove it from its parent aggregate. */ static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl) { - if (cl->qdisc->q.qlen > 0) /* class is active */ + if (cl_is_active(cl)) /* class is active */ qfq_deactivate_class(q, cl); qfq_rm_from_agg(q, cl); From 2460f31e6e444a52a4e718e4fe64cff29ffaab05 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 14 Jan 2026 11:02:43 -0500 Subject: [PATCH 51/96] selftests/tc-testing: Try to add teql as a child qdisc Add a selftest that attempts to add a teql qdisc as a qfq child. Since teql _must_ be added as a root qdisc, the kernel should reject this. Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20260114160243.913069-4-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/qdiscs/teql.json | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json index e5cc31f265f8..0179c57104ad 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json @@ -81,5 +81,30 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP link del dev $DUMMY" ] + }, + { + "id": "124e", + "name": "Try to add teql as a child qdisc", + "category": [ + "qdisc", + "ets", + "tbf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: qfq", + "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 15 maxpkt 16384" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:1 handle 2:1 teql0", + "expExitCode": "2", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1:" + ] } ] From 5dc6975566f5d142ec53eb7e97af688c45dd314d Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 20 Jan 2026 14:11:21 +1100 Subject: [PATCH 52/96] wifi: mac80211: don't perform DA check on S1G beacon S1G beacons don't contain the DA field as per IEEE80211-2024 9.3.4.3, so the DA broadcast check reads the SA address of the S1G beacon which will subsequently lead to the beacon being dropped. As a result, passive scanning is not possible. Fix this by only performing the check on non-S1G beacons to allow S1G long beacons to be processed during a passive scan. Fixes: ddf82e752f8a ("wifi: mac80211: Allow beacons to update BSS table regardless of scan") Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20260120031122.309942-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5ef315ed3b0f..4823c8d45639 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -347,8 +347,13 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) mgmt->da)) return; } else { - /* Beacons are expected only with broadcast address */ - if (!is_broadcast_ether_addr(mgmt->da)) + /* + * Non-S1G beacons are expected only with broadcast address. + * S1G beacons only carry the SA so no DA check is required + * nor possible. + */ + if (!ieee80211_is_s1g_beacon(mgmt->frame_control) && + !is_broadcast_ether_addr(mgmt->da)) return; } From 3f3d8ff31496874a69b131866f62474eb24ed20a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 18 Jan 2026 09:28:29 +0200 Subject: [PATCH 53/96] wifi: mac80211: don't increment crypto_tx_tailroom_needed_cnt twice In reconfig, in case the driver asks to disconnect during the reconfig, all the keys of the interface are marked as tainted. Then ieee80211_reenable_keys will loop over all the interface keys, and for each one it will a) increment crypto_tx_tailroom_needed_cnt b) call ieee80211_key_enable_hw_accel, which in turn will detect that this key is tainted, so it will mark it as "not in hardware", which is paired with crypto_tx_tailroom_needed_cnt incrementation, so we get two incrementations for each tainted key. Then we get a warning in ieee80211_free_keys. To fix it, don't increment the count in ieee80211_reenable_keys for tainted keys Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118092821.4ca111fddcda.Id6e554f4b1c83760aa02d5a9e4e3080edb197aa2@changeid Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d5da7ccea66e..04c8809173d7 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -987,7 +987,8 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) if (ieee80211_sdata_running(sdata)) { list_for_each_entry(key, &sdata->key_list, list) { - increment_tailroom_need_count(sdata); + if (!(key->flags & KEY_FLAG_TAINTED)) + increment_tailroom_need_count(sdata); ieee80211_key_enable_hw_accel(key); } } From 3fa2886d11d4545dc0dcfd0759ffbd03f88b5410 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Jan 2026 09:51:13 +0200 Subject: [PATCH 54/96] wifi: mac80211: parse all TTLM entries For the follow up patch, we need to properly parse TTLM entries that do not have a switch time. Change the logic so that ieee80211_parse_adv_t2l returns usable values in all non-error cases. Before the values filled in were technically incorrect but enough for ieee80211_process_adv_ttlm. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118093904.ccd324e2dd59.I69f0bee0a22e9b11bb95beef313e305dab17c051@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ad53dedd929c..3f6bbe4e0175 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7015,10 +7015,6 @@ ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, pos = (void *)ttlm->optional; control = ttlm->control; - if ((control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) || - !(control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT)) - return 0; - if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) != IEEE80211_TTLM_DIRECTION_BOTH) { sdata_info(sdata, "Invalid advertised T2L map direction\n"); @@ -7028,21 +7024,28 @@ ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, link_map_presence = *pos; pos++; - ttlm_info->switch_time = get_unaligned_le16(pos); + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) { + ttlm_info->switch_time = get_unaligned_le16(pos); - /* Since ttlm_info->switch_time == 0 means no switch time, bump it - * by 1. - */ - if (!ttlm_info->switch_time) - ttlm_info->switch_time = 1; + /* Since ttlm_info->switch_time == 0 means no switch time, bump + * it by 1. + */ + if (!ttlm_info->switch_time) + ttlm_info->switch_time = 1; - pos += 2; + pos += 2; + } if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) { ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16; pos += 3; } + if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) { + ttlm_info->map = 0xffff; + return 0; + } + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) map_size = 1; else From aebc29dec67aa998a9ea6d34aacba7b5c6a74d33 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Jan 2026 09:51:14 +0200 Subject: [PATCH 55/96] wifi: mac80211: apply advertised TTLM from association response When the AP has a disabled link that the station can include in the association, the fact that the link is dormant needs to be advertised in the TID to Link Mapping (TTLM). Section 35.3.7.2.3 ("Negotiation of TTLM") of Draft P802.11REVmf_D1.0 also states that the mapping needs to be included in the association response frame. As such, we can simply rely on the TTLM from the association response. Before this change mac80211 would not properly track that an advertised TTLM was effectively active, resulting in it not enabling the link once it became available again. For the link reconfiguration case, the data was not used at all. This behaviour is actually correct because Draft P802.11REVmf_D1.0 states in section 35.3.6.4 that we "shall operate with all the TIDs mapped to the newly added links ..." Fixes: 6d543b34dbcf ("wifi: mac80211: Support disabled links during association") Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118093904.43c861424543.I067f702ac46b84ac3f8b4ea16fb0db9cbbfae7e2@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 - net/mac80211/mlme.c | 216 ++++++++++++++++++++----------------- 2 files changed, 119 insertions(+), 99 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9d9313eee59f..bd573f8e61fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -451,8 +451,6 @@ struct ieee80211_mgd_assoc_data { struct ieee80211_conn_settings conn; u16 status; - - bool disabled; } link[IEEE80211_MLD_MAX_NUM_LINKS]; u8 ap_addr[ETH_ALEN] __aligned(2); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3f6bbe4e0175..b72345c779c0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6161,6 +6161,98 @@ static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies, return true; } +static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data) +{ + if (bm_size == 1) + return *data; + + return get_unaligned_le16(data); +} + +static int +ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_ttlm_elem *ttlm, + struct ieee80211_adv_ttlm_info *ttlm_info) +{ + /* The element size was already validated in + * ieee80211_tid_to_link_map_size_ok() + */ + u8 control, link_map_presence, map_size, tid; + u8 *pos; + + memset(ttlm_info, 0, sizeof(*ttlm_info)); + pos = (void *)ttlm->optional; + control = ttlm->control; + + if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) != + IEEE80211_TTLM_DIRECTION_BOTH) { + sdata_info(sdata, "Invalid advertised T2L map direction\n"); + return -EINVAL; + } + + link_map_presence = *pos; + pos++; + + if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) { + ttlm_info->switch_time = get_unaligned_le16(pos); + + /* Since ttlm_info->switch_time == 0 means no switch time, bump + * it by 1. + */ + if (!ttlm_info->switch_time) + ttlm_info->switch_time = 1; + + pos += 2; + } + + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) { + ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16; + pos += 3; + } + + if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) { + ttlm_info->map = 0xffff; + return 0; + } + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + map_size = 1; + else + map_size = 2; + + /* According to Draft P802.11be_D3.0 clause 35.3.7.1.7, an AP MLD shall + * not advertise a TID-to-link mapping that does not map all TIDs to the + * same link set, reject frame if not all links have mapping + */ + if (link_map_presence != 0xff) { + sdata_info(sdata, + "Invalid advertised T2L mapping presence indicator\n"); + return -EINVAL; + } + + ttlm_info->map = ieee80211_get_ttlm(map_size, pos); + if (!ttlm_info->map) { + sdata_info(sdata, + "Invalid advertised T2L map for TID 0\n"); + return -EINVAL; + } + + pos += map_size; + + for (tid = 1; tid < 8; tid++) { + u16 map = ieee80211_get_ttlm(map_size, pos); + + if (map != ttlm_info->map) { + sdata_info(sdata, "Invalid advertised T2L map for tid %d\n", + tid); + return -EINVAL; + } + + pos += map_size; + } + return 0; +} + static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, struct ieee802_11_elems *elems, @@ -6192,8 +6284,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, continue; valid_links |= BIT(link_id); - if (assoc_data->link[link_id].disabled) - dormant_links |= BIT(link_id); if (link_id != assoc_data->assoc_link_id) { err = ieee80211_sta_allocate_link(sta, link_id); @@ -6202,6 +6292,33 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } } + /* + * We do not support setting a negotiated TTLM during + * association. As such, we can assume that if there is a TTLM, + * then it is the currently active advertised TTLM. + * In that case, there must be exactly one TTLM that does not + * have a switch time set. This mapping should also leave us + * with at least one usable link. + */ + if (elems->ttlm_num > 1) { + sdata_info(sdata, + "More than one advertised TTLM in association response\n"); + goto out_err; + } else if (elems->ttlm_num == 1) { + if (ieee80211_parse_adv_t2l(sdata, elems->ttlm[0], + &sdata->u.mgd.ttlm_info) || + sdata->u.mgd.ttlm_info.switch_time != 0 || + !(valid_links & sdata->u.mgd.ttlm_info.map)) { + sdata_info(sdata, + "Invalid advertised TTLM in association response\n"); + goto out_err; + } + + sdata->u.mgd.ttlm_info.active = true; + dormant_links = + valid_links & ~sdata->u.mgd.ttlm_info.map; + } + ieee80211_vif_set_links(sdata, valid_links, dormant_links); } @@ -6992,98 +7109,6 @@ static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy, sdata->u.mgd.ttlm_info.switch_time = 0; } -static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data) -{ - if (bm_size == 1) - return *data; - else - return get_unaligned_le16(data); -} - -static int -ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, - const struct ieee80211_ttlm_elem *ttlm, - struct ieee80211_adv_ttlm_info *ttlm_info) -{ - /* The element size was already validated in - * ieee80211_tid_to_link_map_size_ok() - */ - u8 control, link_map_presence, map_size, tid; - u8 *pos; - - memset(ttlm_info, 0, sizeof(*ttlm_info)); - pos = (void *)ttlm->optional; - control = ttlm->control; - - if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) != - IEEE80211_TTLM_DIRECTION_BOTH) { - sdata_info(sdata, "Invalid advertised T2L map direction\n"); - return -EINVAL; - } - - link_map_presence = *pos; - pos++; - - if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) { - ttlm_info->switch_time = get_unaligned_le16(pos); - - /* Since ttlm_info->switch_time == 0 means no switch time, bump - * it by 1. - */ - if (!ttlm_info->switch_time) - ttlm_info->switch_time = 1; - - pos += 2; - } - - if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) { - ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16; - pos += 3; - } - - if (control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) { - ttlm_info->map = 0xffff; - return 0; - } - - if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) - map_size = 1; - else - map_size = 2; - - /* According to Draft P802.11be_D3.0 clause 35.3.7.1.7, an AP MLD shall - * not advertise a TID-to-link mapping that does not map all TIDs to the - * same link set, reject frame if not all links have mapping - */ - if (link_map_presence != 0xff) { - sdata_info(sdata, - "Invalid advertised T2L mapping presence indicator\n"); - return -EINVAL; - } - - ttlm_info->map = ieee80211_get_ttlm(map_size, pos); - if (!ttlm_info->map) { - sdata_info(sdata, - "Invalid advertised T2L map for TID 0\n"); - return -EINVAL; - } - - pos += map_size; - - for (tid = 1; tid < 8; tid++) { - u16 map = ieee80211_get_ttlm(map_size, pos); - - if (map != ttlm_info->map) { - sdata_info(sdata, "Invalid advertised T2L map for tid %d\n", - tid); - return -EINVAL; - } - - pos += map_size; - } - return 0; -} - static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, u64 beacon_ts) @@ -9740,7 +9765,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, req, true, i, &assoc_data->link[i].conn); assoc_data->link[i].bss = link_cbss; - assoc_data->link[i].disabled = req->links[i].disabled; if (!bss->uapsd_supported) uapsd_supported = false; @@ -10722,8 +10746,6 @@ int ieee80211_mgd_assoc_ml_reconf(struct ieee80211_sub_if_data *sdata, &data->link[link_id].conn); data->link[link_id].bss = link_cbss; - data->link[link_id].disabled = - req->add_links[link_id].disabled; data->link[link_id].elems = (u8 *)req->add_links[link_id].elems; data->link[link_id].elems_len = From 50b359896fe55d0443ed550e1fabba71d242031a Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 18 Jan 2026 09:51:15 +0200 Subject: [PATCH 56/96] wifi: cfg80211: ignore link disabled flag from userspace When the AP has an advertised TID to Link Mapping (TTLM) it shall include the element in the association response. As such, when this element is present it needs to be used for the currently dormant links. See Draft P802.11REVmf_D1.0 section 35.3.7.2.3 ("Negotiation of TTLM") for the details. The flag is also not usable in case userspace wants to specify a negotiated TTLM during association. Note that for the link reconfiguration case, mac80211 did not use the information. Draft P802.11REVmf_D1.0 states in section 35.3.6.4 ("Link reconfiguration to the setup links) that we "shall operate with all the TIDs mapped to the newly added links ..." All this means that the flag is not needed. The implementation should parse the information from the association response. Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Reviewed-by: Ilan Peer Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20260118093904.754e057896a5.Ifd06f5ef839a93bfd54d0593dc932870f95f3242@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 --- include/uapi/linux/nl80211.h | 5 +++-- net/wireless/nl80211.c | 10 ---------- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 899f267b7cf9..2900202588a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3221,8 +3221,6 @@ struct cfg80211_auth_request { * if this is %NULL for a link, that link is not requested * @elems: extra elements for the per-STA profile for this link * @elems_len: length of the elements - * @disabled: If set this link should be included during association etc. but it - * should not be used until enabled by the AP MLD. * @error: per-link error code, must be <= 0. If there is an error, then the * operation as a whole must fail. */ @@ -3230,7 +3228,6 @@ struct cfg80211_assoc_link { struct cfg80211_bss *bss; const u8 *elems; size_t elems_len; - bool disabled; int error; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8134f10e4e6c..8433bac48112 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2880,8 +2880,9 @@ enum nl80211_commands { * index. If the userspace includes more RNR elements than number of * MBSSID elements then these will be added in every EMA beacon. * - * @NL80211_ATTR_MLO_LINK_DISABLED: Flag attribute indicating that the link is - * disabled. + * @NL80211_ATTR_MLO_LINK_DISABLED: Unused. It was used to indicate that a link + * is disabled during association. However, the AP will send the + * information by including a TTLM in the association response. * * @NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA: Include BSS usage data, i.e. * include BSSes that can only be used in restricted scenarios and/or diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c961cd42a832..03efd45c007f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12241,9 +12241,6 @@ static int nl80211_process_links(struct cfg80211_registered_device *rdev, return -EINVAL; } } - - links[link_id].disabled = - nla_get_flag(attrs[NL80211_ATTR_MLO_LINK_DISABLED]); } return 0; @@ -12423,13 +12420,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } - if (req.links[req.link_id].disabled) { - GENL_SET_ERR_MSG(info, - "cannot have assoc link disabled"); - err = -EINVAL; - goto free; - } - if (info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]) req.ext_mld_capa_ops = nla_get_u16(info->attrs[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS]); From 5d54aa40c7b7e9dee5746cca99e9ddbcca13e895 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 16 Jan 2026 09:52:36 +0100 Subject: [PATCH 57/96] vsock/test: Do not filter kallsyms by symbol type Blamed commit implemented logic to discover available vsock transports by grepping /proc/kallsyms for known symbols. It incorrectly filtered entries by type 'd'. For some kernel configs having CONFIG_VIRTIO_VSOCKETS=m CONFIG_VSOCKETS_LOOPBACK=y kallsyms reports 0000000000000000 d virtio_transport [vmw_vsock_virtio_transport] 0000000000000000 t loopback_transport Overzealous filtering might have affected vsock test suit, resulting in insufficient/misleading testing. Do not filter symbols by type. It never helped much. Fixes: 3070c05b7afd ("vsock/test: Introduce get_transports()") Signed-off-by: Michal Luczaj Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20260116-vsock_test-kallsyms-grep-v1-1-3320bc3346f2@rbox.co Signed-off-by: Paolo Abeni --- tools/testing/vsock/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 142c02a6834a..bf633cde82b0 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -25,7 +25,7 @@ enum transport { }; static const char * const transport_ksyms[] = { - #define x(name, symbol) "d " symbol "_transport", + #define x(name, symbol) " " symbol "_transport", KNOWN_TRANSPORTS(x) #undef x }; From b97d5eedf4976cc94321243be83b39efe81a0e15 Mon Sep 17 00:00:00 2001 From: Yun Lu Date: Fri, 16 Jan 2026 17:53:08 +0800 Subject: [PATCH 58/96] netdevsim: fix a race issue related to the operation on bpf_bound_progs list The netdevsim driver lacks a protection mechanism for operations on the bpf_bound_progs list. When the nsim_bpf_create_prog() performs list_add_tail, it is possible that nsim_bpf_destroy_prog() is simultaneously performs list_del. Concurrent operations on the list may lead to list corruption and trigger a kernel crash as follows: [ 417.290971] kernel BUG at lib/list_debug.c:62! [ 417.290983] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 417.290992] CPU: 10 PID: 168 Comm: kworker/10:1 Kdump: loaded Not tainted 6.19.0-rc5 #1 [ 417.291003] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 417.291007] Workqueue: events bpf_prog_free_deferred [ 417.291021] RIP: 0010:__list_del_entry_valid_or_report+0xa7/0xc0 [ 417.291034] Code: a8 ff 0f 0b 48 89 fe 48 89 ca 48 c7 c7 48 a1 eb ae e8 ed fb a8 ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 80 a1 eb ae e8 d9 fb a8 ff <0f> 0b 48 89 d1 48 c7 c7 d0 a1 eb ae 48 89 f2 48 89 c6 e8 c2 fb a8 [ 417.291040] RSP: 0018:ffffb16a40807df8 EFLAGS: 00010246 [ 417.291046] RAX: 000000000000006d RBX: ffff8e589866f500 RCX: 0000000000000000 [ 417.291051] RDX: 0000000000000000 RSI: ffff8e59f7b23180 RDI: ffff8e59f7b23180 [ 417.291055] RBP: ffffb16a412c9000 R08: 0000000000000000 R09: 0000000000000003 [ 417.291059] R10: ffffb16a40807c80 R11: ffffffffaf9edce8 R12: ffff8e594427ac20 [ 417.291063] R13: ffff8e59f7b44780 R14: ffff8e58800b7a05 R15: 0000000000000000 [ 417.291074] FS: 0000000000000000(0000) GS:ffff8e59f7b00000(0000) knlGS:0000000000000000 [ 417.291079] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 417.291083] CR2: 00007fc4083efe08 CR3: 00000001c3626006 CR4: 0000000000770ee0 [ 417.291088] PKRU: 55555554 [ 417.291091] Call Trace: [ 417.291096] [ 417.291103] nsim_bpf_destroy_prog+0x31/0x80 [netdevsim] [ 417.291154] __bpf_prog_offload_destroy+0x2a/0x80 [ 417.291163] bpf_prog_dev_bound_destroy+0x6f/0xb0 [ 417.291171] bpf_prog_free_deferred+0x18e/0x1a0 [ 417.291178] process_one_work+0x18a/0x3a0 [ 417.291188] worker_thread+0x27b/0x3a0 [ 417.291197] ? __pfx_worker_thread+0x10/0x10 [ 417.291207] kthread+0xe5/0x120 [ 417.291214] ? __pfx_kthread+0x10/0x10 [ 417.291221] ret_from_fork+0x31/0x50 [ 417.291230] ? __pfx_kthread+0x10/0x10 [ 417.291236] ret_from_fork_asm+0x1a/0x30 [ 417.291246] Add a mutex lock, to prevent simultaneous addition and deletion operations on the list. Fixes: 31d3ad832948 ("netdevsim: add bpf offload support") Reported-by: Yinhao Hu Reported-by: Kaiyan Mei Signed-off-by: Yun Lu Link: https://patch.msgid.link/20260116095308.11441-1-luyun_611@163.com Signed-off-by: Paolo Abeni --- drivers/net/netdevsim/bpf.c | 6 ++++++ drivers/net/netdevsim/dev.c | 2 ++ drivers/net/netdevsim/netdevsim.h | 1 + 3 files changed, 9 insertions(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 49537d3c4120..5f17f68f3c08 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -244,7 +244,9 @@ static int nsim_bpf_create_prog(struct nsim_dev *nsim_dev, &state->state, &nsim_bpf_string_fops); debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded); + mutex_lock(&nsim_dev->progs_list_lock); list_add_tail(&state->l, &nsim_dev->bpf_bound_progs); + mutex_unlock(&nsim_dev->progs_list_lock); prog->aux->offload->dev_priv = state; @@ -273,12 +275,16 @@ static int nsim_bpf_translate(struct bpf_prog *prog) static void nsim_bpf_destroy_prog(struct bpf_prog *prog) { struct nsim_bpf_bound_prog *state; + struct nsim_dev *nsim_dev; state = prog->aux->offload->dev_priv; + nsim_dev = state->nsim_dev; WARN(state->is_loaded, "offload state destroyed while program still bound"); debugfs_remove_recursive(state->ddir); + mutex_lock(&nsim_dev->progs_list_lock); list_del(&state->l); + mutex_unlock(&nsim_dev->progs_list_lock); kfree(state); } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 2683a989873e..dfd571b22107 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1647,6 +1647,7 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT; nsim_dev->test2 = NSIM_DEV_TEST2_DEFAULT; spin_lock_init(&nsim_dev->fa_cookie_lock); + mutex_init(&nsim_dev->progs_list_lock); dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev); @@ -1785,6 +1786,7 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) devl_unregister(devlink); kfree(nsim_dev->vfconfigs); kfree(nsim_dev->fa_cookie); + mutex_destroy(&nsim_dev->progs_list_lock); devl_unlock(devlink); devlink_free(devlink); dev_set_drvdata(&nsim_bus_dev->dev, NULL); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index d1a941e2b18f..46c67983c517 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -324,6 +324,7 @@ struct nsim_dev { u32 prog_id_gen; struct list_head bpf_bound_progs; struct list_head bpf_bound_maps; + struct mutex progs_list_lock; struct netdev_phys_item_id switch_id; struct list_head port_list; bool fw_update_status; From f406220eb8e227ca344eef1a6d30aff53706b196 Mon Sep 17 00:00:00 2001 From: Cody Haas Date: Fri, 12 Dec 2025 16:22:26 -0800 Subject: [PATCH 59/96] ice: Fix persistent failure in ice_get_rxfh Several ioctl functions have the ability to call ice_get_rxfh, however all of these ioctl functions do not provide all of the expected information in ethtool_rxfh_param. For example, ethtool_get_rxfh_indir does not provide an rss_key. This previously caused ethtool_get_rxfh_indir to always fail with -EINVAL. This change draws inspiration from i40e_get_rss to handle this situation, by only calling the appropriate rss helpers when the necessary information has been provided via ethtool_rxfh_param. Fixes: b66a972abb6b ("ice: Refactor ice_set/get_rss into LUT and key specific functions") Signed-off-by: Cody Haas Closes: https://lore.kernel.org/intel-wired-lan/CAH7f-UKkJV8MLY7zCdgCrGE55whRhbGAXvgkDnwgiZ9gUZT7_w@mail.gmail.com/ Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 +---- drivers/net/ethernet/intel/ice/ice_main.c | 28 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 147aaee192a7..00f75d87c73f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -979,6 +979,7 @@ void ice_map_xdp_rings(struct ice_vsi *vsi); int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 969d4f8f9c02..3565a5d96c6d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3626,11 +3626,7 @@ ice_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) if (!lut) return -ENOMEM; - err = ice_get_rss_key(vsi, rxfh->key); - if (err) - goto out; - - err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size); + err = ice_get_rss(vsi, rxfh->key, lut, vsi->rss_table_size); if (err) goto out; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4bb68e7a00f5..6a9278487ccb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7988,6 +7988,34 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) return status; } +/** + * ice_get_rss - Get RSS LUT and/or key + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the key in + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Return: 0 on success, negative on failure + */ +int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + int err; + + if (seed) { + err = ice_get_rss_key(vsi, seed); + if (err) + return err; + } + + if (lut) { + err = ice_get_rss_lut(vsi, lut, lut_size); + if (err) + return err; + } + + return 0; +} + /** * ice_set_rss_hfunc - Set RSS HASH function * @vsi: Pointer to VSI structure From 42fb5f3deb582cb96440e4683745017dbabb83d6 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Thu, 18 Dec 2025 08:36:53 -0500 Subject: [PATCH 60/96] ice: add missing ice_deinit_hw() in devlink reinit path devlink-reload results in ice_init_hw failed error, and then removing the ice driver causes a NULL pointer dereference. [ +0.102213] ice 0000:ca:00.0: ice_init_hw failed: -16 ... [ +0.000001] Call Trace: [ +0.000003] [ +0.000006] ice_unload+0x8f/0x100 [ice] [ +0.000081] ice_remove+0xba/0x300 [ice] Commit 1390b8b3d2be ("ice: remove duplicate call to ice_deinit_hw() on error paths") removed ice_deinit_hw() from ice_deinit_dev(). As a result ice_devlink_reinit_down() no longer calls ice_deinit_hw(), but ice_devlink_reinit_up() still calls ice_init_hw(). Since the control queues are not uninitialized, ice_init_hw() fails with -EBUSY. Add ice_deinit_hw() to ice_devlink_reinit_down() to correspond with ice_init_hw() in ice_devlink_reinit_up(). Fixes: 1390b8b3d2be ("ice: remove duplicate call to ice_deinit_hw() on error paths") Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Signed-off-by: Paul Greenwalt Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/devlink/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d88b7f3fd1f9..2ef39cc70c21 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -460,6 +460,7 @@ static void ice_devlink_reinit_down(struct ice_pf *pf) ice_vsi_decfg(ice_get_main_vsi(pf)); rtnl_unlock(); ice_deinit_pf(pf); + ice_deinit_hw(&pf->hw); ice_deinit_dev(pf); } From d3f867e7a04678640ebcbfb81893c59f4af48586 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Mon, 29 Dec 2025 03:52:34 -0500 Subject: [PATCH 61/96] ice: fix devlink reload call trace Commit 4da71a77fc3b ("ice: read internal temperature sensor") introduced internal temperature sensor reading via HWMON. ice_hwmon_init() was added to ice_init_feature() and ice_hwmon_exit() was added to ice_remove(). As a result if devlink reload is used to reinit the device and then the driver is removed, a call trace can occur. BUG: unable to handle page fault for address: ffffffffc0fd4b5d Call Trace: string+0x48/0xe0 vsnprintf+0x1f9/0x650 sprintf+0x62/0x80 name_show+0x1f/0x30 dev_attr_show+0x19/0x60 The call trace repeats approximately every 10 minutes when system monitoring tools (e.g., sadc) attempt to read the orphaned hwmon sysfs attributes that reference freed module memory. The sequence is: 1. Driver load, ice_hwmon_init() gets called from ice_init_feature() 2. Devlink reload down, flow does not call ice_remove() 3. Devlink reload up, ice_hwmon_init() gets called from ice_init_feature() resulting in a second instance 4. Driver unload, ice_hwmon_exit() called from ice_remove() leaving the first hwmon instance orphaned with dangling pointer Fix this by moving ice_hwmon_exit() from ice_remove() to ice_deinit_features() to ensure proper cleanup symmetry with ice_hwmon_init(). Fixes: 4da71a77fc3b ("ice: read internal temperature sensor") Reviewed-by: Aleksandr Loktionov Signed-off-by: Paul Greenwalt Reviewed-by: Paul Menzel Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6a9278487ccb..de488185cd4a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4836,6 +4836,7 @@ static void ice_deinit_features(struct ice_pf *pf) ice_dpll_deinit(pf); if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) xa_destroy(&pf->eswitch.reprs); + ice_hwmon_exit(pf); } static void ice_init_wakeup(struct ice_pf *pf) @@ -5437,8 +5438,6 @@ static void ice_remove(struct pci_dev *pdev) ice_free_vfs(pf); } - ice_hwmon_exit(pf); - if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); From bdfc7b55adcd04834ccc1b6b13e55e3fd7eaa789 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 11 Dec 2025 10:19:29 +0000 Subject: [PATCH 62/96] idpf: read lower clock bits inside the time sandwich PCIe reads need to be done inside the time sandwich because PCIe writes may get buffered in the PCIe fabric and posted to the device after the _postts completes. Doing the PCIe read inside the time sandwich guarantees that the write gets flushed before the _postts timestamp is taken. Cc: lrizzo@google.com Cc: namangulati@google.com Cc: willemb@google.com Cc: intel-wired-lan@lists.osuosl.org Cc: milena.olech@intel.com Cc: jacob.e.keller@intel.com Fixes: 5cb8805d2366 ("idpf: negotiate PTP capabilities and get PTP clock") Suggested-by: Shachar Raindel Signed-off-by: Mina Almasry Reviewed-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index 3e1052d070cf..0a8b50350b86 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -108,11 +108,11 @@ static u64 idpf_ptp_read_src_clk_reg_direct(struct idpf_adapter *adapter, ptp_read_system_prets(sts); idpf_ptp_enable_shtime(adapter); + lo = readl(ptp->dev_clk_regs.dev_clk_ns_l); /* Read the system timestamp post PHC read */ ptp_read_system_postts(sts); - lo = readl(ptp->dev_clk_regs.dev_clk_ns_l); hi = readl(ptp->dev_clk_regs.dev_clk_ns_h); spin_unlock(&ptp->read_dev_clk_lock); From 79912b256e14054e6ba177d7e7e631485ce23dbe Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 19 Jan 2026 00:41:54 +0000 Subject: [PATCH 63/96] net: phy: intel-xway: fix OF node refcount leakage Automated review spotted am OF node reference count leakage when checking if the 'leds' child node exists. Call of_put_node() to correctly maintain the refcount. Link: https://netdev-ai.bots.linux.dev/ai-review.html?id=20f173ba-0c64-422b-a663-fea4b4ad01d0 Fixes: 1758af47b98c1 ("net: phy: intel-xway: add support for PHY LEDs") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/e3275e1c1cdca7e6426bb9c11f33bd84b8d900c8.1768783208.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/intel-xway.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index 9766dd99afaa..12ff4c1f285d 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -277,7 +277,7 @@ static int xway_gphy_init_leds(struct phy_device *phydev) static int xway_gphy_config_init(struct phy_device *phydev) { - struct device_node *np = phydev->mdio.dev.of_node; + struct device_node *np; int err; /* Mask all interrupts */ @@ -286,7 +286,10 @@ static int xway_gphy_config_init(struct phy_device *phydev) return err; /* Use default LED configuration if 'leds' node isn't defined */ - if (!of_get_child_by_name(np, "leds")) + np = of_get_child_by_name(phydev->mdio.dev.of_node, "leds"); + if (np) + of_node_put(np); + else xway_gphy_init_leds(phydev); /* Clear all pending interrupts */ From a9f470594c50ab1ddf25b21a00ca4d3166057f3b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 18 Jan 2026 10:48:23 +0100 Subject: [PATCH 64/96] nfc: MAINTAINERS: Orphan the NFC and look for new maintainers NFC stack in Linux is in poor shape, with several bugs being discovered last years via fuzzing, not much new development happening and limited review and testing. It requires some more effort than drive-by reviews I have been offering last one or two years. I don't have much time nor business interests to keep looking at NFC, so let's drop me from the maintainers to clearly indicate that more hands are needed. Acked-by: Mark Greer Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20260118094822.10126-2-krzysztof.kozlowski@oss.qualcomm.com Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 383809bc4b7a..ec8a2acf1947 100644 --- a/CREDITS +++ b/CREDITS @@ -2231,6 +2231,10 @@ S: Markham, Ontario S: L3R 8B2 S: Canada +N: Krzysztof Kozlowski +E: krzk@kernel.org +D: NFC network subsystem and drivers maintainer + N: Christian Krafft D: PowerPC Cell support diff --git a/MAINTAINERS b/MAINTAINERS index 61bc5c566552..69be4c98e43e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18481,9 +18481,8 @@ F: include/uapi/linux/nexthop.h F: net/ipv4/nexthop.c NFC SUBSYSTEM -M: Krzysztof Kozlowski L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/net/nfc/ F: drivers/nfc/ F: include/net/nfc/ From a917cd0a23fae160a85b0e8a0dd1d548c5d5242e Mon Sep 17 00:00:00 2001 From: Michel Lind Date: Fri, 16 Jan 2026 21:21:58 +0000 Subject: [PATCH 65/96] tools/net/ynl: Makefile's install target now installs ynltool This tool is built by default, but was not being installed by default when running `make install`. Fix this by calling ynltool's install target. Signed-off-by: Michel Lind Link: https://patch.msgid.link/aWqr9gUT4hWZwwcI@mbp-m3-fedora.vm Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index c2f3e8b3f2ac..9b692f368be7 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -41,7 +41,7 @@ clean distclean: rm -rf pyynl.egg-info rm -rf build -install: libynl.a lib/*.h +install: libynl.a lib/*.h ynltool @echo -e "\tINSTALL libynl.a" @$(INSTALL) -d $(DESTDIR)$(libdir) @$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a @@ -51,6 +51,7 @@ install: libynl.a lib/*.h @echo -e "\tINSTALL pyynl" @pip install --prefix=$(DESTDIR)$(prefix) . @make -C generated install + @make -C ynltool install run_tests: @$(MAKE) -C tests run_tests From 748a81c8ceda1fdbdcd0af595947422e810442aa Mon Sep 17 00:00:00 2001 From: David Yang Date: Tue, 20 Jan 2026 00:07:37 +0800 Subject: [PATCH 66/96] net: hns3: fix data race in hns3_fetch_stats In hns3_fetch_stats(), ring statistics, protected by u64_stats_sync, are read and accumulated in ignorance of possible u64_stats_fetch_retry() events. These statistics are already accumulated by hns3_ring_stats_update(). Fix this by reading them into a temporary buffer first. Fixes: b20d7fe51e0d ("net: hns3: add some statitics info to tx process") Signed-off-by: David Yang Link: https://patch.msgid.link/20260119160759.1455950-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 69 ++++++++++--------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 7a0654e2d3dd..7a9573dcab74 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2529,44 +2529,47 @@ static netdev_features_t hns3_features_check(struct sk_buff *skb, static void hns3_fetch_stats(struct rtnl_link_stats64 *stats, struct hns3_enet_ring *ring, bool is_tx) { + struct ring_stats ring_stats; unsigned int start; do { start = u64_stats_fetch_begin(&ring->syncp); - if (is_tx) { - stats->tx_bytes += ring->stats.tx_bytes; - stats->tx_packets += ring->stats.tx_pkts; - stats->tx_dropped += ring->stats.sw_err_cnt; - stats->tx_dropped += ring->stats.tx_vlan_err; - stats->tx_dropped += ring->stats.tx_l4_proto_err; - stats->tx_dropped += ring->stats.tx_l2l3l4_err; - stats->tx_dropped += ring->stats.tx_tso_err; - stats->tx_dropped += ring->stats.over_max_recursion; - stats->tx_dropped += ring->stats.hw_limitation; - stats->tx_dropped += ring->stats.copy_bits_err; - stats->tx_dropped += ring->stats.skb2sgl_err; - stats->tx_dropped += ring->stats.map_sg_err; - stats->tx_errors += ring->stats.sw_err_cnt; - stats->tx_errors += ring->stats.tx_vlan_err; - stats->tx_errors += ring->stats.tx_l4_proto_err; - stats->tx_errors += ring->stats.tx_l2l3l4_err; - stats->tx_errors += ring->stats.tx_tso_err; - stats->tx_errors += ring->stats.over_max_recursion; - stats->tx_errors += ring->stats.hw_limitation; - stats->tx_errors += ring->stats.copy_bits_err; - stats->tx_errors += ring->stats.skb2sgl_err; - stats->tx_errors += ring->stats.map_sg_err; - } else { - stats->rx_bytes += ring->stats.rx_bytes; - stats->rx_packets += ring->stats.rx_pkts; - stats->rx_dropped += ring->stats.l2_err; - stats->rx_errors += ring->stats.l2_err; - stats->rx_errors += ring->stats.l3l4_csum_err; - stats->rx_crc_errors += ring->stats.l2_err; - stats->multicast += ring->stats.rx_multicast; - stats->rx_length_errors += ring->stats.err_pkt_len; - } + ring_stats = ring->stats; } while (u64_stats_fetch_retry(&ring->syncp, start)); + + if (is_tx) { + stats->tx_bytes += ring_stats.tx_bytes; + stats->tx_packets += ring_stats.tx_pkts; + stats->tx_dropped += ring_stats.sw_err_cnt; + stats->tx_dropped += ring_stats.tx_vlan_err; + stats->tx_dropped += ring_stats.tx_l4_proto_err; + stats->tx_dropped += ring_stats.tx_l2l3l4_err; + stats->tx_dropped += ring_stats.tx_tso_err; + stats->tx_dropped += ring_stats.over_max_recursion; + stats->tx_dropped += ring_stats.hw_limitation; + stats->tx_dropped += ring_stats.copy_bits_err; + stats->tx_dropped += ring_stats.skb2sgl_err; + stats->tx_dropped += ring_stats.map_sg_err; + stats->tx_errors += ring_stats.sw_err_cnt; + stats->tx_errors += ring_stats.tx_vlan_err; + stats->tx_errors += ring_stats.tx_l4_proto_err; + stats->tx_errors += ring_stats.tx_l2l3l4_err; + stats->tx_errors += ring_stats.tx_tso_err; + stats->tx_errors += ring_stats.over_max_recursion; + stats->tx_errors += ring_stats.hw_limitation; + stats->tx_errors += ring_stats.copy_bits_err; + stats->tx_errors += ring_stats.skb2sgl_err; + stats->tx_errors += ring_stats.map_sg_err; + } else { + stats->rx_bytes += ring_stats.rx_bytes; + stats->rx_packets += ring_stats.rx_pkts; + stats->rx_dropped += ring_stats.l2_err; + stats->rx_errors += ring_stats.l2_err; + stats->rx_errors += ring_stats.l3l4_csum_err; + stats->rx_crc_errors += ring_stats.l2_err; + stats->multicast += ring_stats.rx_multicast; + stats->rx_length_errors += ring_stats.err_pkt_len; + } } static void hns3_nic_get_stats64(struct net_device *netdev, From 5fbe395cd1fdbc883584e7f38369e4ba5ca778d2 Mon Sep 17 00:00:00 2001 From: David Yang Date: Tue, 20 Jan 2026 00:27:16 +0800 Subject: [PATCH 67/96] idpf: Fix data race in idpf_net_dim In idpf_net_dim(), some statistics protected by u64_stats_sync, are read and accumulated in ignorance of possible u64_stats_fetch_retry() events. The correct way to copy statistics is already illustrated by idpf_add_queue_stats(). Fix this by reading them into temporary variables first. Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Fixes: 3a8845af66ed ("idpf: add RX splitq napi poll support") Signed-off-by: David Yang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260119162720.1463859-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 7f3933ca9edc..f58f616d87fc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3941,7 +3941,7 @@ static void idpf_update_dim_sample(struct idpf_q_vector *q_vector, static void idpf_net_dim(struct idpf_q_vector *q_vector) { struct dim_sample dim_sample = { }; - u64 packets, bytes; + u64 packets, bytes, pkts, bts; u32 i; if (!IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode)) @@ -3953,9 +3953,12 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) do { start = u64_stats_fetch_begin(&txq->stats_sync); - packets += u64_stats_read(&txq->q_stats.packets); - bytes += u64_stats_read(&txq->q_stats.bytes); + pkts = u64_stats_read(&txq->q_stats.packets); + bts = u64_stats_read(&txq->q_stats.bytes); } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + + packets += pkts; + bytes += bts; } idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim, @@ -3972,9 +3975,12 @@ check_rx_itr: do { start = u64_stats_fetch_begin(&rxq->stats_sync); - packets += u64_stats_read(&rxq->q_stats.packets); - bytes += u64_stats_read(&rxq->q_stats.bytes); + pkts = u64_stats_read(&rxq->q_stats.packets); + bts = u64_stats_read(&rxq->q_stats.bytes); } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + + packets += pkts; + bytes += bts; } idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim, From 302e5b481caa7b3d11ec0e058434c1fc95195e50 Mon Sep 17 00:00:00 2001 From: David Yang Date: Mon, 19 Jan 2026 23:34:36 +0800 Subject: [PATCH 68/96] be2net: fix data race in be_get_new_eqd In be_get_new_eqd(), statistics of pkts, protected by u64_stats_sync, are read and accumulated in ignorance of possible u64_stats_fetch_retry() events. Before the commit in question, these statistics were retrieved one by one directly from queues. Fix this by reading them into temporary variables first. Fixes: 209477704187 ("be2net: set interrupt moderation for Skyhawk-R using EQ-DB") Signed-off-by: David Yang Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260119153440.1440578-1-mmyangfl@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 5bb31c8fab39..995c159003d7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2141,7 +2141,7 @@ static int be_get_new_eqd(struct be_eq_obj *eqo) struct be_aic_obj *aic; struct be_rx_obj *rxo; struct be_tx_obj *txo; - u64 rx_pkts = 0, tx_pkts = 0; + u64 rx_pkts = 0, tx_pkts = 0, pkts; ulong now; u32 pps, delta; int i; @@ -2157,15 +2157,17 @@ static int be_get_new_eqd(struct be_eq_obj *eqo) for_all_rx_queues_on_eq(adapter, eqo, rxo, i) { do { start = u64_stats_fetch_begin(&rxo->stats.sync); - rx_pkts += rxo->stats.rx_pkts; + pkts = rxo->stats.rx_pkts; } while (u64_stats_fetch_retry(&rxo->stats.sync, start)); + rx_pkts += pkts; } for_all_tx_queues_on_eq(adapter, eqo, txo, i) { do { start = u64_stats_fetch_begin(&txo->stats.sync); - tx_pkts += txo->stats.tx_reqs; + pkts = txo->stats.tx_reqs; } while (u64_stats_fetch_retry(&txo->stats.sync, start)); + tx_pkts += pkts; } /* Skip, if wrapped around or first calculation */ From 5228e9faaed4e55f6291e1bd138bffea90252fbd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 19 Jan 2026 14:21:36 +0000 Subject: [PATCH 69/96] net: stmmac: fix resume: calculate tso last_segment Tao Wang reports that sometimes, after resume, stmmac can watchdog: NETDEV WATCHDOG: CPU: x: transmit queue x timed out xx ms When this occurs, the DMA transmit descriptors contain: eth0: 221 [0x0000000876d10dd0]: 0x73660cbe 0x8 0x42 0xb04416a0 eth0: 222 [0x0000000876d10de0]: 0x77731d40 0x8 0x16a0 0x90000000 where descriptor 221 is the TSO header and 222 is the TSO payload. tdes3 for descriptor 221 (0xb04416a0) has both bit 29 (first descriptor) and bit 28 (last descriptor) set, which is incorrect. The following packet also has bit 28 set, but isn't marked as a first descriptor, and this causes the transmit DMA to stall. This occurs because stmmac_tso_allocator() populates the first descriptor, but does not set .last_segment correctly. There are two places where this matters: one is later in stmmac_tso_xmit() where we use it to update the TSO header descriptor. The other is in the ring/chain mode clean_desc3() which is a performance optimisation. Rather than using tx_q->tx_skbuff_dma[].last_segment to determine whether the first descriptor entry is the only segment, calculate the number of descriptor entries used. If there is only one descriptor, then the first is also the last, so mark it as such. Further work will be necessary to either eliminate .last_segment entirely or set it correctly. Code analysis also indicates that a similar issue exists with .is_jumbo. These will be the subject of a future patch. Reported-by: Tao Wang Fixes: c2837423cb54 ("net: stmmac: Rework TX Coalesce logic") Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1vhq8O-00000005N5s-0Ke5@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b3730312aeed..3f42843cd9ed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4359,11 +4359,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int first_entry, tx_packets; struct stmmac_txq_stats *txq_stats; struct stmmac_tx_queue *tx_q; + bool set_ic, is_last_segment; u32 pay_len, mss, queue; int i, first_tx, nfrags; u8 proto_hdr_len, hdr; dma_addr_t des; - bool set_ic; /* Always insert VLAN tag to SKB payload for TSO frames. * @@ -4551,10 +4551,16 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_enable_tx_timestamp(priv, first); } + /* If we only have one entry used, then the first entry is the last + * segment. + */ + is_last_segment = ((tx_q->cur_tx - first_entry) & + (priv->dma_conf.dma_tx_size - 1)) == 1; + /* Complete the first descriptor before granting the DMA */ stmmac_prepare_tso_tx_desc(priv, first, 1, proto_hdr_len, 0, 1, - tx_q->tx_skbuff_dma[first_entry].last_segment, - hdr / 4, (skb->len - proto_hdr_len)); + is_last_segment, hdr / 4, + skb->len - proto_hdr_len); /* If context desc is used to change MSS */ if (mss_desc) { From d57c67c956a1bad15115eba6e59d77a6dfeba01d Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 19 Jan 2026 21:28:39 +0800 Subject: [PATCH 70/96] net: hns3: fix wrong GENMASK() for HCLGE_FD_AD_COUNTER_NUM_M HCLGE_FD_AD_COUNTER_NUM_M should be at GENMASK(19, 13), rather than at GENMASK(20, 13), because bit 20 is HCLGE_FD_AD_NXT_STEP_B. This patch corrects the wrong definition. Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20260119132840.410513-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 9bb708fa42f2..416e02e7b995 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -731,7 +731,7 @@ struct hclge_fd_tcam_config_3_cmd { #define HCLGE_FD_AD_QID_M GENMASK(11, 2) #define HCLGE_FD_AD_USE_COUNTER_B 12 #define HCLGE_FD_AD_COUNTER_NUM_S 13 -#define HCLGE_FD_AD_COUNTER_NUM_M GENMASK(20, 13) +#define HCLGE_FD_AD_COUNTER_NUM_M GENMASK(19, 13) #define HCLGE_FD_AD_NXT_STEP_B 20 #define HCLGE_FD_AD_NXT_KEY_S 21 #define HCLGE_FD_AD_NXT_KEY_M GENMASK(25, 21) From f87e034d16e43af984380a95c32c25201b7759a7 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 19 Jan 2026 21:28:40 +0800 Subject: [PATCH 71/96] net: hns3: fix the HCLGE_FD_AD_NXT_KEY error setting issue Use next_input_key instead of counter_id to set HCLGE_FD_AD_NXT_KEY. Fixes: 117328680288 ("net: hns3: Add input key and action config support for flow director") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20260119132840.410513-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c589baea7c77..b8e2aa19f9e6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5690,7 +5690,7 @@ static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc, HCLGE_FD_AD_COUNTER_NUM_S, action->counter_id); hnae3_set_bit(ad_data, HCLGE_FD_AD_NXT_STEP_B, action->use_next_stage); hnae3_set_field(ad_data, HCLGE_FD_AD_NXT_KEY_M, HCLGE_FD_AD_NXT_KEY_S, - action->counter_id); + action->next_input_key); req->ad_data = cpu_to_le64(ad_data); ret = hclge_cmd_send(&hdev->hw, &desc, 1); From 3d778e65b4f44c6af4901d83020bb8a0a010f39e Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 19 Jan 2026 14:59:35 +0800 Subject: [PATCH 72/96] net: txgbe: remove the redundant data return in SW-FW mailbox For these two firmware mailbox commands, in txgbe_test_hostif() and txgbe_set_phy_link_hostif(), there is no need to read data from the buffer. Under the current setting, OEM firmware will cause the driver to fail to probe. Because OEM firmware returns more link information, with a larger OEM structure txgbe_hic_ephy_getlink. However, the current driver does not support the OEM function. So just fix it in the way that does not involve reading the returned data. Fixes: d84a3ff9aae8 ("net: txgbe: Restrict the use of mismatched FW versions") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/2914AB0BC6158DDA+20260119065935.6015-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 62d7f47d4f8d..f0514251d4f3 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -70,7 +70,7 @@ int txgbe_test_hostif(struct wx *wx) buffer.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; return wx_host_interface_command(wx, (u32 *)&buffer, sizeof(buffer), - WX_HI_COMMAND_TIMEOUT, true); + WX_HI_COMMAND_TIMEOUT, false); } int txgbe_read_eeprom_hostif(struct wx *wx, @@ -148,7 +148,7 @@ static int txgbe_set_phy_link_hostif(struct wx *wx, int speed, int autoneg, int buffer.duplex = duplex; return wx_host_interface_command(wx, (u32 *)&buffer, sizeof(buffer), - WX_HI_COMMAND_TIMEOUT, true); + WX_HI_COMMAND_TIMEOUT, false); } static void txgbe_get_link_capabilities(struct wx *wx, int *speed, From 8175dbf174d487afab81e936a862a8d9b8a1ccb6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 18 Jan 2026 13:25:28 +0000 Subject: [PATCH 73/96] mISDN: annotate data-race around dev->work dev->work can re read locklessly in mISDN_read() and mISDN_poll(). Add READ_ONCE()/WRITE_ONCE() annotations. BUG: KCSAN: data-race in mISDN_ioctl / mISDN_read write to 0xffff88812d848280 of 4 bytes by task 10864 on cpu 1: misdn_add_timer drivers/isdn/mISDN/timerdev.c:175 [inline] mISDN_ioctl+0x2fb/0x550 drivers/isdn/mISDN/timerdev.c:233 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xce/0x140 fs/ioctl.c:583 __x64_sys_ioctl+0x43/0x50 fs/ioctl.c:583 x64_sys_call+0x14b0/0x3000 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd8/0x2c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f read to 0xffff88812d848280 of 4 bytes by task 10857 on cpu 0: mISDN_read+0x1f2/0x470 drivers/isdn/mISDN/timerdev.c:112 do_loop_readv_writev fs/read_write.c:847 [inline] vfs_readv+0x3fb/0x690 fs/read_write.c:1020 do_readv+0xe7/0x210 fs/read_write.c:1080 __do_sys_readv fs/read_write.c:1165 [inline] __se_sys_readv fs/read_write.c:1162 [inline] __x64_sys_readv+0x45/0x50 fs/read_write.c:1162 x64_sys_call+0x2831/0x3000 arch/x86/include/generated/asm/syscalls_64.h:20 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd8/0x2c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f value changed: 0x00000000 -> 0x00000001 Fixes: 1b2b03f8e514 ("Add mISDN core files") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20260118132528.2349573-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/timerdev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index df98144a9539..33521c328a82 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -109,14 +109,14 @@ mISDN_read(struct file *filep, char __user *buf, size_t count, loff_t *off) spin_unlock_irq(&dev->lock); if (filep->f_flags & O_NONBLOCK) return -EAGAIN; - wait_event_interruptible(dev->wait, (dev->work || + wait_event_interruptible(dev->wait, (READ_ONCE(dev->work) || !list_empty(list))); if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&dev->lock); } if (dev->work) - dev->work = 0; + WRITE_ONCE(dev->work, 0); if (!list_empty(list)) { timer = list_first_entry(list, struct mISDNtimer, list); list_del(&timer->list); @@ -141,13 +141,16 @@ mISDN_poll(struct file *filep, poll_table *wait) if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p, %p)\n", __func__, filep, wait); if (dev) { + u32 work; + poll_wait(filep, &dev->wait, wait); mask = 0; - if (dev->work || !list_empty(&dev->expired)) + work = READ_ONCE(dev->work); + if (work || !list_empty(&dev->expired)) mask |= (EPOLLIN | EPOLLRDNORM); if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s work(%d) empty(%d)\n", __func__, - dev->work, list_empty(&dev->expired)); + work, list_empty(&dev->expired)); } return mask; } @@ -172,7 +175,7 @@ misdn_add_timer(struct mISDNtimerdev *dev, int timeout) struct mISDNtimer *timer; if (!timeout) { - dev->work = 1; + WRITE_ONCE(dev->work, 1); wake_up_interruptible(&dev->wait); id = 0; } else { From 9a063f96d87efc3a6cc667f8de096a3d38d74bb5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 18 Jan 2026 15:29:41 +0000 Subject: [PATCH 74/96] ipv6: annotate data-race in ndisc_router_discovery() syzbot found that ndisc_router_discovery() could read and write in6_dev->ra_mtu without holding a lock [1] This looks fine, IFLA_INET6_RA_MTU is best effort. Add READ_ONCE()/WRITE_ONCE() to document the race. Note that we might also reject illegal MTU values (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) in a future patch. [1] BUG: KCSAN: data-race in ndisc_router_discovery / ndisc_router_discovery read to 0xffff888119809c20 of 4 bytes by task 25817 on cpu 1: ndisc_router_discovery+0x151d/0x1c90 net/ipv6/ndisc.c:1558 ndisc_rcv+0x2ad/0x3d0 net/ipv6/ndisc.c:1841 icmpv6_rcv+0xe5a/0x12f0 net/ipv6/icmp.c:989 ip6_protocol_deliver_rcu+0xb2a/0x10d0 net/ipv6/ip6_input.c:438 ip6_input_finish+0xf0/0x1d0 net/ipv6/ip6_input.c:489 NF_HOOK include/linux/netfilter.h:318 [inline] ip6_input+0x5e/0x140 net/ipv6/ip6_input.c:500 ip6_mc_input+0x27c/0x470 net/ipv6/ip6_input.c:590 dst_input include/net/dst.h:474 [inline] ip6_rcv_finish+0x336/0x340 net/ipv6/ip6_input.c:79 ... write to 0xffff888119809c20 of 4 bytes by task 25816 on cpu 0: ndisc_router_discovery+0x155a/0x1c90 net/ipv6/ndisc.c:1559 ndisc_rcv+0x2ad/0x3d0 net/ipv6/ndisc.c:1841 icmpv6_rcv+0xe5a/0x12f0 net/ipv6/icmp.c:989 ip6_protocol_deliver_rcu+0xb2a/0x10d0 net/ipv6/ip6_input.c:438 ip6_input_finish+0xf0/0x1d0 net/ipv6/ip6_input.c:489 NF_HOOK include/linux/netfilter.h:318 [inline] ip6_input+0x5e/0x140 net/ipv6/ip6_input.c:500 ip6_mc_input+0x27c/0x470 net/ipv6/ip6_input.c:590 dst_input include/net/dst.h:474 [inline] ip6_rcv_finish+0x336/0x340 net/ipv6/ip6_input.c:79 ... value changed: 0x00000000 -> 0xe5400659 Fixes: 49b99da2c9ce ("ipv6: add IFLA_INET6_RA_MTU to expose mtu value") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Rocco Yue Link: https://patch.msgid.link/20260118152941.2563857-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 59d17b6f06bf..f6a5d8c73af9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1555,8 +1555,8 @@ skip_routeinfo: memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); - if (in6_dev->ra_mtu != mtu) { - in6_dev->ra_mtu = mtu; + if (READ_ONCE(in6_dev->ra_mtu) != mtu) { + WRITE_ONCE(in6_dev->ra_mtu, mtu); send_ifinfo_notify = true; } From c7159e960f1472a5493ac99aff0086ab1d683594 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 19 Jan 2026 08:55:18 +0100 Subject: [PATCH 75/96] usbnet: limit max_mtu based on device's hard_mtu The usbnet driver initializes net->max_mtu to ETH_MAX_MTU before calling the device's bind() callback. When the bind() callback sets dev->hard_mtu based the device's actual capability (from CDC Ethernet's wMaxSegmentSize descriptor), max_mtu is never updated to reflect this hardware limitation). This allows userspace (DHCP or IPv6 RA) to configure MTU larger than the device can handle, leading to silent packet drops when the backend sends packet exceeding the device's buffer size. Fix this by limiting net->max_mtu to the device's hard_mtu after the bind callback returns. See https://gitlab.com/qemu-project/qemu/-/issues/3268 and https://bugs.passt.top/attachment.cgi?bugid=189 Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers") Signed-off-by: Laurent Vivier Link: https://bugs.passt.top/show_bug.cgi?id=189 Reviewed-by: Stefano Brivio Link: https://patch.msgid.link/20260119075518.2774373-1-lvivier@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 35789ff4dd55..9280ef544bbb 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1821,9 +1821,12 @@ usbnet_probe(struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_NOARP) != 0) net->flags |= IFF_NOARP; - /* maybe the remote can't receive an Ethernet MTU */ - if (net->mtu > (dev->hard_mtu - net->hard_header_len)) - net->mtu = dev->hard_mtu - net->hard_header_len; + if (net->max_mtu > (dev->hard_mtu - net->hard_header_len)) + net->max_mtu = dev->hard_mtu - net->hard_header_len; + + if (net->mtu > net->max_mtu) + net->mtu = net->max_mtu; + } else if (!info->in || !info->out) status = usbnet_get_endpoints(dev, udev); else { From cdf8de9c6bfe94508d251cb290ee66e34e6f3368 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 19 Jan 2026 15:32:22 +0530 Subject: [PATCH 76/96] Octeontx2-pf: Update xdp features In recent testing, verification of XDP_REDIRECT and zero-copy features failed because the driver is not setting the corresponding feature flags. Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support") Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20260119100222.2267925-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a7feb4c392b3..6b2d8559f0eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3249,7 +3249,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->watchdog_timeo = OTX2_TX_TIMEOUT; netdev->netdev_ops = &otx2_netdev_ops; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(pf); From ba1096c315283ee3292765f6aea4cca15816c4f7 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Mon, 19 Jan 2026 15:33:59 +0900 Subject: [PATCH 77/96] netrom: fix double-free in nr_route_frame() In nr_route_frame(), old_skb is immediately freed without checking if nr_neigh->ax25 pointer is NULL. Therefore, if nr_neigh->ax25 is NULL, the caller function will free old_skb again, causing a double-free bug. Therefore, to prevent this, we need to modify it to check whether nr_neigh->ax25 is NULL before freeing old_skb. Cc: Reported-by: syzbot+999115c3bf275797dc27@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/69694d6f.050a0220.58bed.0029.GAE@google.com/ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jeongjun Park Link: https://patch.msgid.link/20260119063359.10604-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- net/netrom/nr_route.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b94cb2ffbaf8..9cc29ae85b06 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -752,7 +752,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) unsigned char *dptr; ax25_cb *ax25s; int ret; - struct sk_buff *skbn; + struct sk_buff *nskb, *oskb; /* * Reject malformed packets early. Check that it contains at least 2 @@ -811,14 +811,16 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) /* We are going to change the netrom headers so we should get our own skb, we also did not know until now how much header space we had to reserve... - RXQ */ - if ((skbn=skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC)) == NULL) { + nskb = skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC); + + if (!nskb) { nr_node_unlock(nr_node); nr_node_put(nr_node); dev_put(dev); return 0; } - kfree_skb(skb); - skb=skbn; + oskb = skb; + skb = nskb; skb->data[14]--; dptr = skb_push(skb, 1); @@ -837,6 +839,9 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) nr_node_unlock(nr_node); nr_node_put(nr_node); + if (ret) + kfree_skb(oskb); + return ret; } From 2030c4358bd8451583f2e010108607de5cdac5dc Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Tue, 20 Jan 2026 15:20:18 +0800 Subject: [PATCH 78/96] Revert "net: wwan: mhi_wwan_mbim: Avoid -Wflex-array-member-not-at-end warning" This reverts commit eeecf5d3a3a484cedfa3f2f87e6d51a7390ed960. This change lead to MHI WWAN device can't connect to internet. I found a netwrok issue with kernel 6.19-rc4, but network works well with kernel 6.18-rc1. After checking, this commit is the root cause. Before appliing this serial changes on MHI WWAN network, we shall revert this change in case of v6.19 being impacted. Fixes: eeecf5d3a3a4 ("net: wwan: mhi_wwan_mbim: Avoid -Wflex-array-member-not-at-end warning") Signed-off-by: Slark Xiao Link: https://patch.msgid.link/20260120072018.29375-1-slark_xiao@163.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/mhi_wwan_mbim.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 0dace12f5ad0..f8bc9a39bfa3 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -78,9 +78,8 @@ struct mhi_mbim_context { struct mbim_tx_hdr { struct usb_cdc_ncm_nth16 nth16; - - /* Must be last as it ends in a flexible-array member. */ struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16[2]; } __packed; static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim, @@ -108,20 +107,20 @@ static int mhi_mbim_get_link_mux_id(struct mhi_controller *cntrl) static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session, u16 tx_seq) { - DEFINE_RAW_FLEX(struct mbim_tx_hdr, mbim_hdr, ndp16.dpe16, 2); unsigned int dgram_size = skb->len; struct usb_cdc_ncm_nth16 *nth16; struct usb_cdc_ncm_ndp16 *ndp16; + struct mbim_tx_hdr *mbim_hdr; /* Only one NDP is sent, containing the IP packet (no aggregation) */ /* Ensure we have enough headroom for crafting MBIM header */ - if (skb_cow_head(skb, __struct_size(mbim_hdr))) { + if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { dev_kfree_skb_any(skb); return NULL; } - mbim_hdr = skb_push(skb, __struct_size(mbim_hdr)); + mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); /* Fill NTB header */ nth16 = &mbim_hdr->nth16; @@ -134,11 +133,12 @@ static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session, /* Fill the unique NDP */ ndp16 = &mbim_hdr->ndp16; ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24)); - ndp16->wLength = cpu_to_le16(struct_size(ndp16, dpe16, 2)); + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + + sizeof(struct usb_cdc_ncm_dpe16) * 2); ndp16->wNextNdpIndex = 0; /* Datagram follows the mbim header */ - ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(__struct_size(mbim_hdr)); + ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); /* null termination */ @@ -584,8 +584,7 @@ static void mhi_mbim_setup(struct net_device *ndev) { ndev->header_ops = NULL; /* No header */ ndev->type = ARPHRD_RAWIP; - ndev->needed_headroom = - struct_size_t(struct mbim_tx_hdr, ndp16.dpe16, 2); + ndev->needed_headroom = sizeof(struct mbim_tx_hdr); ndev->hard_header_len = 0; ndev->addr_len = 0; ndev->flags = IFF_POINTOPOINT | IFF_NOARP; From 8215794403d264739cc676668087512950b2ff31 Mon Sep 17 00:00:00 2001 From: Andrey Vatoropin Date: Tue, 20 Jan 2026 11:37:47 +0000 Subject: [PATCH 79/96] be2net: Fix NULL pointer dereference in be_cmd_get_mac_from_list When the parameter pmac_id_valid argument of be_cmd_get_mac_from_list() is set to false, the driver may request the PMAC_ID from the firmware of the network card, and this function will store that PMAC_ID at the provided address pmac_id. This is the contract of this function. However, there is a location within the driver where both pmac_id_valid == false and pmac_id == NULL are being passed. This could result in dereferencing a NULL pointer. To resolve this issue, it is necessary to pass the address of a stub variable to the function. Fixes: 95046b927a54 ("be2net: refactor MAC-addr setup code") Signed-off-by: Andrey Vatoropin Link: https://patch.msgid.link/20260120113734.20193-1-a.vatoropin@crpt.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index bb5d2fa15736..8ed45bceb537 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -3801,6 +3801,7 @@ int be_cmd_get_perm_mac(struct be_adapter *adapter, u8 *mac) { int status; bool pmac_valid = false; + u32 pmac_id; eth_zero_addr(mac); @@ -3813,7 +3814,7 @@ int be_cmd_get_perm_mac(struct be_adapter *adapter, u8 *mac) adapter->if_handle, 0); } else { status = be_cmd_get_mac_from_list(adapter, mac, &pmac_valid, - NULL, adapter->if_handle, 0); + &pmac_id, adapter->if_handle, 0); } return status; From 04708606fd7bdc34b69089a4ff848ff36d7088f9 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 20 Jan 2026 13:39:30 +0000 Subject: [PATCH 80/96] selftests: net: amt: wait longer for connection before sending packets Both send_mcast4() and send_mcast6() use sleep 2 to wait for the tunnel connection between the gateway and the relay, and for the listener socket to be created in the LISTENER namespace. However, tests sometimes fail because packets are sent before the connection is fully established. Increase the waiting time to make the tests more reliable, and use wait_local_port_listen() to explicitly wait for the listener socket. Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20260120133930.863845-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/amt.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index 3ef209cacb8e..663744305e52 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -73,6 +73,8 @@ # +------------------------+ #============================================================================== +source lib.sh + readonly LISTENER=$(mktemp -u listener-XXXXXXXX) readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX) readonly RELAY=$(mktemp -u relay-XXXXXXXX) @@ -246,14 +248,15 @@ test_ipv6_forward() send_mcast4() { - sleep 2 + sleep 5 + wait_local_port_listen ${LISTENER} 4000 udp ip netns exec "${SOURCE}" bash -c \ 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & } send_mcast6() { - sleep 2 + wait_local_port_listen ${LISTENER} 6000 udp ip netns exec "${SOURCE}" bash -c \ 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & } From 5f9b329096596b7e53e07d041d7fca4cbe1be752 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Jan 2026 16:17:44 +0000 Subject: [PATCH 81/96] bonding: provide a net pointer to __skb_flow_dissect() After 3cbf4ffba5ee ("net: plumb network namespace into __skb_flow_dissect") we have to provide a net pointer to __skb_flow_dissect(), either via skb->dev, skb->sk, or a user provided pointer. In the following case, syzbot was able to cook a bare skb. WARNING: net/core/flow_dissector.c:1131 at __skb_flow_dissect+0xb57/0x68b0 net/core/flow_dissector.c:1131, CPU#1: syz.2.1418/11053 Call Trace: bond_flow_dissect drivers/net/bonding/bond_main.c:4093 [inline] __bond_xmit_hash+0x2d7/0xba0 drivers/net/bonding/bond_main.c:4157 bond_xmit_hash_xdp drivers/net/bonding/bond_main.c:4208 [inline] bond_xdp_xmit_3ad_xor_slave_get drivers/net/bonding/bond_main.c:5139 [inline] bond_xdp_get_xmit_slave+0x1fd/0x710 drivers/net/bonding/bond_main.c:5515 xdp_master_redirect+0x13f/0x2c0 net/core/filter.c:4388 bpf_prog_run_xdp include/net/xdp.h:700 [inline] bpf_test_run+0x6b2/0x7d0 net/bpf/test_run.c:421 bpf_prog_test_run_xdp+0x795/0x10e0 net/bpf/test_run.c:1390 bpf_prog_test_run+0x2c7/0x340 kernel/bpf/syscall.c:4703 __sys_bpf+0x562/0x860 kernel/bpf/syscall.c:6182 __do_sys_bpf kernel/bpf/syscall.c:6274 [inline] __se_sys_bpf kernel/bpf/syscall.c:6272 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:6272 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94 Fixes: 58deb77cc52d ("bonding: balance ICMP echoes in layer3+4 mode") Reported-by: syzbot+c46409299c70a221415e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/696faa23.050a0220.4cb9c.001f.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Matteo Croce Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260120161744.1893263-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0aca6c937297..e7caf400a59c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4096,8 +4096,9 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v case BOND_XMIT_POLICY_ENCAP23: case BOND_XMIT_POLICY_ENCAP34: memset(fk, 0, sizeof(*fk)); - return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, - fk, data, l2_proto, nhoff, hlen, 0); + return __skb_flow_dissect(dev_net(bond->dev), skb, + &flow_keys_bonding, fk, data, + l2_proto, nhoff, hlen, 0); default: break; } From bbb11b8d758d17a4ce34b8ed0b49de150568265b Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Tue, 20 Jan 2026 11:23:39 -0800 Subject: [PATCH 82/96] net: bcmasp: Fix network filter wake for asp-3.0 We need to apply the tx_chan_offset to the netfilter cfg channel or the output channel will be incorrect for asp-3.0 and newer. Fixes: e9f31435ee7d ("net: bcmasp: Add support for asp-v3.0") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20260120192339.2031648-1-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/asp2/bcmasp.c | 5 +++-- drivers/net/ethernet/broadcom/asp2/bcmasp.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index fd35f4b4dc50..014340f33345 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -156,7 +156,7 @@ static void bcmasp_netfilt_hw_en_wake(struct bcmasp_priv *priv, ASP_RX_FILTER_NET_OFFSET_L4(32), ASP_RX_FILTER_NET_OFFSET(nfilt->hw_index + 1)); - rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->port + 8) | + rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->ch) | ASP_RX_FILTER_NET_CFG_EN | ASP_RX_FILTER_NET_CFG_L2_EN | ASP_RX_FILTER_NET_CFG_L3_EN | @@ -166,7 +166,7 @@ static void bcmasp_netfilt_hw_en_wake(struct bcmasp_priv *priv, ASP_RX_FILTER_NET_CFG_UMC(nfilt->port), ASP_RX_FILTER_NET_CFG(nfilt->hw_index)); - rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->port + 8) | + rx_filter_core_wl(priv, ASP_RX_FILTER_NET_CFG_CH(nfilt->ch) | ASP_RX_FILTER_NET_CFG_EN | ASP_RX_FILTER_NET_CFG_L2_EN | ASP_RX_FILTER_NET_CFG_L3_EN | @@ -714,6 +714,7 @@ struct bcmasp_net_filter *bcmasp_netfilt_get_init(struct bcmasp_intf *intf, nfilter = &priv->net_filters[open_index]; nfilter->claimed = true; nfilter->port = intf->port; + nfilter->ch = intf->channel + priv->tx_chan_offset; nfilter->hw_index = open_index; } diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.h b/drivers/net/ethernet/broadcom/asp2/bcmasp.h index 74adfdb50e11..e238507be40a 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.h +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.h @@ -348,6 +348,7 @@ struct bcmasp_net_filter { bool wake_filter; int port; + int ch; unsigned int hw_index; }; From dfca045cd4d0ea07ff4198ba392be3e718acaddc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 20 Jan 2026 23:10:39 +0200 Subject: [PATCH 83/96] net: dsa: fix off-by-one in maximum bridge ID determination Prior to the blamed commit, the bridge_num range was from 0 to ds->max_num_bridges - 1. After the commit, it is from 1 to ds->max_num_bridges. So this check: if (bridge_num >= max) return 0; must be updated to: if (bridge_num > max) return 0; in order to allow the last bridge_num value (==max) to be used. This is easiest visible when a driver sets ds->max_num_bridges=1. The observed behaviour is that even the first created bridge triggers the netlink extack "Range of offloadable bridges exceeded" warning, and is handled in software rather than being offloaded. Fixes: 3f9bb0301d50 ("net: dsa: make dp->bridge_num one-based") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20260120211039.3228999-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 99ede37698ac..35ce3941fae3 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -158,7 +158,7 @@ unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) bridge_num = find_next_zero_bit(&dsa_fwd_offloading_bridges, DSA_MAX_NUM_OFFLOADING_BRIDGES, 1); - if (bridge_num >= max) + if (bridge_num > max) return 0; set_bit(bridge_num, &dsa_fwd_offloading_bridges); From 5d5fe8bcd331f1e34e0943ec7c18432edfcf0e8b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 20 Jan 2026 10:13:05 +0000 Subject: [PATCH 84/96] rxrpc: Fix data-race warning and potential load/store tearing Fix the following: BUG: KCSAN: data-race in rxrpc_peer_keepalive_worker / rxrpc_send_data_packet which is reporting an issue with the reads and writes to ->last_tx_at in: conn->peer->last_tx_at = ktime_get_seconds(); and: keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; The lockless accesses to these to values aren't actually a problem as the read only needs an approximate time of last transmission for the purposes of deciding whether or not the transmission of a keepalive packet is warranted yet. Also, as ->last_tx_at is a 64-bit value, tearing can occur on a 32-bit arch. Fix both of these by switching to an unsigned int for ->last_tx_at and only storing the LSW of the time64_t. It can then be reconstructed at need provided no more than 68 years has elapsed since the last transmission. Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive") Reported-by: syzbot+6182afad5045e6703b3d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/695e7cfb.050a0220.1c677c.036b.GAE@google.com/ Signed-off-by: David Howells cc: Marc Dionne cc: Simon Horman cc: linux-afs@lists.infradead.org cc: stable@kernel.org Link: https://patch.msgid.link/1107124.1768903985@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 9 ++++++++- net/rxrpc/conn_event.c | 2 +- net/rxrpc/output.c | 14 +++++++------- net/rxrpc/peer_event.c | 17 ++++++++++++++++- net/rxrpc/proc.c | 4 ++-- net/rxrpc/rxgk.c | 2 +- net/rxrpc/rxkad.c | 2 +- 7 files changed, 36 insertions(+), 14 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5b7342d43486..36d6ca0d1089 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -387,7 +387,7 @@ struct rxrpc_peer { struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ unsigned long app_data; /* Application data (e.g. afs_server) */ - time64_t last_tx_at; /* Last time packet sent here */ + unsigned int last_tx_at; /* Last time packet sent here (time64_t LSW) */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ int debug_id; /* debug ID for printks */ @@ -1379,6 +1379,13 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, bool sendmsg_fail); +/* Update the last transmission time on a peer for keepalive purposes. */ +static inline void rxrpc_peer_mark_tx(struct rxrpc_peer *peer) +{ + /* To avoid tearing on 32-bit systems, we only keep the LSW. */ + WRITE_ONCE(peer->last_tx_at, ktime_get_seconds()); +} + /* * peer_object.c */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 232b6986da83..98ad9b51ca2c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -194,7 +194,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, } ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); if (ret < 0) trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret, rxrpc_tx_point_call_final_resend); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8b5903b6e481..d70db367e358 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -275,7 +275,7 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); ret = do_udp_sendmsg(conn->local->socket, &msg, len); - call->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(call->peer); if (ret < 0) { trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); @@ -411,7 +411,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt)); - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_abort); @@ -698,7 +698,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req ret = 0; trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, rxrpc_txdata_inject_loss); - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); goto done; } } @@ -711,7 +711,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req */ rxrpc_inc_stat(call->rxnet, stat_tx_data_send); ret = do_udp_sendmsg(conn->local->socket, &msg, len); - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); if (ret == -EMSGSIZE) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); @@ -797,7 +797,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn) trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); } /* @@ -917,7 +917,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) trace_rxrpc_tx_packet(peer->debug_id, &whdr, rxrpc_tx_point_version_keepalive); - peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(peer); _leave(""); } @@ -973,7 +973,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response if (ret < 0) goto fail; - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); return; fail: diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7f4729234957..9d02448ac062 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -237,6 +237,21 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, spin_unlock_irq(&peer->lock); } +/* + * Reconstruct the last transmission time. The difference calculated should be + * valid provided no more than ~68 years elapsed since the last transmission. + */ +static time64_t rxrpc_peer_get_tx_mark(const struct rxrpc_peer *peer, time64_t base) +{ + s32 last_tx_at = READ_ONCE(peer->last_tx_at); + s32 base_lsw = base; + s32 diff = last_tx_at - base_lsw; + + diff = clamp(diff, -RXRPC_KEEPALIVE_TIME, RXRPC_KEEPALIVE_TIME); + + return diff + base; +} + /* * Perform keep-alive pings. */ @@ -265,7 +280,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, spin_unlock_bh(&rxnet->peer_hash_lock); if (use) { - keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + keepalive_at = rxrpc_peer_get_tx_mark(peer, base) + RXRPC_KEEPALIVE_TIME; slot = keepalive_at - base; _debug("%02x peer %u t=%d {%pISp}", cursor, peer->debug_id, slot, &peer->srx.transport); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index d803562ca0ac..59292f7f9205 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -296,13 +296,13 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, - "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8d %8d\n", + "UDP %-47.47s %-47.47s %3u %4u %5u %6ds %8d %8d\n", lbuff, rbuff, refcount_read(&peer->ref), peer->cong_ssthresh, peer->max_data, - now - peer->last_tx_at, + (s32)now - (s32)READ_ONCE(peer->last_tx_at), READ_ONCE(peer->recent_srtt_us), READ_ONCE(peer->recent_rto_us)); diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index dce5a3d8a964..43cbf9efd89f 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -678,7 +678,7 @@ static int rxgk_issue_challenge(struct rxrpc_connection *conn) ret = do_udp_sendmsg(conn->local->socket, &msg, len); if (ret > 0) - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); __free_page(page); if (ret < 0) { diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 3657c0661cdc..a756855a0a62 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -694,7 +694,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) return -EAGAIN; } - conn->peer->last_tx_at = ktime_get_seconds(); + rxrpc_peer_mark_tx(conn->peer); trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_rxkad_challenge); _leave(" = 0"); From e8ca461f7d19464b47c64fe4cf2f83162421bcc0 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 21 Jan 2026 02:23:17 +0000 Subject: [PATCH 85/96] net: pcs: pcs-mtk-lynxi: report in-band capability for 2500Base-X It turns out that 2500Base-X actually works fine with in-band status on MediaTek's LynxI PCS -- I wrongly concluded it didn't because it is broken in all the copper SFP modules and GPON sticks I used for testing. Hence report LINK_INBAND_ENABLE also for 2500Base-X mode. This reverts most of commit a003c38d9bbb ("net: pcs: pcs-mtk-lynxi: correctly report in-band status capabilities"). The removal of the QSGMII interface mode was correct and is left untouched. Link: https://github.com/openwrt/openwrt/issues/21436 Fixes: a003c38d9bbb ("net: pcs: pcs-mtk-lynxi: correctly report in-band status capabilities") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/b1cf26157b63fee838be09ae810497fb22fd8104.1768961746.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-mtk-lynxi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index 149ddf51d785..87df3a9dfc9b 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -93,12 +93,10 @@ static unsigned int mtk_pcs_lynxi_inband_caps(struct phylink_pcs *pcs, { switch (interface) { case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: case PHY_INTERFACE_MODE_SGMII: return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; - case PHY_INTERFACE_MODE_2500BASEX: - return LINK_INBAND_DISABLE; - default: return 0; } From 19e4175e997a5b85eab97d522f00cc99abd1873c Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 21 Jan 2026 09:09:34 +0530 Subject: [PATCH 86/96] octeontx2-af: Fix error handling This commit adds error handling and rollback logic to rvu_mbox_handler_attach_resources() to properly clean up partially attached resources when rvu_attach_block() fails. Fixes: 746ea74241fa0 ("octeontx2-af: Add RVU block LF provisioning support") Signed-off-by: Ratheesh Kannoth Link: https://patch.msgid.link/20260121033934.1900761-1-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 86 ++++++++++++++----- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 2d78e08f985f..747fbdf2a908 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1551,8 +1551,8 @@ static int rvu_get_attach_blkaddr(struct rvu *rvu, int blktype, return -ENODEV; } -static void rvu_attach_block(struct rvu *rvu, int pcifunc, int blktype, - int num_lfs, struct rsrc_attach *attach) +static int rvu_attach_block(struct rvu *rvu, int pcifunc, int blktype, + int num_lfs, struct rsrc_attach *attach) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct rvu_hwinfo *hw = rvu->hw; @@ -1562,21 +1562,21 @@ static void rvu_attach_block(struct rvu *rvu, int pcifunc, int blktype, u64 cfg; if (!num_lfs) - return; + return -EINVAL; blkaddr = rvu_get_attach_blkaddr(rvu, blktype, pcifunc, attach); if (blkaddr < 0) - return; + return -EFAULT; block = &hw->block[blkaddr]; if (!block->lf.bmap) - return; + return -ESRCH; for (slot = 0; slot < num_lfs; slot++) { /* Allocate the resource */ lf = rvu_alloc_rsrc(&block->lf); if (lf < 0) - return; + return -EFAULT; cfg = (1ULL << 63) | (pcifunc << 8) | slot; rvu_write64(rvu, blkaddr, block->lfcfg_reg | @@ -1587,6 +1587,8 @@ static void rvu_attach_block(struct rvu *rvu, int pcifunc, int blktype, /* Set start MSIX vector for this LF within this PF/VF */ rvu_set_msix_offset(rvu, pfvf, block, lf); } + + return 0; } static int rvu_check_rsrc_availability(struct rvu *rvu, @@ -1724,22 +1726,31 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, int err; /* If first request, detach all existing attached resources */ - if (!attach->modify) - rvu_detach_rsrcs(rvu, NULL, pcifunc); + if (!attach->modify) { + err = rvu_detach_rsrcs(rvu, NULL, pcifunc); + if (err) + return err; + } mutex_lock(&rvu->rsrc_lock); /* Check if the request can be accommodated */ err = rvu_check_rsrc_availability(rvu, attach, pcifunc); if (err) - goto exit; + goto fail1; /* Now attach the requested resources */ - if (attach->npalf) - rvu_attach_block(rvu, pcifunc, BLKTYPE_NPA, 1, attach); + if (attach->npalf) { + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_NPA, 1, attach); + if (err) + goto fail1; + } - if (attach->nixlf) - rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1, attach); + if (attach->nixlf) { + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_NIX, 1, attach); + if (err) + goto fail2; + } if (attach->sso) { /* RVU func doesn't know which exact LF or slot is attached @@ -1749,33 +1760,64 @@ int rvu_mbox_handler_attach_resources(struct rvu *rvu, */ if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSO); - rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, - attach->sso, attach); + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_SSO, + attach->sso, attach); + if (err) + goto fail3; } if (attach->ssow) { if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_SSOW); - rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, - attach->ssow, attach); + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_SSOW, + attach->ssow, attach); + if (err) + goto fail4; } if (attach->timlfs) { if (attach->modify) rvu_detach_block(rvu, pcifunc, BLKTYPE_TIM); - rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, - attach->timlfs, attach); + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_TIM, + attach->timlfs, attach); + if (err) + goto fail5; } if (attach->cptlfs) { if (attach->modify && rvu_attach_from_same_block(rvu, BLKTYPE_CPT, attach)) rvu_detach_block(rvu, pcifunc, BLKTYPE_CPT); - rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, - attach->cptlfs, attach); + err = rvu_attach_block(rvu, pcifunc, BLKTYPE_CPT, + attach->cptlfs, attach); + if (err) + goto fail6; } -exit: + mutex_unlock(&rvu->rsrc_lock); + return 0; + +fail6: + if (attach->timlfs) + rvu_detach_block(rvu, pcifunc, BLKTYPE_TIM); + +fail5: + if (attach->ssow) + rvu_detach_block(rvu, pcifunc, BLKTYPE_SSOW); + +fail4: + if (attach->sso) + rvu_detach_block(rvu, pcifunc, BLKTYPE_SSO); + +fail3: + if (attach->nixlf) + rvu_detach_block(rvu, pcifunc, BLKTYPE_NIX); + +fail2: + if (attach->npalf) + rvu_detach_block(rvu, pcifunc, BLKTYPE_NPA); + +fail1: mutex_unlock(&rvu->rsrc_lock); return err; } From cc4816bdb08639e5cd9acb295a02d6f0f09736b4 Mon Sep 17 00:00:00 2001 From: David Yang Date: Wed, 21 Jan 2026 15:29:26 +0800 Subject: [PATCH 87/96] net: openvswitch: fix data race in ovs_vport_get_upcall_stats In ovs_vport_get_upcall_stats(), some statistics protected by u64_stats_sync, are read and accumulated in ignorance of possible u64_stats_fetch_retry() events. These statistics are already accumulated by u64_stats_inc(). Fix this by reading them into temporary variables first. Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets") Signed-off-by: David Yang Acked-by: Ilya Maximets Reviewed-by: Eric Dumazet Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20260121072932.2360971-1-mmyangfl@gmail.com Signed-off-by: Paolo Abeni --- net/openvswitch/vport.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6bbbc16ab778..f0ce8ce1dce0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -310,22 +310,23 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) */ int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb) { + u64 tx_success = 0, tx_fail = 0; struct nlattr *nla; int i; - __u64 tx_success = 0; - __u64 tx_fail = 0; - for_each_possible_cpu(i) { const struct vport_upcall_stats_percpu *stats; + u64 n_success, n_fail; unsigned int start; stats = per_cpu_ptr(vport->upcall_stats, i); do { start = u64_stats_fetch_begin(&stats->syncp); - tx_success += u64_stats_read(&stats->n_success); - tx_fail += u64_stats_read(&stats->n_fail); + n_success = u64_stats_read(&stats->n_success); + n_fail = u64_stats_read(&stats->n_fail); } while (u64_stats_fetch_retry(&stats->syncp, start)); + tx_success += n_success; + tx_fail += n_fail; } nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS); From ca1bb3fedf26a08ed31974131bc0064d4fe33649 Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Wed, 21 Jan 2026 09:37:51 +0100 Subject: [PATCH 88/96] net: fec: account for VLAN header in frame length calculations The MAX_FL (maximum frame length) and related calculations used ETH_HLEN, which does not account for the 4-byte VLAN tag in tagged frames. This caused the hardware to reject valid VLAN frames as oversized, resulting in RX errors and dropped packets. Use VLAN_ETH_HLEN instead of ETH_HLEN in the MAX_FL register setup, cut-through mode threshold, buffer allocation, and max_mtu calculation. Cc: stable@kernel.org # v6.18+ Fixes: 62b5bb7be7bc ("net: fec: update MAX_FL based on the current MTU") Fixes: d466c16026e9 ("net: fec: enable the Jumbo frame support for i.MX8QM") Fixes: 59e9bf037d75 ("net: fec: add change_mtu to support dynamic buffer allocation") Fixes: ec2a1681ed4f ("net: fec: use a member variable for maximum buffer size") Signed-off-by: Clemens Gruber Reviewed-by: Wei Fang Link: https://patch.msgid.link/20260121083751.66997-1-mail@clemensgruber.at Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a753265961af..797ef6899657 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1150,7 +1150,7 @@ fec_restart(struct net_device *ndev) u32 rcntl = FEC_RCR_MII; if (OPT_ARCH_HAS_MAX_FL) - rcntl |= (fep->netdev->mtu + ETH_HLEN + ETH_FCS_LEN) << 16; + rcntl |= (fep->netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN) << 16; if (fep->bufdesc_ex) fec_ptp_save_state(fep); @@ -1285,12 +1285,13 @@ fec_restart(struct net_device *ndev) /* When Jumbo Frame is enabled, the FIFO may not be large enough * to hold an entire frame. In such cases, if the MTU exceeds - * (PKT_MAXBUF_SIZE - ETH_HLEN - ETH_FCS_LEN), configure the interface - * to operate in cut-through mode, triggered by the FIFO threshold. + * (PKT_MAXBUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN), configure + * the interface to operate in cut-through mode, triggered by + * the FIFO threshold. * Otherwise, enable the ENET store-and-forward mode. */ if ((fep->quirks & FEC_QUIRK_JUMBO_FRAME) && - (ndev->mtu > (PKT_MAXBUF_SIZE - ETH_HLEN - ETH_FCS_LEN))) + (ndev->mtu > (PKT_MAXBUF_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN))) writel(0xF, fep->hwp + FEC_X_WMRK); else writel(FEC_TXWMRK_STRFWD, fep->hwp + FEC_X_WMRK); @@ -4037,7 +4038,7 @@ static int fec_change_mtu(struct net_device *ndev, int new_mtu) if (netif_running(ndev)) return -EBUSY; - order = get_order(new_mtu + ETH_HLEN + ETH_FCS_LEN + order = get_order(new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN + FEC_DRV_RESERVE_SPACE); fep->rx_frame_size = (PAGE_SIZE << order) - FEC_DRV_RESERVE_SPACE; fep->pagepool_order = order; @@ -4588,7 +4589,7 @@ fec_probe(struct platform_device *pdev) else fep->max_buf_size = PKT_MAXBUF_SIZE; - ndev->max_mtu = fep->max_buf_size - ETH_HLEN - ETH_FCS_LEN; + ndev->max_mtu = fep->max_buf_size - VLAN_ETH_HLEN - ETH_FCS_LEN; ret = register_netdev(ndev); if (ret) From 3ef3d52a1a9860d094395c7a3e593f3aa26ff012 Mon Sep 17 00:00:00 2001 From: Melbin K Mathew Date: Wed, 21 Jan 2026 10:36:25 +0100 Subject: [PATCH 89/96] vsock/virtio: fix potential underflow in virtio_transport_get_credit() The credit calculation in virtio_transport_get_credit() uses unsigned arithmetic: ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); If the peer shrinks its advertised buffer (peer_buf_alloc) while bytes are in flight, the subtraction can underflow and produce a large positive value, potentially allowing more data to be queued than the peer can handle. Reuse virtio_transport_has_space() which already handles this case and add a comment to make it clear why we are doing that. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Suggested-by: Stefano Garzarella Signed-off-by: Melbin K Mathew [Stefano: use virtio_transport_has_space() instead of duplicating the code] [Stefano: tweak the commit message] Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Link: https://patch.msgid.link/20260121093628.9941-2-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 26b979ad71f0..6175124d63d3 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -28,6 +28,7 @@ static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, bool cancel_timeout); +static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs); static const struct virtio_transport * virtio_transport_get_ops(struct vsock_sock *vsk) @@ -499,9 +500,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) return 0; spin_lock_bh(&vvs->tx_lock); - ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); - if (ret > credit) - ret = credit; + ret = min_t(u32, credit, virtio_transport_has_space(vvs)); vvs->tx_cnt += ret; vvs->bytes_unsent += ret; spin_unlock_bh(&vvs->tx_lock); @@ -877,11 +876,14 @@ u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data); -static s64 virtio_transport_has_space(struct vsock_sock *vsk) +static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs) { - struct virtio_vsock_sock *vvs = vsk->trans; s64 bytes; + /* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while + * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction + * does not underflow. + */ bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; @@ -895,7 +897,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) s64 bytes; spin_lock_bh(&vvs->tx_lock); - bytes = virtio_transport_has_space(vsk); + bytes = virtio_transport_has_space(vvs); spin_unlock_bh(&vvs->tx_lock); return bytes; @@ -1492,7 +1494,7 @@ static bool virtio_transport_space_update(struct sock *sk, spin_lock_bh(&vvs->tx_lock); vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc); vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt); - space_available = virtio_transport_has_space(vsk); + space_available = virtio_transport_has_space(vvs); spin_unlock_bh(&vvs->tx_lock); return space_available; } From 0a98de80136968bab7db37b16282b37f044694d3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 21 Jan 2026 10:36:26 +0100 Subject: [PATCH 90/96] vsock/test: fix seqpacket message bounds test The test requires the sender (client) to send all messages before waking up the receiver (server). Since virtio-vsock had a bug and did not respect the size of the TX buffer, this test worked, but now that we are going to fix the bug, the test hangs because the sender would fill the TX buffer before waking up the receiver. Set the buffer size in the sender (client) as well, as we already do for the receiver (server). Fixes: 5c338112e48a ("test/vsock: rework message bounds test") Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20260121093628.9941-3-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_test.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 27e39354499a..668fbe9eb3cc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -351,6 +351,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { + unsigned long long sock_buf_size; unsigned long curr_hash; size_t max_msg_size; int page_size; @@ -363,6 +364,16 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + sock_buf_size = SOCK_BUF_SIZE; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + /* Wait, until receiver sets buffer size. */ control_expectln("SRVREADY"); From 8ee784fdf006cbe8739cfa093f54d326cbf54037 Mon Sep 17 00:00:00 2001 From: Melbin K Mathew Date: Wed, 21 Jan 2026 10:36:27 +0100 Subject: [PATCH 91/96] vsock/virtio: cap TX credit to local buffer size The virtio transports derives its TX credit directly from peer_buf_alloc, which is set from the remote endpoint's SO_VM_SOCKETS_BUFFER_SIZE value. On the host side this means that the amount of data we are willing to queue for a connection is scaled by a guest-chosen buffer size, rather than the host's own vsock configuration. A malicious guest can advertise a large buffer and read slowly, causing the host to allocate a correspondingly large amount of sk_buff memory. The same thing would happen in the guest with a malicious host, since virtio transports share the same code base. Introduce a small helper, virtio_transport_tx_buf_size(), that returns min(peer_buf_alloc, buf_alloc), and use it wherever we consume peer_buf_alloc. This ensures the effective TX window is bounded by both the peer's advertised buffer and our own buf_alloc (already clamped to buffer_max_size via SO_VM_SOCKETS_BUFFER_MAX_SIZE), so a remote peer cannot force the other to queue more data than allowed by its own vsock settings. On an unpatched Ubuntu 22.04 host (~64 GiB RAM), running a PoC with 32 guest vsock connections advertising 2 GiB each and reading slowly drove Slab/SUnreclaim from ~0.5 GiB to ~57 GiB; the system only recovered after killing the QEMU process. That said, if QEMU memory is limited with cgroups, the maximum memory used will be limited. With this patch applied: Before: MemFree: ~61.6 GiB Slab: ~142 MiB SUnreclaim: ~117 MiB After 32 high-credit connections: MemFree: ~61.5 GiB Slab: ~178 MiB SUnreclaim: ~152 MiB Only ~35 MiB increase in Slab/SUnreclaim, no host OOM, and the guest remains responsive. Compatibility with non-virtio transports: - VMCI uses the AF_VSOCK buffer knobs to size its queue pairs per socket based on the local vsk->buffer_* values; the remote side cannot enlarge those queues beyond what the local endpoint configured. - Hyper-V's vsock transport uses fixed-size VMBus ring buffers and an MTU bound; there is no peer-controlled credit field comparable to peer_buf_alloc, and the remote endpoint cannot drive in-flight kernel memory above those ring sizes. - The loopback path reuses virtio_transport_common.c, so it naturally follows the same semantics as the virtio transport. This change is limited to virtio_transport_common.c and thus affects virtio-vsock, vhost-vsock, and loopback, bringing them in line with the "remote window intersected with local policy" behaviour that VMCI and Hyper-V already effectively have. Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Suggested-by: Stefano Garzarella Signed-off-by: Melbin K Mathew [Stefano: small adjustments after changing the previous patch] [Stefano: tweak the commit message] Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Link: https://patch.msgid.link/20260121093628.9941-4-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- net/vmw_vsock/virtio_transport_common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6175124d63d3..d3e26025ef58 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -821,6 +821,15 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, } EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); +static u32 virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs) +{ + /* The peer advertises its receive buffer via peer_buf_alloc, but we + * cap it to our local buf_alloc so a remote peer cannot force us to + * queue more data than our own buffer configuration allows. + */ + return min(vvs->peer_buf_alloc, vvs->buf_alloc); +} + int virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, struct msghdr *msg, @@ -830,7 +839,7 @@ virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->tx_lock); - if (len > vvs->peer_buf_alloc) { + if (len > virtio_transport_tx_buf_size(vvs)) { spin_unlock_bh(&vvs->tx_lock); return -EMSGSIZE; } @@ -884,7 +893,8 @@ static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs) * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction * does not underflow. */ - bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + bytes = (s64)virtio_transport_tx_buf_size(vvs) - + (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; From 2a689f76edd04a53137bd01d4618343f4cdd7e23 Mon Sep 17 00:00:00 2001 From: Melbin K Mathew Date: Wed, 21 Jan 2026 10:36:28 +0100 Subject: [PATCH 92/96] vsock/test: add stream TX credit bounds test Add a regression test for the TX credit bounds fix. The test verifies that a sender with a small local buffer size cannot queue excessive data even when the peer advertises a large receive buffer. The client: - Sets a small buffer size (64 KiB) - Connects to server (which advertises 2 MiB buffer) - Sends in non-blocking mode until EAGAIN - Verifies total queued data is bounded This guards against the original vulnerability where a remote peer could cause unbounded kernel memory allocation by advertising a large buffer and reading slowly. Suggested-by: Stefano Garzarella Signed-off-by: Melbin K Mathew [Stefano: use sock_buf_size to check the bytes sent + small fixes] Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20260121093628.9941-5-sgarzare@redhat.com Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Abeni --- tools/testing/vsock/vsock_test.c | 101 +++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 668fbe9eb3cc..5bd20ccd9335 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -347,6 +347,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define SOCK_BUF_SIZE_SMALL (64 * 1024) #define MAX_MSG_PAGES 4 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) @@ -2230,6 +2231,101 @@ static void test_stream_accepted_setsockopt_server(const struct test_opts *opts) close(fd); } +static void test_stream_tx_credit_bounds_client(const struct test_opts *opts) +{ + unsigned long long sock_buf_size; + size_t total = 0; + char buf[4096]; + int fd; + + memset(buf, 'A', sizeof(buf)); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + sock_buf_size = SOCK_BUF_SIZE_SMALL; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + + if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK) < 0) { + perror("fcntl(F_SETFL)"); + exit(EXIT_FAILURE); + } + + control_expectln("SRVREADY"); + + for (;;) { + ssize_t sent = send(fd, buf, sizeof(buf), 0); + + if (sent == 0) { + fprintf(stderr, "unexpected EOF while sending bytes\n"); + exit(EXIT_FAILURE); + } + + if (sent < 0) { + if (errno == EINTR) + continue; + + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + + perror("send"); + exit(EXIT_FAILURE); + } + + total += sent; + } + + control_writeln("CLIDONE"); + close(fd); + + /* We should not be able to send more bytes than the value set as + * local buffer size. + */ + if (total > sock_buf_size) { + fprintf(stderr, + "TX credit too large: queued %zu bytes (expected <= %llu)\n", + total, sock_buf_size); + exit(EXIT_FAILURE); + } +} + +static void test_stream_tx_credit_bounds_server(const struct test_opts *opts) +{ + unsigned long long sock_buf_size; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + sock_buf_size = SOCK_BUF_SIZE; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + + control_writeln("SRVREADY"); + control_expectln("CLIDONE"); + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -2419,6 +2515,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_mangle_client, .run_server = test_stream_msgzcopy_mangle_server, }, + { + .name = "SOCK_STREAM TX credit bounds", + .run_client = test_stream_tx_credit_bounds_client, + .run_server = test_stream_tx_credit_bounds_server, + }, {}, }; From fb2bb2a1ebf7b9514c32b03bb5c3be5d518d437b Mon Sep 17 00:00:00 2001 From: Fan Gong Date: Thu, 22 Jan 2026 17:41:55 +0800 Subject: [PATCH 93/96] hinic3: Fix netif_queue_set_napi queue_index input parameter error Incorrectly transmitted interrupt number instead of queue number when using netif_queue_set_napi. Besides, move this to appropriate code location to set napi. Remove redundant netif_stop_subqueue beacuase it is not part of the hinic3_send_one_skb process. Fixes: 17fcb3dc12bb ("hinic3: module initialization and tx/rx logic") Co-developed-by: Zhu Yikai Signed-off-by: Zhu Yikai Signed-off-by: Fan Gong Link: https://patch.msgid.link/7b8e4eb5c53cbd873ee9aaefeb3d9dbbaff52deb.1769070766.git.zhuyikai1@h-partners.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/huawei/hinic3/hinic3_irq.c | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c index a69b361225e9..84bee5d6e638 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c @@ -43,21 +43,12 @@ static void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) struct hinic3_nic_dev *nic_dev = netdev_priv(irq_cfg->netdev); netif_napi_add(nic_dev->netdev, &irq_cfg->napi, hinic3_poll); - netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, - NETDEV_QUEUE_TYPE_RX, &irq_cfg->napi); - netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, - NETDEV_QUEUE_TYPE_TX, &irq_cfg->napi); napi_enable(&irq_cfg->napi); } static void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) { napi_disable(&irq_cfg->napi); - netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, - NETDEV_QUEUE_TYPE_RX, NULL); - netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, - NETDEV_QUEUE_TYPE_TX, NULL); - netif_stop_subqueue(irq_cfg->netdev, irq_cfg->irq_id); netif_napi_del(&irq_cfg->napi); } @@ -150,6 +141,11 @@ int hinic3_qps_irq_init(struct net_device *netdev) goto err_release_irqs; } + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_RX, &irq_cfg->napi); + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_TX, &irq_cfg->napi); + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, HINIC3_SET_MSIX_AUTO_MASK); @@ -164,6 +160,10 @@ err_release_irqs: q_id--; irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; qp_del_napi(irq_cfg); + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_RX, NULL); + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_TX, NULL); hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, HINIC3_MSIX_DISABLE); hinic3_set_msix_auto_mask_state(nic_dev->hwdev, @@ -184,6 +184,10 @@ void hinic3_qps_irq_uninit(struct net_device *netdev) for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) { irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; qp_del_napi(irq_cfg); + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_RX, NULL); + netif_queue_set_napi(irq_cfg->netdev, q_id, + NETDEV_QUEUE_TYPE_TX, NULL); hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, HINIC3_MSIX_DISABLE); hinic3_set_msix_auto_mask_state(nic_dev->hwdev, From 27880b0b0d35ad1c98863d09788254e36f874968 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Jan 2026 13:37:24 +0000 Subject: [PATCH 94/96] net/sched: act_ife: avoid possible NULL deref tcf_ife_encode() must make sure ife_encode() does not return NULL. syzbot reported: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:ife_tlv_meta_encode+0x41/0xa0 net/ife/ife.c:166 CPU: 3 UID: 0 PID: 8990 Comm: syz.0.696 Not tainted syzkaller #0 PREEMPT(full) Call Trace: ife_encode_meta_u32+0x153/0x180 net/sched/act_ife.c:101 tcf_ife_encode net/sched/act_ife.c:841 [inline] tcf_ife_act+0x1022/0x1de0 net/sched/act_ife.c:877 tc_act include/net/tc_wrapper.h:130 [inline] tcf_action_exec+0x1c0/0xa20 net/sched/act_api.c:1152 tcf_exts_exec include/net/pkt_cls.h:349 [inline] mall_classify+0x1a0/0x2a0 net/sched/cls_matchall.c:42 tc_classify include/net/tc_wrapper.h:197 [inline] __tcf_classify net/sched/cls_api.c:1764 [inline] tcf_classify+0x7f2/0x1380 net/sched/cls_api.c:1860 multiq_classify net/sched/sch_multiq.c:39 [inline] multiq_enqueue+0xe0/0x510 net/sched/sch_multiq.c:66 dev_qdisc_enqueue+0x45/0x250 net/core/dev.c:4147 __dev_xmit_skb net/core/dev.c:4262 [inline] __dev_queue_xmit+0x2998/0x46c0 net/core/dev.c:4798 Fixes: 295a6e06d21e ("net/sched: act_ife: Change to use ife module") Reported-by: syzbot+5cf914f193dffde3bd3c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6970d61d.050a0220.706b.0010.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Yotam Gigi Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20260121133724.3400020-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/act_ife.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 1dfdda6c2d4c..8e8f6af731d5 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -821,6 +821,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, /* could be stupid policy setup or mtu config * so lets be conservative.. */ if ((action == TC_ACT_SHOT) || exceed_mtu) { +drop: qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -829,6 +830,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, skb_push(skb, skb->dev->hard_header_len); ife_meta = ife_encode(skb, metalen); + if (!ife_meta) + goto drop; spin_lock(&ife->tcf_lock); @@ -844,8 +847,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (err < 0) { /* too corrupt to keep around if overwritten */ spin_unlock(&ife->tcf_lock); - qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); - return TC_ACT_SHOT; + goto drop; } skboff += err; } From f3ddbaaaaf4d0633b40482f471753f9c71294a4a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 21 Jan 2026 14:00:11 +0100 Subject: [PATCH 95/96] dpll: Prevent duplicate registrations Modify the internal registration helpers dpll_xa_ref_{dpll,pin}_add() to reject duplicate registration attempts. Previously, if a caller attempted to register the same pin multiple times (with the same ops, priv, and cookie) on the same device, the core silently increments the reference count and return success. This behavior is incorrect because if the caller makes these duplicate registrations then for the first one dpll_pin_registration is allocated and for others the associated dpll_pin_ref.refcount is incremented. During the first unregistration the associated dpll_pin_registration is freed and for others WARN is fired. Fix this by updating the logic to return `-EEXIST` if a matching registration is found to enforce a strict "register once" policy. Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions") Signed-off-by: Ivan Vecera Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20260121130012.112606-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index a461095efd8a..8879a7235156 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -83,10 +83,8 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, if (ref->pin != pin) continue; reg = dpll_pin_registration_find(ref, ops, priv, cookie); - if (reg) { - refcount_inc(&ref->refcount); - return 0; - } + if (reg) + return -EEXIST; ref_exists = true; break; } @@ -164,10 +162,8 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, if (ref->dpll != dpll) continue; reg = dpll_pin_registration_find(ref, ops, priv, cookie); - if (reg) { - refcount_inc(&ref->refcount); - return 0; - } + if (reg) + return -EEXIST; ref_exists = true; break; } From 4a3dba48188208e4f66822800e042686784d29d1 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 21 Jan 2026 15:18:19 +0530 Subject: [PATCH 96/96] Octeontx2-af: Add proper checks for fwdata firmware populates MAC address, link modes (supported, advertised) and EEPROM data in shared firmware structure which kernel access via MAC block(CGX/RPM). Accessing fwdata, on boards booted with out MAC block leading to kernel panics. Internal error: Oops: 0000000096000005 [#1] SMP [ 10.460721] Modules linked in: [ 10.463779] CPU: 0 UID: 0 PID: 174 Comm: kworker/0:3 Not tainted 6.19.0-rc5-00154-g76ec646abdf7-dirty #3 PREEMPT [ 10.474045] Hardware name: Marvell OcteonTX CN98XX board (DT) [ 10.479793] Workqueue: events work_for_cpu_fn [ 10.484159] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 10.491124] pc : rvu_sdp_init+0x18/0x114 [ 10.495051] lr : rvu_probe+0xe58/0x1d18 Fixes: 997814491cee ("Octeontx2-af: Fetch MAC channel info from firmware") Fixes: 5f21226b79fd ("Octeontx2-pf: ethtool: support multi advertise mode") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20260121094819.2566786-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 3 +++ drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 3abd750a4bd7..3d91a34f8b57 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -1222,6 +1222,9 @@ int rvu_mbox_handler_cgx_set_link_mode(struct rvu *rvu, u8 cgx_idx, lmac; void *cgxd; + if (!rvu->fwdata) + return LMAC_AF_ERR_FIRMWARE_DATA_NOT_MAPPED; + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) return -EPERM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c index e4a5f9fa6fd4..bbfd8231aed5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c @@ -56,7 +56,7 @@ int rvu_sdp_init(struct rvu *rvu) struct rvu_pfvf *pfvf; u32 i = 0; - if (rvu->fwdata->channel_data.valid) { + if (rvu->fwdata && rvu->fwdata->channel_data.valid) { sdp_pf_num[0] = 0; pfvf = &rvu->pf[sdp_pf_num[0]]; pfvf->sdp_info = &rvu->fwdata->channel_data.info;