diff --git a/debian/bin/genpatch-pf b/debian/bin/genpatch-pf index 773bb5c..35aa909 100755 --- a/debian/bin/genpatch-pf +++ b/debian/bin/genpatch-pf @@ -7,7 +7,7 @@ w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w" dst='debian/patches/tmp-pf' src='../linux-extras' -branches='fixes archlinux cpuidle kbuild nfs smb xfs' +branches='fixes archlinux cpuidle exfat kbuild nfs smb xfs' if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi mkdir -p "${dst}" diff --git a/debian/changelog b/debian/changelog index a2d34a1..82e7a40 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +linux (6.15.4-1) sid; urgency=medium + + * New upstream stable update: + https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.15.4 + + -- Konstantin Demin Fri, 27 Jun 2025 14:05:47 +0300 + linux (6.15.3-1) sid; urgency=medium * New upstream stable update: diff --git a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch index 5827b1d..afad3ab 100644 --- a/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch +++ b/debian/patches/features/all/security-perf-allow-further-restriction-of-perf_event_open.patch @@ -36,7 +36,7 @@ Signed-off-by: Ben Hutchings --- a/kernel/events/core.c +++ b/kernel/events/core.c -@@ -450,8 +450,13 @@ static struct kmem_cache *perf_event_cac +@@ -463,8 +463,13 @@ static struct kmem_cache *perf_event_cac * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv @@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings /* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */ static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); -@@ -13110,6 +13115,9 @@ SYSCALL_DEFINE5(perf_event_open, +@@ -13144,6 +13149,9 @@ SYSCALL_DEFINE5(perf_event_open, if (err) return err; diff --git a/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch b/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch index 2347d08..358e307 100644 --- a/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch +++ b/debian/patches/features/x86/intel-iommu-add-option-to-exclude-integrated-gpu-only.patch @@ -68,7 +68,7 @@ Signed-off-by: Ben Hutchings } else if (!strncmp(str, "forcedac", 8)) { pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n"); iommu_dma_forcedac = true; -@@ -1935,6 +1943,9 @@ static int device_def_domain_type(struct +@@ -1936,6 +1944,9 @@ static int device_def_domain_type(struct if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) return IOMMU_DOMAIN_IDENTITY; @@ -78,7 +78,7 @@ Signed-off-by: Ben Hutchings } return 0; -@@ -2229,6 +2240,9 @@ static int __init init_dmars(void) +@@ -2230,6 +2241,9 @@ static int __init init_dmars(void) iommu_set_root_entry(iommu); } diff --git a/debian/patches/misc-openwrt/0002-mac80211-avoid-crashing-missing-band.patch b/debian/patches/misc-openwrt/0002-mac80211-avoid-crashing-missing-band.patch index e85207c..91599de 100644 --- a/debian/patches/misc-openwrt/0002-mac80211-avoid-crashing-missing-band.patch +++ b/debian/patches/misc-openwrt/0002-mac80211-avoid-crashing-missing-band.patch @@ -18,7 +18,7 @@ Signed-off-by: David Bauer --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c -@@ -2474,6 +2474,13 @@ static void sta_stats_decode_rate(struct +@@ -2467,6 +2467,13 @@ static void sta_stats_decode_rate(struct sband = local->hw.wiphy->bands[band]; diff --git a/debian/patches/misc-openwrt/0003-mac80211-sta-randomize-BA-session-dialog-token-alloc.patch b/debian/patches/misc-openwrt/0003-mac80211-sta-randomize-BA-session-dialog-token-alloc.patch index 00a3bb3..3588f00 100644 --- a/debian/patches/misc-openwrt/0003-mac80211-sta-randomize-BA-session-dialog-token-alloc.patch +++ b/debian/patches/misc-openwrt/0003-mac80211-sta-randomize-BA-session-dialog-token-alloc.patch @@ -28,7 +28,7 @@ Signed-off-by: Johannes Berg --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c -@@ -583,6 +583,7 @@ __sta_info_alloc(struct ieee80211_sub_if +@@ -582,6 +582,7 @@ __sta_info_alloc(struct ieee80211_sub_if spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); diff --git a/debian/patches/misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch b/debian/patches/misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch index 8b82d2f..74b1229 100644 --- a/debian/patches/misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch +++ b/debian/patches/misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch @@ -23,7 +23,7 @@ Signed-off-by: Felix Fietkau --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c -@@ -4084,7 +4084,7 @@ struct ieee80211_txq *ieee80211_next_txq +@@ -4077,7 +4077,7 @@ struct ieee80211_txq *ieee80211_next_txq if (deficit < 0) sta->airtime[txqi->txq.ac].deficit += @@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau if (deficit < 0 || !aql_check) { list_move_tail(&txqi->schedule_order, -@@ -4227,7 +4227,8 @@ bool ieee80211_txq_may_transmit(struct i +@@ -4220,7 +4220,8 @@ bool ieee80211_txq_may_transmit(struct i } sta = container_of(iter->txq.sta, struct sta_info, sta); if (ieee80211_sta_deficit(sta, ac) < 0) @@ -42,7 +42,7 @@ Signed-off-by: Felix Fietkau list_move_tail(&iter->schedule_order, &local->active_txqs[ac]); } -@@ -4235,7 +4236,7 @@ bool ieee80211_txq_may_transmit(struct i +@@ -4228,7 +4229,7 @@ bool ieee80211_txq_may_transmit(struct i if (sta->airtime[ac].deficit >= 0) goto out; diff --git a/debian/patches/misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch b/debian/patches/misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch index 5d01b34..fabde38 100644 --- a/debian/patches/misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch +++ b/debian/patches/misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch @@ -95,7 +95,7 @@ Signed-off-by: Felix Fietkau spin_lock_init(&local->active_txq_lock[i]); --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c -@@ -2388,13 +2388,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre +@@ -2381,13 +2381,28 @@ EXPORT_SYMBOL(ieee80211_sta_recalc_aggre void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, @@ -127,7 +127,7 @@ Signed-off-by: Felix Fietkau atomic_add(tx_airtime, --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c -@@ -2556,7 +2556,7 @@ static u16 ieee80211_store_ack_skb(struc +@@ -2549,7 +2549,7 @@ static u16 ieee80211_store_ack_skb(struc spin_lock_irqsave(&local->ack_status_lock, flags); id = idr_alloc(&local->ack_status_frames, ack_skb, @@ -136,7 +136,7 @@ Signed-off-by: Felix Fietkau spin_unlock_irqrestore(&local->ack_status_lock, flags); if (id >= 0) { -@@ -3985,20 +3985,20 @@ begin: +@@ -3978,20 +3978,20 @@ begin: encap_out: info->control.vif = vif; @@ -167,7 +167,7 @@ Signed-off-by: Felix Fietkau } return skb; -@@ -4050,6 +4050,7 @@ struct ieee80211_txq *ieee80211_next_txq +@@ -4043,6 +4043,7 @@ struct ieee80211_txq *ieee80211_next_txq struct ieee80211_txq *ret = NULL; struct txq_info *txqi = NULL, *head = NULL; bool found_eligible_txq = false; @@ -175,7 +175,7 @@ Signed-off-by: Felix Fietkau spin_lock_bh(&local->active_txq_lock[ac]); -@@ -4073,26 +4074,26 @@ struct ieee80211_txq *ieee80211_next_txq +@@ -4066,26 +4067,26 @@ struct ieee80211_txq *ieee80211_next_txq if (!head) head = txqi; @@ -214,7 +214,7 @@ Signed-off-by: Felix Fietkau if (txqi->schedule_round == local->schedule_round[ac]) goto out; -@@ -4157,7 +4158,8 @@ bool ieee80211_txq_airtime_check(struct +@@ -4150,7 +4151,8 @@ bool ieee80211_txq_airtime_check(struct return true; if (!txq->sta) @@ -224,7 +224,7 @@ Signed-off-by: Felix Fietkau if (unlikely(txq->tid == IEEE80211_NUM_TIDS)) return true; -@@ -4206,15 +4208,15 @@ bool ieee80211_txq_may_transmit(struct i +@@ -4199,15 +4201,15 @@ bool ieee80211_txq_may_transmit(struct i spin_lock_bh(&local->active_txq_lock[ac]); diff --git a/debian/patches/misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch b/debian/patches/misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch deleted file mode 100644 index 1f76d3a..0000000 --- a/debian/patches/misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch +++ /dev/null @@ -1,221 +0,0 @@ -This reverts commit 484a54c2e597dbc4ace79c1687022282905afba0. The CoDel -parameter change essentially disables CoDel on slow stations, with some -questionable assumptions, as Dave pointed out in [0]. Quoting from -there: - - But here are my pithy comments as to why this part of mac80211 is so - wrong... - - static void sta_update_codel_params(struct sta_info *sta, u32 thr) - { - - if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { - - 1) sta->local->num_sta is the number of associated, rather than - active, stations. "Active" stations in the last 50ms or so, might have - been a better thing to use, but as most people have far more than that - associated, we end up with really lousy codel parameters, all the - time. Mistake numero uno! - - 2) The STA_SLOW_THRESHOLD was completely arbitrary in 2016. - - - sta->cparams.target = MS2TIME(50); - - This, by itself, was probably not too bad. 30ms might have been - better, at the time, when we were battling powersave etc, but 20ms was - enough, really, to cover most scenarios, even where we had low rate - 2Ghz multicast to cope with. Even then, codel has a hard time finding - any sane drop rate at all, with a target this high. - - - sta->cparams.interval = MS2TIME(300); - - But this was horrible, a total mistake, that is leading to codel being - completely ineffective in almost any scenario on clients or APS. - 100ms, even 80ms, here, would be vastly better than this insanity. I'm - seeing 5+seconds of delay accumulated in a bunch of otherwise happily - fq-ing APs.... - - 100ms of observed jitter during a flow is enough. Certainly (in 2016) - there were interactions with powersave that I did not understand, and - still don't, but if you are transmitting in the first place, powersave - shouldn't be a problemmmm..... - - - sta->cparams.ecn = false; - - At the time we were pretty nervous about ecn, I'm kind of sanguine - about it now, and reliably indicating ecn seems better than turning it - off for any reason. - - [...] - - In production, on p2p wireless, I've had 8ms and 80ms for target and - interval for years now, and it works great. - -I think Dave's arguments above are basically sound on the face of it, -and various experimentation with tighter CoDel parameters in the OpenWrt -community have show promising results[1]. So I don't think there's any -reason to keep this parameter fiddling; hence this revert. - -[0] https://lore.kernel.org/linux-wireless/CAA93jw6NJ2cmLmMauz0xAgC2MGbBq6n0ZiZzAdkK0u4b+O2yXg@mail.gmail.com/ -[1] https://forum.openwrt.org/t/reducing-multiplexing-latencies-still-further-in-wifi/133605/130 - -Suggested-By: Dave Taht -In-memory-of: Dave Taht -Signed-off-by: Toke Høiland-Jørgensen - ---- a/include/net/mac80211.h -+++ b/include/net/mac80211.h -@@ -5347,22 +5347,6 @@ void ieee80211_get_tx_rates(struct ieee8 - int max_rates); - - /** -- * ieee80211_sta_set_expected_throughput - set the expected tpt for a station -- * -- * Call this function to notify mac80211 about a change in expected throughput -- * to a station. A driver for a device that does rate control in firmware can -- * call this function when the expected throughput estimate towards a station -- * changes. The information is used to tune the CoDel AQM applied to traffic -- * going towards that station (which can otherwise be too aggressive and cause -- * slow stations to starve). -- * -- * @pubsta: the station to set throughput for. -- * @thr: the current expected throughput in kbps. -- */ --void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, -- u32 thr); -- --/** - * ieee80211_tx_rate_update - transmit rate update callback - * - * Drivers should call this functions with a non-NULL pub sta ---- a/net/mac80211/debugfs_sta.c -+++ b/net/mac80211/debugfs_sta.c -@@ -152,12 +152,6 @@ static ssize_t sta_aqm_read(struct file - - p += scnprintf(p, - bufsz + buf - p, -- "target %uus interval %uus ecn %s\n", -- codel_time_to_us(sta->cparams.target), -- codel_time_to_us(sta->cparams.interval), -- sta->cparams.ecn ? "yes" : "no"); -- p += scnprintf(p, -- bufsz + buf - p, - "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); - - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { ---- a/net/mac80211/rate.c -+++ b/net/mac80211/rate.c -@@ -990,8 +990,6 @@ int rate_control_set_rates(struct ieee80 - if (sta->uploaded) - drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); - -- ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); -- - return 0; - } - EXPORT_SYMBOL(rate_control_set_rates); ---- a/net/mac80211/sta_info.c -+++ b/net/mac80211/sta_info.c -@@ -18,7 +18,6 @@ - #include - #include - --#include - #include - #include "ieee80211_i.h" - #include "driver-ops.h" -@@ -702,13 +701,6 @@ __sta_info_alloc(struct ieee80211_sub_if - } - } - -- sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; -- sta->cparams.target = MS2TIME(20); -- sta->cparams.interval = MS2TIME(100); -- sta->cparams.ecn = true; -- sta->cparams.ce_threshold_selector = 0; -- sta->cparams.ce_threshold_mask = 0; -- - sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); - - return sta; -@@ -2928,27 +2920,6 @@ unsigned long ieee80211_sta_last_active( - return sta->deflink.status_stats.last_ack; - } - --static void sta_update_codel_params(struct sta_info *sta, u32 thr) --{ -- if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { -- sta->cparams.target = MS2TIME(50); -- sta->cparams.interval = MS2TIME(300); -- sta->cparams.ecn = false; -- } else { -- sta->cparams.target = MS2TIME(20); -- sta->cparams.interval = MS2TIME(100); -- sta->cparams.ecn = true; -- } --} -- --void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, -- u32 thr) --{ -- struct sta_info *sta = container_of(pubsta, struct sta_info, sta); -- -- sta_update_codel_params(sta, thr); --} -- - int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) - { - struct ieee80211_sub_if_data *sdata = sta->sdata; ---- a/net/mac80211/sta_info.h -+++ b/net/mac80211/sta_info.h -@@ -467,14 +467,6 @@ struct ieee80211_fragment_cache { - unsigned int next; - }; - --/* -- * The bandwidth threshold below which the per-station CoDel parameters will be -- * scaled to be more lenient (to prevent starvation of slow stations). This -- * value will be scaled by the number of active stations when it is being -- * applied. -- */ --#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ -- - /** - * struct link_sta_info - Link STA information - * All link specific sta info are stored here for reference. This can be -@@ -627,7 +619,6 @@ struct link_sta_info { - * @sta: station information we share with the driver - * @sta_state: duplicates information about station state (for debug) - * @rcu_head: RCU head used for freeing this station struct -- * @cparams: CoDel parameters for this station. - * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) - * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: - * -@@ -718,8 +709,6 @@ struct sta_info { - struct dentry *debugfs_dir; - #endif - -- struct codel_params cparams; -- - u8 reserved_tid; - s8 amsdu_mesh_control; - ---- a/net/mac80211/tx.c -+++ b/net/mac80211/tx.c -@@ -1402,16 +1402,9 @@ static struct sk_buff *fq_tin_dequeue_fu - - local = container_of(fq, struct ieee80211_local, fq); - txqi = container_of(tin, struct txq_info, tin); -+ cparams = &local->cparams; - cstats = &txqi->cstats; - -- if (txqi->txq.sta) { -- struct sta_info *sta = container_of(txqi->txq.sta, -- struct sta_info, sta); -- cparams = &sta->cparams; -- } else { -- cparams = &local->cparams; -- } -- - if (flow == &tin->default_flow) - cvars = &txqi->def_cvars; - else diff --git a/debian/patches/misc-openwrt/0010-mac80211-txq-tune.patch b/debian/patches/misc-openwrt/0009-mac80211-txq-tune.patch similarity index 100% rename from debian/patches/misc-openwrt/0010-mac80211-txq-tune.patch rename to debian/patches/misc-openwrt/0009-mac80211-txq-tune.patch diff --git a/debian/patches/misc-openwrt/0011-cfg80211-aql-txq-limit.patch b/debian/patches/misc-openwrt/0010-cfg80211-aql-txq-limit.patch similarity index 100% rename from debian/patches/misc-openwrt/0011-cfg80211-aql-txq-limit.patch rename to debian/patches/misc-openwrt/0010-cfg80211-aql-txq-limit.patch diff --git a/debian/patches/patchset-pf/fixes/0010-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch b/debian/patches/patchset-pf/fixes/0001-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch similarity index 94% rename from debian/patches/patchset-pf/fixes/0010-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch rename to debian/patches/patchset-pf/fixes/0001-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch index ebb46b8..1f1f90a 100644 --- a/debian/patches/patchset-pf/fixes/0010-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch +++ b/debian/patches/patchset-pf/fixes/0001-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch @@ -1,4 +1,4 @@ -From 9c2fdcdf9d8963a6fa30005a859816639d0bbf95 Mon Sep 17 00:00:00 2001 +From b3dc27f64b5d62505ae9f03a6c342a43b0b7e0b2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:54 -0600 Subject: Revert "Disable FOP_DONTCACHE for now due to bugs" diff --git a/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch b/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch deleted file mode 100644 index b32f7c2..0000000 --- a/debian/patches/patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 1616d0edbdf3b36a8f4694d35bcf88fa1242c7e8 Mon Sep 17 00:00:00 2001 -From: Jinliang Zheng -Date: Tue, 15 Apr 2025 17:02:32 +0800 -Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler() - -In dirty_ratio_handler(), vm_dirty_bytes must be set to zero before -calling writeback_set_ratelimit(), as global_dirty_limits() always -prioritizes the value of vm_dirty_bytes. - -It's domain_dirty_limits() that's relevant here, not node_dirty_ok: - - dirty_ratio_handler - writeback_set_ratelimit - global_dirty_limits(&dirty_thresh) <- ratelimit_pages based on dirty_thresh - domain_dirty_limits - if (bytes) <- bytes = vm_dirty_bytes <--------+ - thresh = f1(bytes) <- prioritizes vm_dirty_bytes | - else | - thresh = f2(ratio) | - ratelimit_pages = f3(dirty_thresh) | - vm_dirty_bytes = 0 <- it's late! ---------------------+ - -This causes ratelimit_pages to still use the value calculated based on -vm_dirty_bytes, which is wrong now. - - -The impact visible to userspace is difficult to capture directly because -there is no procfs/sysfs interface exported to user space. However, it -will have a real impact on the balance of dirty pages. - -For example: - -1. On default, we have vm_dirty_ratio=40, vm_dirty_bytes=0 - -2. echo 8192 > dirty_bytes, then vm_dirty_bytes=8192, - vm_dirty_ratio=0, and ratelimit_pages is calculated based on - vm_dirty_bytes now. - -3. echo 20 > dirty_ratio, then since vm_dirty_bytes is not reset to - zero when writeback_set_ratelimit() -> global_dirty_limits() -> - domain_dirty_limits() is called, reallimit_pages is still calculated - based on vm_dirty_bytes instead of vm_dirty_ratio. This does not - conform to the actual intent of the user. - -Link: https://lkml.kernel.org/r/20250415090232.7544-1-alexjlzheng@tencent.com -Fixes: 9d823e8f6b1b ("writeback: per task dirty rate limit") -Signed-off-by: Jinliang Zheng -Reviewed-by: MengEn Sun -Cc: Andrea Righi -Cc: Fenggaung Wu -Cc: Jinliang Zheng -Cc: Matthew Wilcox (Oracle) -Cc: -Signed-off-by: Andrew Morton ---- - mm/page-writeback.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -520,8 +520,8 @@ static int dirty_ratio_handler(const str - - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret == 0 && write && vm_dirty_ratio != old_ratio) { -- writeback_set_ratelimit(); - vm_dirty_bytes = 0; -+ writeback_set_ratelimit(); - } - return ret; - } diff --git a/debian/patches/patchset-pf/fixes/0011-mm-filemap-unify-read-write-dropbehind-naming.patch b/debian/patches/patchset-pf/fixes/0002-mm-filemap-unify-read-write-dropbehind-naming.patch similarity index 95% rename from debian/patches/patchset-pf/fixes/0011-mm-filemap-unify-read-write-dropbehind-naming.patch rename to debian/patches/patchset-pf/fixes/0002-mm-filemap-unify-read-write-dropbehind-naming.patch index 74bad54..8d56cf6 100644 --- a/debian/patches/patchset-pf/fixes/0011-mm-filemap-unify-read-write-dropbehind-naming.patch +++ b/debian/patches/patchset-pf/fixes/0002-mm-filemap-unify-read-write-dropbehind-naming.patch @@ -1,4 +1,4 @@ -From 0274339dc053815d099e9c336f11c1e9e5641792 Mon Sep 17 00:00:00 2001 +From 0b8d9b7ae677a03629218f69037be3f342c5ee81 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:55 -0600 Subject: mm/filemap: unify read/write dropbehind naming diff --git a/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch b/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch deleted file mode 100644 index 327c670..0000000 --- a/debian/patches/patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 87f7435508fde20e21c6b744723a3203e2045f46 Mon Sep 17 00:00:00 2001 -From: GONG Ruiqi -Date: Sun, 27 Apr 2025 10:53:03 +0800 -Subject: vgacon: Add check for vc_origin address range in vgacon_scroll() - -Our in-house Syzkaller reported the following BUG (twice), which we -believed was the same issue with [1]: - -================================================================== -BUG: KASAN: slab-out-of-bounds in vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740 -Read of size 2 at addr ffff88800f5bef60 by task syz.7.2620/12393 -... -Call Trace: - - __dump_stack lib/dump_stack.c:88 [inline] - dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106 - print_address_description.constprop.0+0x6b/0x3d0 mm/kasan/report.c:364 - print_report+0xba/0x280 mm/kasan/report.c:475 - kasan_report+0xa9/0xe0 mm/kasan/report.c:588 - vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740 - vcs_write_buf_noattr drivers/tty/vt/vc_screen.c:493 [inline] - vcs_write+0x586/0x840 drivers/tty/vt/vc_screen.c:690 - vfs_write+0x219/0x960 fs/read_write.c:584 - ksys_write+0x12e/0x260 fs/read_write.c:639 - do_syscall_x64 arch/x86/entry/common.c:51 [inline] - do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 - entry_SYSCALL_64_after_hwframe+0x78/0xe2 - ... - - -Allocated by task 5614: - kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 - kasan_set_track+0x25/0x30 mm/kasan/common.c:52 - ____kasan_kmalloc mm/kasan/common.c:374 [inline] - __kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:383 - kasan_kmalloc include/linux/kasan.h:201 [inline] - __do_kmalloc_node mm/slab_common.c:1007 [inline] - __kmalloc+0x62/0x140 mm/slab_common.c:1020 - kmalloc include/linux/slab.h:604 [inline] - kzalloc include/linux/slab.h:721 [inline] - vc_do_resize+0x235/0xf40 drivers/tty/vt/vt.c:1193 - vgacon_adjust_height+0x2d4/0x350 drivers/video/console/vgacon.c:1007 - vgacon_font_set+0x1f7/0x240 drivers/video/console/vgacon.c:1031 - con_font_set drivers/tty/vt/vt.c:4628 [inline] - con_font_op+0x4da/0xa20 drivers/tty/vt/vt.c:4675 - vt_k_ioctl+0xa10/0xb30 drivers/tty/vt/vt_ioctl.c:474 - vt_ioctl+0x14c/0x1870 drivers/tty/vt/vt_ioctl.c:752 - tty_ioctl+0x655/0x1510 drivers/tty/tty_io.c:2779 - vfs_ioctl fs/ioctl.c:51 [inline] - __do_sys_ioctl fs/ioctl.c:871 [inline] - __se_sys_ioctl+0x12d/0x190 fs/ioctl.c:857 - do_syscall_x64 arch/x86/entry/common.c:51 [inline] - do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 - entry_SYSCALL_64_after_hwframe+0x78/0xe2 - -Last potentially related work creation: - kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 - __kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492 - __call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713 - netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802 - __sock_release+0xb5/0x270 net/socket.c:663 - sock_close+0x1e/0x30 net/socket.c:1425 - __fput+0x408/0xab0 fs/file_table.c:384 - __fput_sync+0x4c/0x60 fs/file_table.c:465 - __do_sys_close fs/open.c:1580 [inline] - __se_sys_close+0x68/0xd0 fs/open.c:1565 - do_syscall_x64 arch/x86/entry/common.c:51 [inline] - do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81 - entry_SYSCALL_64_after_hwframe+0x78/0xe2 - -Second to last potentially related work creation: - kasan_save_stack+0x20/0x40 mm/kasan/common.c:45 - __kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492 - __call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713 - netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802 - __sock_release+0xb5/0x270 net/socket.c:663 - sock_close+0x1e/0x30 net/socket.c:1425 - __fput+0x408/0xab0 fs/file_table.c:384 - task_work_run+0x154/0x240 kernel/task_work.c:239 - exit_task_work include/linux/task_work.h:45 [inline] - do_exit+0x8e5/0x1320 kernel/exit.c:874 - do_group_exit+0xcd/0x280 kernel/exit.c:1023 - get_signal+0x1675/0x1850 kernel/signal.c:2905 - arch_do_signal_or_restart+0x80/0x3b0 arch/x86/kernel/signal.c:310 - exit_to_user_mode_loop kernel/entry/common.c:111 [inline] - exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline] - __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] - syscall_exit_to_user_mode+0x1b3/0x1e0 kernel/entry/common.c:218 - do_syscall_64+0x66/0x110 arch/x86/entry/common.c:87 - entry_SYSCALL_64_after_hwframe+0x78/0xe2 - -The buggy address belongs to the object at ffff88800f5be000 - which belongs to the cache kmalloc-2k of size 2048 -The buggy address is located 2656 bytes to the right of - allocated 1280-byte region [ffff88800f5be000, ffff88800f5be500) - -... - -Memory state around the buggy address: - ffff88800f5bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc - ffff88800f5bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ->ffff88800f5bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc - ^ - ffff88800f5bef80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc - ffff88800f5bf000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -================================================================== - -By analyzing the vmcore, we found that vc->vc_origin was somehow placed -one line prior to vc->vc_screenbuf when vc was in KD_TEXT mode, and -further writings to /dev/vcs caused out-of-bounds reads (and writes -right after) in vcs_write_buf_noattr(). - -Our further experiments show that in most cases, vc->vc_origin equals to -vga_vram_base when the console is in KD_TEXT mode, and it's around -vc->vc_screenbuf for the KD_GRAPHICS mode. But via triggerring a -TIOCL_SETVESABLANK ioctl beforehand, we can make vc->vc_origin be around -vc->vc_screenbuf while the console is in KD_TEXT mode, and then by -writing the special 'ESC M' control sequence to the tty certain times -(depends on the value of `vc->state.y - vc->vc_top`), we can eventually -move vc->vc_origin prior to vc->vc_screenbuf. Here's the PoC, tested on -QEMU: - -``` -int main() { - const int RI_NUM = 10; // should be greater than `vc->state.y - vc->vc_top` - int tty_fd, vcs_fd; - const char *tty_path = "/dev/tty0"; - const char *vcs_path = "/dev/vcs"; - const char escape_seq[] = "\x1bM"; // ESC + M - const char trigger_seq[] = "Let's trigger an OOB write."; - struct vt_sizes vt_size = { 70, 2 }; - int blank = TIOCL_BLANKSCREEN; - - tty_fd = open(tty_path, O_RDWR); - - char vesa_mode[] = { TIOCL_SETVESABLANK, 1 }; - ioctl(tty_fd, TIOCLINUX, vesa_mode); - - ioctl(tty_fd, TIOCLINUX, &blank); - ioctl(tty_fd, VT_RESIZE, &vt_size); - - for (int i = 0; i < RI_NUM; ++i) - write(tty_fd, escape_seq, sizeof(escape_seq) - 1); - - vcs_fd = open(vcs_path, O_RDWR); - write(vcs_fd, trigger_seq, sizeof(trigger_seq)); - - close(vcs_fd); - close(tty_fd); - return 0; -} -``` - -To solve this problem, add an address range validation check in -vgacon_scroll(), ensuring vc->vc_origin never precedes vc_screenbuf. - -Reported-by: syzbot+9c09fda97a1a65ea859b@syzkaller.appspotmail.com -Closes: https://syzkaller.appspot.com/bug?extid=9c09fda97a1a65ea859b [1] -Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") -Cc: stable@vger.kernel.org -Co-developed-by: Yi Yang -Signed-off-by: Yi Yang -Signed-off-by: GONG Ruiqi -Signed-off-by: Helge Deller ---- - drivers/video/console/vgacon.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/video/console/vgacon.c -+++ b/drivers/video/console/vgacon.c -@@ -1168,7 +1168,7 @@ static bool vgacon_scroll(struct vc_data - c->vc_screenbuf_size - delta); - c->vc_origin = vga_vram_end - c->vc_screenbuf_size; - vga_rolled_over = 0; -- } else -+ } else if (oldo - delta >= (unsigned long)c->vc_screenbuf) - c->vc_origin -= delta; - c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; - scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char, diff --git a/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch b/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch deleted file mode 100644 index 28fa6e9..0000000 --- a/debian/patches/patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 4aed4d2a911e165342a339c886101dbe3acad5e2 Mon Sep 17 00:00:00 2001 -From: Murad Masimov -Date: Mon, 28 Apr 2025 18:34:06 +0300 -Subject: fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in - fb_videomode_to_var - -If fb_add_videomode() in do_register_framebuffer() fails to allocate -memory for fb_videomode, it will later lead to a null-ptr dereference in -fb_videomode_to_var(), as the fb_info is registered while not having the -mode in modelist that is expected to be there, i.e. the one that is -described in fb_info->var. - -================================================================ -general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI -KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] -CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0 -Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 -RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901 -Call Trace: - display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929 - fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071 - resize_screen drivers/tty/vt/vt.c:1176 [inline] - vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263 - fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720 - fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776 - do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128 - fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203 - vfs_ioctl fs/ioctl.c:48 [inline] - __do_sys_ioctl fs/ioctl.c:753 [inline] - __se_sys_ioctl fs/ioctl.c:739 [inline] - __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739 - do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 - entry_SYSCALL_64_after_hwframe+0x67/0xd1 -================================================================ - -Even though fbcon_init() checks beforehand if fb_match_mode() in -var_to_display() fails, it can not prevent the panic because fbcon_init() -does not return error code. Considering this and the comment in the code -about fb_match_mode() returning NULL - "This should not happen" - it is -better to prevent registering the fb_info if its mode was not set -successfully. Also move fb_add_videomode() closer to the beginning of -do_register_framebuffer() to avoid having to do the cleanup on fail. - -Found by Linux Verification Center (linuxtesting.org) with Syzkaller. - -Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") -Cc: stable@vger.kernel.org -Signed-off-by: Murad Masimov -Signed-off-by: Helge Deller ---- - drivers/video/fbdev/core/fbmem.c | 18 +++++++++++------- - 1 file changed, 11 insertions(+), 7 deletions(-) - ---- a/drivers/video/fbdev/core/fbmem.c -+++ b/drivers/video/fbdev/core/fbmem.c -@@ -388,7 +388,7 @@ static int fb_check_foreignness(struct f - - static int do_register_framebuffer(struct fb_info *fb_info) - { -- int i; -+ int i, err = 0; - struct fb_videomode mode; - - if (fb_check_foreignness(fb_info)) -@@ -397,10 +397,18 @@ static int do_register_framebuffer(struc - if (num_registered_fb == FB_MAX) - return -ENXIO; - -- num_registered_fb++; - for (i = 0 ; i < FB_MAX; i++) - if (!registered_fb[i]) - break; -+ -+ if (!fb_info->modelist.prev || !fb_info->modelist.next) -+ INIT_LIST_HEAD(&fb_info->modelist); -+ -+ fb_var_to_videomode(&mode, &fb_info->var); -+ err = fb_add_videomode(&mode, &fb_info->modelist); -+ if (err < 0) -+ return err; -+ - fb_info->node = i; - refcount_set(&fb_info->count, 1); - mutex_init(&fb_info->lock); -@@ -426,16 +434,12 @@ static int do_register_framebuffer(struc - if (bitmap_empty(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT)) - bitmap_fill(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT); - -- if (!fb_info->modelist.prev || !fb_info->modelist.next) -- INIT_LIST_HEAD(&fb_info->modelist); -- - if (fb_info->skip_vt_switch) - pm_vt_switch_required(fb_info->device, false); - else - pm_vt_switch_required(fb_info->device, true); - -- fb_var_to_videomode(&mode, &fb_info->var); -- fb_add_videomode(&mode, &fb_info->modelist); -+ num_registered_fb++; - registered_fb[i] = fb_info; - - #ifdef CONFIG_GUMSTIX_AM200EPD diff --git a/debian/patches/patchset-pf/fixes/0012-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch b/debian/patches/patchset-pf/fixes/0003-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch similarity index 97% rename from debian/patches/patchset-pf/fixes/0012-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch rename to debian/patches/patchset-pf/fixes/0003-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch index f952381..1805559 100644 --- a/debian/patches/patchset-pf/fixes/0012-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch +++ b/debian/patches/patchset-pf/fixes/0003-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch @@ -1,4 +1,4 @@ -From de09560d2e6fbb14ea586063217277e5ebc1bc71 Mon Sep 17 00:00:00 2001 +From 2c1c3b3aafb153cbc3bd298db57cc7313d1601b1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:56 -0600 Subject: mm/filemap: unify dropbehind flag testing and clearing diff --git a/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch b/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch deleted file mode 100644 index a05b608..0000000 --- a/debian/patches/patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 10c7fce24a1ad9197a8eabbba454a9a872f03d5c Mon Sep 17 00:00:00 2001 -From: Murad Masimov -Date: Mon, 28 Apr 2025 18:34:07 +0300 -Subject: fbdev: Fix fb_set_var to prevent null-ptr-deref in - fb_videomode_to_var - -If fb_add_videomode() in fb_set_var() fails to allocate memory for -fb_videomode, later it may lead to a null-ptr dereference in -fb_videomode_to_var(), as the fb_info is registered while not having the -mode in modelist that is expected to be there, i.e. the one that is -described in fb_info->var. - -================================================================ -general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI -KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] -CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0 -Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 -RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901 -Call Trace: - display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929 - fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071 - resize_screen drivers/tty/vt/vt.c:1176 [inline] - vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263 - fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720 - fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776 - do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128 - fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203 - vfs_ioctl fs/ioctl.c:48 [inline] - __do_sys_ioctl fs/ioctl.c:753 [inline] - __se_sys_ioctl fs/ioctl.c:739 [inline] - __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739 - do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 - entry_SYSCALL_64_after_hwframe+0x67/0xd1 -================================================================ - -The reason is that fb_info->var is being modified in fb_set_var(), and -then fb_videomode_to_var() is called. If it fails to add the mode to -fb_info->modelist, fb_set_var() returns error, but does not restore the -old value of fb_info->var. Restore fb_info->var on failure the same way -it is done earlier in the function. - -Found by Linux Verification Center (linuxtesting.org) with Syzkaller. - -Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") -Cc: stable@vger.kernel.org -Signed-off-by: Murad Masimov -Signed-off-by: Helge Deller ---- - drivers/video/fbdev/core/fbmem.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/drivers/video/fbdev/core/fbmem.c -+++ b/drivers/video/fbdev/core/fbmem.c -@@ -328,8 +328,10 @@ fb_set_var(struct fb_info *info, struct - !list_empty(&info->modelist)) - ret = fb_add_videomode(&mode, &info->modelist); - -- if (ret) -+ if (ret) { -+ info->var = old_var; - return ret; -+ } - - event.info = info; - event.data = &mode; diff --git a/debian/patches/patchset-pf/fixes/0013-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch b/debian/patches/patchset-pf/fixes/0004-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch similarity index 97% rename from debian/patches/patchset-pf/fixes/0013-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch rename to debian/patches/patchset-pf/fixes/0004-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch index 5f39c40..847731d 100644 --- a/debian/patches/patchset-pf/fixes/0013-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch +++ b/debian/patches/patchset-pf/fixes/0004-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch @@ -1,4 +1,4 @@ -From c041325f222c774573ad73d35939451a4e221e52 Mon Sep 17 00:00:00 2001 +From 61d27e9dadb2eb2b7596a11a37402452d97625f7 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Mon, 26 May 2025 18:28:18 +0000 Subject: mm/khugepaged: fix race with folio split/free using temporary diff --git a/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch b/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch deleted file mode 100644 index daa40c5..0000000 --- a/debian/patches/patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 13ccad7713b89e7693feb5346e7893dc8edce7a8 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 7 Apr 2025 11:54:15 +0200 -Subject: anon_inode: use a proper mode internally - -This allows the VFS to not trip over anonymous inodes and we can add -asserts based on the mode into the vfs. When we report it to userspace -we can simply hide the mode to avoid regressions. I've audited all -direct callers of alloc_anon_inode() and only secretmen overrides i_mode -and i_op inode operations but it already uses a regular file. - -Link: https://lore.kernel.org/20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org -Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()") -Reviewed-by: Jeff Layton -Cc: stable@vger.kernel.org # all LTS kernels -Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com -Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com -Signed-off-by: Christian Brauner ---- - fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++ - fs/internal.h | 3 +++ - fs/libfs.c | 8 +++++++- - 3 files changed, 46 insertions(+), 1 deletion(-) - ---- a/fs/anon_inodes.c -+++ b/fs/anon_inodes.c -@@ -24,10 +24,44 @@ - - #include - -+#include "internal.h" -+ - static struct vfsmount *anon_inode_mnt __ro_after_init; - static struct inode *anon_inode_inode __ro_after_init; - - /* -+ * User space expects anonymous inodes to have no file type in st_mode. -+ * -+ * In particular, 'lsof' has this legacy logic: -+ * -+ * type = s->st_mode & S_IFMT; -+ * switch (type) { -+ * ... -+ * case 0: -+ * if (!strcmp(p, "anon_inode")) -+ * Lf->ntype = Ntype = N_ANON_INODE; -+ * -+ * to detect our old anon_inode logic. -+ * -+ * Rather than mess with our internal sane inode data, just fix it -+ * up here in getattr() by masking off the format bits. -+ */ -+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, -+ struct kstat *stat, u32 request_mask, -+ unsigned int query_flags) -+{ -+ struct inode *inode = d_inode(path->dentry); -+ -+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); -+ stat->mode &= ~S_IFMT; -+ return 0; -+} -+ -+static const struct inode_operations anon_inode_operations = { -+ .getattr = anon_inode_getattr, -+}; -+ -+/* - * anon_inodefs_dname() is called from d_path(). - */ - static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen) -@@ -66,6 +100,7 @@ static struct inode *anon_inode_make_sec - if (IS_ERR(inode)) - return inode; - inode->i_flags &= ~S_PRIVATE; -+ inode->i_op = &anon_inode_operations; - error = security_inode_init_security_anon(inode, &QSTR(name), - context_inode); - if (error) { -@@ -313,6 +348,7 @@ static int __init anon_inode_init(void) - anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); - if (IS_ERR(anon_inode_inode)) - panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); -+ anon_inode_inode->i_op = &anon_inode_operations; - - return 0; - } ---- a/fs/internal.h -+++ b/fs/internal.h -@@ -343,3 +343,6 @@ static inline bool path_mounted(const st - void file_f_owner_release(struct file *file); - bool file_seek_cur_needs_f_lock(struct file *file); - int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); -+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, -+ struct kstat *stat, u32 request_mask, -+ unsigned int query_flags); ---- a/fs/libfs.c -+++ b/fs/libfs.c -@@ -1647,7 +1647,13 @@ struct inode *alloc_anon_inode(struct su - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; -- inode->i_mode = S_IRUSR | S_IWUSR; -+ /* -+ * Historically anonymous inodes didn't have a type at all and -+ * userspace has come to rely on this. Internally they're just -+ * regular files but S_IFREG is masked off when reporting -+ * information to userspace. -+ */ -+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; diff --git a/debian/patches/patchset-pf/fixes/0014-mm-add-folio_expected_ref_count-for-reference-count-.patch b/debian/patches/patchset-pf/fixes/0005-mm-add-folio_expected_ref_count-for-reference-count-.patch similarity index 99% rename from debian/patches/patchset-pf/fixes/0014-mm-add-folio_expected_ref_count-for-reference-count-.patch rename to debian/patches/patchset-pf/fixes/0005-mm-add-folio_expected_ref_count-for-reference-count-.patch index 97b15e9..07482ff 100644 --- a/debian/patches/patchset-pf/fixes/0014-mm-add-folio_expected_ref_count-for-reference-count-.patch +++ b/debian/patches/patchset-pf/fixes/0005-mm-add-folio_expected_ref_count-for-reference-count-.patch @@ -1,4 +1,4 @@ -From 76653593bdf5fda03717991681b5d60e2af015e9 Mon Sep 17 00:00:00 2001 +From 8135974e9e512fdf6d15f59947f95e44f2834c37 Mon Sep 17 00:00:00 2001 From: Shivank Garg Date: Wed, 30 Apr 2025 10:01:51 +0000 Subject: mm: add folio_expected_ref_count() for reference count calculation diff --git a/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch b/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch deleted file mode 100644 index 840ba93..0000000 --- a/debian/patches/patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 5a3eea2c3e9675a8b713eef0d52b7c437f1f613b Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 7 Apr 2025 11:54:17 +0200 -Subject: anon_inode: explicitly block ->setattr() - -It is currently possible to change the mode and owner of the single -anonymous inode in the kernel: - -int main(int argc, char *argv[]) -{ - int ret, sfd; - sigset_t mask; - struct signalfd_siginfo fdsi; - - sigemptyset(&mask); - sigaddset(&mask, SIGINT); - sigaddset(&mask, SIGQUIT); - - ret = sigprocmask(SIG_BLOCK, &mask, NULL); - if (ret < 0) - _exit(1); - - sfd = signalfd(-1, &mask, 0); - if (sfd < 0) - _exit(2); - - ret = fchown(sfd, 5555, 5555); - if (ret < 0) - _exit(3); - - ret = fchmod(sfd, 0777); - if (ret < 0) - _exit(3); - - _exit(4); -} - -This is a bug. It's not really a meaningful one because anonymous inodes -don't really figure into path lookup and they cannot be reopened via -/proc//fd/ and can't be used for lookup itself. So they can -only ever serve as direct references. - -But it is still completely bogus to allow the mode and ownership or any -of the properties of the anonymous inode to be changed. Block this! - -Link: https://lore.kernel.org/20250407-work-anon_inode-v1-3-53a44c20d44e@kernel.org -Reviewed-by: Jeff Layton -Cc: stable@vger.kernel.org # all LTS kernels -Signed-off-by: Christian Brauner ---- - fs/anon_inodes.c | 7 +++++++ - fs/internal.h | 2 ++ - 2 files changed, 9 insertions(+) - ---- a/fs/anon_inodes.c -+++ b/fs/anon_inodes.c -@@ -57,8 +57,15 @@ int anon_inode_getattr(struct mnt_idmap - return 0; - } - -+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, -+ struct iattr *attr) -+{ -+ return -EOPNOTSUPP; -+} -+ - static const struct inode_operations anon_inode_operations = { - .getattr = anon_inode_getattr, -+ .setattr = anon_inode_setattr, - }; - - /* ---- a/fs/internal.h -+++ b/fs/internal.h -@@ -346,3 +346,5 @@ int statmount_mnt_idmap(struct mnt_idmap - int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, - struct kstat *stat, u32 request_mask, - unsigned int query_flags); -+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, -+ struct iattr *attr); diff --git a/debian/patches/patchset-zen/fixes/0003-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch b/debian/patches/patchset-pf/fixes/0006-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch similarity index 96% rename from debian/patches/patchset-zen/fixes/0003-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch rename to debian/patches/patchset-pf/fixes/0006-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch index e0186da..6cefff9 100644 --- a/debian/patches/patchset-zen/fixes/0003-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch +++ b/debian/patches/patchset-pf/fixes/0006-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch @@ -1,4 +1,4 @@ -From 96e19aa45a528ce5c722f1925d750f74efe22a8b Mon Sep 17 00:00:00 2001 +From 3d1a493525955678c231ab7ccf0950c0ba2b9f45 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 13 Jun 2025 11:42:46 +0530 Subject: drm/i915/snps_hdmi_pll: Fix 64-bit divisor truncation by using diff --git a/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch b/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch deleted file mode 100644 index b305b31..0000000 --- a/debian/patches/patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 8c9775d285f9755477a8b1f8b215102dce014ed2 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 7 Apr 2025 11:54:19 +0200 -Subject: anon_inode: raise SB_I_NODEV and SB_I_NOEXEC - -It isn't possible to execute anonymous inodes because they cannot be -opened in any way after they have been created. This includes execution: - -execveat(fd_anon_inode, "", NULL, NULL, AT_EMPTY_PATH) - -Anonymous inodes have inode->f_op set to no_open_fops which sets -no_open() which returns ENXIO. That means any call to do_dentry_open() -which is the endpoint of the do_open_execat() will fail. There's no -chance to execute an anonymous inode. Unless a given subsystem overrides -it ofc. - -However, we should still harden this and raise SB_I_NODEV and -SB_I_NOEXEC on the superblock itself so that no one gets any creative -ideas. - -Link: https://lore.kernel.org/20250407-work-anon_inode-v1-5-53a44c20d44e@kernel.org -Reviewed-by: Jeff Layton -Cc: stable@vger.kernel.org # all LTS kernels -Signed-off-by: Christian Brauner ---- - fs/anon_inodes.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/fs/anon_inodes.c -+++ b/fs/anon_inodes.c -@@ -86,6 +86,8 @@ static int anon_inodefs_init_fs_context( - struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC); - if (!ctx) - return -ENOMEM; -+ fc->s_iflags |= SB_I_NOEXEC; -+ fc->s_iflags |= SB_I_NODEV; - ctx->dops = &anon_inodefs_dentry_operations; - return 0; - } diff --git a/debian/patches/patchset-pf/fixes/0007-mm-shmem-swap-fix-softlockup-with-mTHP-swapin.patch b/debian/patches/patchset-pf/fixes/0007-mm-shmem-swap-fix-softlockup-with-mTHP-swapin.patch new file mode 100644 index 0000000..6463ce6 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0007-mm-shmem-swap-fix-softlockup-with-mTHP-swapin.patch @@ -0,0 +1,190 @@ +From 3a317593ed60909e02e059a43b2ef588f95fd457 Mon Sep 17 00:00:00 2001 +From: Kairui Song +Date: Tue, 10 Jun 2025 01:17:51 +0800 +Subject: mm/shmem, swap: fix softlockup with mTHP swapin +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Following softlockup can be easily reproduced on my test machine with: + +echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled +swapon /dev/zram0 # zram0 is a 48G swap device +mkdir -p /sys/fs/cgroup/memory/test +echo 1G > /sys/fs/cgroup/test/memory.max +echo $BASHPID > /sys/fs/cgroup/test/cgroup.procs +while true; do + dd if=/dev/zero of=/tmp/test.img bs=1M count=5120 + cat /tmp/test.img > /dev/null + rm /tmp/test.img +done + +Then after a while: +watchdog: BUG: soft lockup - CPU#0 stuck for 763s! [cat:5787] +Modules linked in: zram virtiofs +CPU: 0 UID: 0 PID: 5787 Comm: cat Kdump: loaded Tainted: G L 6.15.0.orig-gf3021d9246bc-dirty #118 PREEMPT(voluntary)· +Tainted: [L]=SOFTLOCKUP +Hardware name: Red Hat KVM/RHEL-AV, BIOS 0.0.0 02/06/2015 +RIP: 0010:mpol_shared_policy_lookup+0xd/0x70 +Code: e9 b8 b4 ff ff 31 c0 c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 0f 1f 44 00 00 41 54 55 53 <48> 8b 1f 48 85 db 74 41 4c 8d 67 08 48 89 fb 48 89 f5 4c 89 e7 e8 +RSP: 0018:ffffc90002b1fc28 EFLAGS: 00000202 +RAX: 00000000001c20ca RBX: 0000000000724e1e RCX: 0000000000000001 +RDX: ffff888118e214c8 RSI: 0000000000057d42 RDI: ffff888118e21518 +RBP: 000000000002bec8 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000bf4 R11: 0000000000000000 R12: 0000000000000001 +R13: 00000000001c20ca R14: 00000000001c20ca R15: 0000000000000000 +FS: 00007f03f995c740(0000) GS:ffff88a07ad9a000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f03f98f1000 CR3: 0000000144626004 CR4: 0000000000770eb0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + shmem_alloc_folio+0x31/0xc0 + shmem_swapin_folio+0x309/0xcf0 + ? filemap_get_entry+0x117/0x1e0 + ? xas_load+0xd/0xb0 + ? filemap_get_entry+0x101/0x1e0 + shmem_get_folio_gfp+0x2ed/0x5b0 + shmem_file_read_iter+0x7f/0x2e0 + vfs_read+0x252/0x330 + ksys_read+0x68/0xf0 + do_syscall_64+0x4c/0x1c0 + entry_SYSCALL_64_after_hwframe+0x76/0x7e +RIP: 0033:0x7f03f9a46991 +Code: 00 48 8b 15 81 14 10 00 f7 d8 64 89 02 b8 ff ff ff ff eb bd e8 20 ad 01 00 f3 0f 1e fa 80 3d 35 97 10 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 4f c3 66 0f 1f 44 00 00 55 48 89 e5 48 83 ec +RSP: 002b:00007fff3c52bd28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 +RAX: ffffffffffffffda RBX: 0000000000040000 RCX: 00007f03f9a46991 +RDX: 0000000000040000 RSI: 00007f03f98ba000 RDI: 0000000000000003 +RBP: 00007fff3c52bd50 R08: 0000000000000000 R09: 00007f03f9b9a380 +R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000040000 +R13: 00007f03f98ba000 R14: 0000000000000003 R15: 0000000000000000 + + +The reason is simple, readahead brought some order 0 folio in swap cache, +and the swapin mTHP folio being allocated is in conflict with it, so +swapcache_prepare fails and causes shmem_swap_alloc_folio to return +-EEXIST, and shmem simply retries again and again causing this loop. + +Fix it by applying a similar fix for anon mTHP swapin. + +The performance change is very slight, time of swapin 10g zero folios +with shmem (test for 12 times): +Before: 2.47s +After: 2.48s + +[kasong@tencent.com: add comment] + Link: https://lkml.kernel.org/r/20250610181645.45922-1-ryncsn@gmail.com +Link: https://lkml.kernel.org/r/20250610181645.45922-1-ryncsn@gmail.com +Link: https://lkml.kernel.org/r/20250609171751.36305-1-ryncsn@gmail.com +Fixes: 1dd44c0af4fa ("mm: shmem: skip swapcache for swapin of synchronous swap device") +Signed-off-by: Kairui Song +Reviewed-by: Barry Song +Acked-by: Nhat Pham +Reviewed-by: Baolin Wang +Cc: Baoquan He +Cc: Chris Li +Cc: Hugh Dickins +Cc: Kemeng Shi +Cc: Usama Arif +Cc: +Signed-off-by: Andrew Morton +--- + mm/memory.c | 20 -------------------- + mm/shmem.c | 6 +++++- + mm/swap.h | 23 +++++++++++++++++++++++ + 3 files changed, 28 insertions(+), 21 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4225,26 +4225,6 @@ static struct folio *__alloc_swap_folio( + } + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +-static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +-{ +- struct swap_info_struct *si = swp_swap_info(entry); +- pgoff_t offset = swp_offset(entry); +- int i; +- +- /* +- * While allocating a large folio and doing swap_read_folio, which is +- * the case the being faulted pte doesn't have swapcache. We need to +- * ensure all PTEs have no cache as well, otherwise, we might go to +- * swap devices while the content is in swapcache. +- */ +- for (i = 0; i < max_nr; i++) { +- if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) +- return i; +- } +- +- return i; +-} +- + /* + * Check if the PTEs within a range are contiguous swap entries + * and have consistent swapcache, zeromap. +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2262,6 +2262,7 @@ static int shmem_swapin_folio(struct ino + folio = swap_cache_get_folio(swap, NULL, 0); + order = xa_get_order(&mapping->i_pages, index); + if (!folio) { ++ int nr_pages = 1 << order; + bool fallback_order0 = false; + + /* Or update major stats only when swapin succeeds?? */ +@@ -2275,9 +2276,12 @@ static int shmem_swapin_folio(struct ino + * If uffd is active for the vma, we need per-page fault + * fidelity to maintain the uffd semantics, then fallback + * to swapin order-0 folio, as well as for zswap case. ++ * Any existing sub folio in the swap cache also blocks ++ * mTHP swapin. + */ + if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || +- !zswap_never_enabled())) ++ !zswap_never_enabled() || ++ non_swapcache_batch(swap, nr_pages) != nr_pages)) + fallback_order0 = true; + + /* Skip swapcache for synchronous device. */ +--- a/mm/swap.h ++++ b/mm/swap.h +@@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp + return find_next_bit(sis->zeromap, end, start) - start; + } + ++static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) ++{ ++ struct swap_info_struct *si = swp_swap_info(entry); ++ pgoff_t offset = swp_offset(entry); ++ int i; ++ ++ /* ++ * While allocating a large folio and doing mTHP swapin, we need to ++ * ensure all entries are not cached, otherwise, the mTHP folio will ++ * be in conflict with the folio in swap cache. ++ */ ++ for (i = 0; i < max_nr; i++) { ++ if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) ++ return i; ++ } ++ ++ return i; ++} ++ + #else /* CONFIG_SWAP */ + struct swap_iocb; + static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) +@@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp + return 0; + } + ++static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) ++{ ++ return 0; ++} + #endif /* CONFIG_SWAP */ + + #endif /* _MM_SWAP_H */ diff --git a/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch b/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch deleted file mode 100644 index f844b1c..0000000 --- a/debian/patches/patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch +++ /dev/null @@ -1,136 +0,0 @@ -From d90681a50098e204f2e111b9433f6fc73a939854 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 21 Apr 2025 10:27:40 +0200 -Subject: fs: add S_ANON_INODE - -This makes it easy to detect proper anonymous inodes and to ensure that -we can detect them in codepaths such as readahead(). - -Readahead on anonymous inodes didn't work because they didn't have a -proper mode. Now that they have we need to retain EINVAL being returned -otherwise LTP will fail. - -We also need to ensure that ioctls aren't simply fired like they are for -regular files so things like inotify inodes continue to correctly call -their own ioctl handlers as in [1]. - -Reported-by: Xilin Wu -Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1] -Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk -Signed-off-by: Christian Brauner ---- - fs/ioctl.c | 7 ++++--- - fs/libfs.c | 2 +- - fs/pidfs.c | 2 +- - include/linux/fs.h | 2 ++ - mm/readahead.c | 20 ++++++++++++++++---- - 5 files changed, 24 insertions(+), 9 deletions(-) - ---- a/fs/ioctl.c -+++ b/fs/ioctl.c -@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *fil - return ioctl_fioasync(fd, filp, argp); - - case FIOQSIZE: -- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || -+ if (S_ISDIR(inode->i_mode) || -+ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) || - S_ISLNK(inode->i_mode)) { - loff_t res = inode_get_bytes(inode); - return copy_to_user(argp, &res, sizeof(res)) ? -@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *fil - return ioctl_file_dedupe_range(filp, argp); - - case FIONREAD: -- if (!S_ISREG(inode->i_mode)) -+ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode)) - return vfs_ioctl(filp, cmd, arg); - - return put_user(i_size_read(inode) - filp->f_pos, -@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *fil - return ioctl_get_fs_sysfs_path(filp, argp); - - default: -- if (S_ISREG(inode->i_mode)) -+ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) - return file_ioctl(filp, cmd, argp); - break; - } ---- a/fs/libfs.c -+++ b/fs/libfs.c -@@ -1656,7 +1656,7 @@ struct inode *alloc_anon_inode(struct su - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); -- inode->i_flags |= S_PRIVATE; -+ inode->i_flags |= S_PRIVATE | S_ANON_INODE; - simple_inode_init_ts(inode); - return inode; - } ---- a/fs/pidfs.c -+++ b/fs/pidfs.c -@@ -826,7 +826,7 @@ static int pidfs_init_inode(struct inode - const struct pid *pid = data; - - inode->i_private = data; -- inode->i_flags |= S_PRIVATE; -+ inode->i_flags |= S_PRIVATE | S_ANON_INODE; - inode->i_mode |= S_IRWXU; - inode->i_op = &pidfs_inode_operations; - inode->i_fop = &pidfs_file_operations; ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -2344,6 +2344,7 @@ struct super_operations { - #define S_CASEFOLD (1 << 15) /* Casefolded file */ - #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ - #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ -+#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ - - /* - * Note that nosuid etc flags are inode-specific: setting some file-system -@@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struc - - #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ - (inode)->i_rdev == WHITEOUT_DEV) -+#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) - - static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, - struct inode *inode) ---- a/mm/readahead.c -+++ b/mm/readahead.c -@@ -690,9 +690,15 @@ EXPORT_SYMBOL_GPL(page_cache_async_ra); - - ssize_t ksys_readahead(int fd, loff_t offset, size_t count) - { -+ struct file *file; -+ const struct inode *inode; -+ - CLASS(fd, f)(fd); -+ if (fd_empty(f)) -+ return -EBADF; - -- if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ)) -+ file = fd_file(f); -+ if (!(file->f_mode & FMODE_READ)) - return -EBADF; - - /* -@@ -700,9 +706,15 @@ ssize_t ksys_readahead(int fd, loff_t of - * that can execute readahead. If readahead is not possible - * on this file, then we must return -EINVAL. - */ -- if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops || -- (!S_ISREG(file_inode(fd_file(f))->i_mode) && -- !S_ISBLK(file_inode(fd_file(f))->i_mode))) -+ if (!file->f_mapping) -+ return -EINVAL; -+ if (!file->f_mapping->a_ops) -+ return -EINVAL; -+ -+ inode = file_inode(file); -+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) -+ return -EINVAL; -+ if (IS_ANON_FILE(inode)) - return -EINVAL; - - return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED); diff --git a/debian/patches/patchset-pf/fixes/0008-mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_.patch b/debian/patches/patchset-pf/fixes/0008-mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_.patch new file mode 100644 index 0000000..2b98b91 --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0008-mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_.patch @@ -0,0 +1,100 @@ +From 4b247e559e4046bbbfab468e66f9d3197eaf12ec Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 11 Jun 2025 15:13:14 +0200 +Subject: mm/gup: revert "mm: gup: fix infinite loop within + __get_longterm_locked" + +After commit 1aaf8c122918 ("mm: gup: fix infinite loop within +__get_longterm_locked") we are able to longterm pin folios that are not +supposed to get longterm pinned, simply because they temporarily have the +LRU flag cleared (esp. temporarily isolated). + +For example, two __get_longterm_locked() callers can race, or +__get_longterm_locked() can race with anything else that temporarily +isolates folios. + +The introducing commit mentions the use case of a driver that uses +vm_ops->fault to insert pages allocated through cma_alloc() into the page +tables, assuming they can later get longterm pinned. These pages/ folios +would never have the LRU flag set and consequently cannot get isolated. +There is no known in-tree user making use of that so far, fortunately. + +To handle that in the future -- and avoid retrying forever to +isolate/migrate them -- we will need a different mechanism for the CMA +area *owner* to indicate that it actually already allocated the page and +is fine with longterm pinning it. The LRU flag is not suitable for that. + +Probably we can lookup the relevant CMA area and query the bitmap; we only +have have to care about some races, probably. If already allocated, we +could just allow longterm pinning) + +Anyhow, let's fix the "must not be longterm pinned" problem first by +reverting the original commit. + +Link: https://lkml.kernel.org/r/20250611131314.594529-1-david@redhat.com +Fixes: 1aaf8c122918 ("mm: gup: fix infinite loop within __get_longterm_locked") +Signed-off-by: David Hildenbrand +Closes: https://lore.kernel.org/all/20250522092755.GA3277597@tiffany/ +Reported-by: Hyesoo Yu +Reviewed-by: John Hubbard +Cc: Jason Gunthorpe +Cc: Peter Xu +Cc: Zhaoyang Huang +Cc: Aijun Sun +Cc: Alistair Popple +Cc: +Signed-off-by: Andrew Morton +--- + mm/gup.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_f + /* + * Returns the number of collected folios. Return value is always >= 0. + */ +-static void collect_longterm_unpinnable_folios( ++static unsigned long collect_longterm_unpinnable_folios( + struct list_head *movable_folio_list, + struct pages_or_folios *pofs) + { ++ unsigned long i, collected = 0; + struct folio *prev_folio = NULL; + bool drain_allow = true; +- unsigned long i; + + for (i = 0; i < pofs->nr_entries; i++) { + struct folio *folio = pofs_get_folio(pofs, i); +@@ -2338,6 +2338,8 @@ static void collect_longterm_unpinnable_ + if (folio_is_longterm_pinnable(folio)) + continue; + ++ collected++; ++ + if (folio_is_device_coherent(folio)) + continue; + +@@ -2359,6 +2361,8 @@ static void collect_longterm_unpinnable_ + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); + } ++ ++ return collected; + } + + /* +@@ -2435,9 +2439,11 @@ static long + check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) + { + LIST_HEAD(movable_folio_list); ++ unsigned long collected; + +- collect_longterm_unpinnable_folios(&movable_folio_list, pofs); +- if (list_empty(&movable_folio_list)) ++ collected = collect_longterm_unpinnable_folios(&movable_folio_list, ++ pofs); ++ if (!collected) + return 0; + + return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch b/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch deleted file mode 100644 index 57b3fda..0000000 --- a/debian/patches/patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch +++ /dev/null @@ -1,35 +0,0 @@ -From c161e0ffb55a12b9b26819fa0ecf8217ab781e97 Mon Sep 17 00:00:00 2001 -From: Zijun Hu -Date: Wed, 7 May 2025 19:50:26 +0800 -Subject: configfs: Do not override creating attribute file failure in - populate_attrs() - -populate_attrs() may override failure for creating attribute files -by success for creating subsequent bin attribute files, and have -wrong return value. - -Fix by creating bin attribute files under successfully creating -attribute files. - -Fixes: 03607ace807b ("configfs: implement binary attributes") -Cc: stable@vger.kernel.org -Reviewed-by: Joel Becker -Reviewed-by: Breno Leitao -Signed-off-by: Zijun Hu -Link: https://lore.kernel.org/r/20250507-fix_configfs-v3-2-fe2d96de8dc4@quicinc.com -Signed-off-by: Andreas Hindborg ---- - fs/configfs/dir.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/fs/configfs/dir.c -+++ b/fs/configfs/dir.c -@@ -619,7 +619,7 @@ static int populate_attrs(struct config_ - break; - } - } -- if (t->ct_bin_attrs) { -+ if (!error && t->ct_bin_attrs) { - for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { - if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) - continue; diff --git a/debian/patches/patchset-pf/fixes/0009-mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap.patch b/debian/patches/patchset-pf/fixes/0009-mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap.patch new file mode 100644 index 0000000..56dfa1b --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0009-mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap.patch @@ -0,0 +1,191 @@ +From 7ebf89b788aa5b83897e99ad6e3dd6e0cb0f5030 Mon Sep 17 00:00:00 2001 +From: Kairui Song +Date: Wed, 4 Jun 2025 23:10:38 +0800 +Subject: mm: userfaultfd: fix race of userfaultfd_move and swap cache + +This commit fixes two kinds of races, they may have different results: + +Barry reported a BUG_ON in commit c50f8e6053b0, we may see the same +BUG_ON if the filemap lookup returned NULL and folio is added to swap +cache after that. + +If another kind of race is triggered (folio changed after lookup) we +may see RSS counter is corrupted: + +[ 406.893936] BUG: Bad rss-counter state mm:ffff0000c5a9ddc0 +type:MM_ANONPAGES val:-1 +[ 406.894071] BUG: Bad rss-counter state mm:ffff0000c5a9ddc0 +type:MM_SHMEMPAGES val:1 + +Because the folio is being accounted to the wrong VMA. + +I'm not sure if there will be any data corruption though, seems no. +The issues above are critical already. + + +On seeing a swap entry PTE, userfaultfd_move does a lockless swap cache +lookup, and tries to move the found folio to the faulting vma. Currently, +it relies on checking the PTE value to ensure that the moved folio still +belongs to the src swap entry and that no new folio has been added to the +swap cache, which turns out to be unreliable. + +While working and reviewing the swap table series with Barry, following +existing races are observed and reproduced [1]: + +In the example below, move_pages_pte is moving src_pte to dst_pte, where +src_pte is a swap entry PTE holding swap entry S1, and S1 is not in the +swap cache: + +CPU1 CPU2 +userfaultfd_move + move_pages_pte() + entry = pte_to_swp_entry(orig_src_pte); + // Here it got entry = S1 + ... < interrupted> ... + + // folio A is a new allocated folio + // and get installed into src_pte + + // src_pte now points to folio A, S1 + // has swap count == 0, it can be freed + // by folio_swap_swap or swap + // allocator's reclaim. + + // folio B is a folio in another VMA. + + // S1 is freed, folio B can use it + // for swap out with no problem. + ... + folio = filemap_get_folio(S1) + // Got folio B here !!! + ... < interrupted again> ... + + // Now S1 is free to be used again. + + // Now src_pte is a swap entry PTE + // holding S1 again. + folio_trylock(folio) + move_swap_pte + double_pt_lock + is_pte_pages_stable + // Check passed because src_pte == S1 + folio_move_anon_rmap(...) + // Moved invalid folio B here !!! + +The race window is very short and requires multiple collisions of multiple +rare events, so it's very unlikely to happen, but with a deliberately +constructed reproducer and increased time window, it can be reproduced +easily. + +This can be fixed by checking if the folio returned by filemap is the +valid swap cache folio after acquiring the folio lock. + +Another similar race is possible: filemap_get_folio may return NULL, but +folio (A) could be swapped in and then swapped out again using the same +swap entry after the lookup. In such a case, folio (A) may remain in the +swap cache, so it must be moved too: + +CPU1 CPU2 +userfaultfd_move + move_pages_pte() + entry = pte_to_swp_entry(orig_src_pte); + // Here it got entry = S1, and S1 is not in swap cache + folio = filemap_get_folio(S1) + // Got NULL + ... < interrupted again> ... + + + move_swap_pte + double_pt_lock + is_pte_pages_stable + // Check passed because src_pte == S1 + folio_move_anon_rmap(...) + // folio A is ignored !!! + +Fix this by checking the swap cache again after acquiring the src_pte +lock. And to avoid the filemap overhead, we check swap_map directly [2]. + +The SWP_SYNCHRONOUS_IO path does make the problem more complex, but so far +we don't need to worry about that, since folios can only be exposed to the +swap cache in the swap out path, and this is covered in this patch by +checking the swap cache again after acquiring the src_pte lock. + +Testing with a simple C program that allocates and moves several GB of +memory did not show any observable performance change. + +Link: https://lkml.kernel.org/r/20250604151038.21968-1-ryncsn@gmail.com +Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") +Signed-off-by: Kairui Song +Closes: https://lore.kernel.org/linux-mm/CAMgjq7B1K=6OOrK2OUZ0-tqCzi+EJt+2_K97TPGoSt=9+JwP7Q@mail.gmail.com/ [1] +Link: https://lore.kernel.org/all/CAGsJ_4yJhJBo16XhiC-nUzSheyX-V3-nFE+tAi=8Y560K8eT=A@mail.gmail.com/ [2] +Reviewed-by: Lokesh Gidra +Acked-by: Peter Xu +Reviewed-by: Suren Baghdasaryan +Reviewed-by: Barry Song +Reviewed-by: Chris Li +Cc: Andrea Arcangeli +Cc: David Hildenbrand +Cc: Kairui Song +Cc: +Signed-off-by: Andrew Morton +--- + mm/userfaultfd.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struc + pte_t orig_dst_pte, pte_t orig_src_pte, + pmd_t *dst_pmd, pmd_t dst_pmdval, + spinlock_t *dst_ptl, spinlock_t *src_ptl, +- struct folio *src_folio) ++ struct folio *src_folio, ++ struct swap_info_struct *si, swp_entry_t entry) + { ++ /* ++ * Check if the folio still belongs to the target swap entry after ++ * acquiring the lock. Folio can be freed in the swap cache while ++ * not locked. ++ */ ++ if (src_folio && unlikely(!folio_test_swapcache(src_folio) || ++ entry.val != src_folio->swap.val)) ++ return -EAGAIN; ++ + double_pt_lock(dst_ptl, src_ptl); + + if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, +@@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struc + if (src_folio) { + folio_move_anon_rmap(src_folio, dst_vma); + src_folio->index = linear_page_index(dst_vma, dst_addr); ++ } else { ++ /* ++ * Check if the swap entry is cached after acquiring the src_pte ++ * lock. Otherwise, we might miss a newly loaded swap cache folio. ++ * ++ * Check swap_map directly to minimize overhead, READ_ONCE is sufficient. ++ * We are trying to catch newly added swap cache, the only possible case is ++ * when a folio is swapped in and out again staying in swap cache, using the ++ * same entry before the PTE check above. The PTL is acquired and released ++ * twice, each time after updating the swap_map's flag. So holding ++ * the PTL here ensures we see the updated value. False positive is possible, ++ * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the ++ * cache, or during the tiny synchronization window between swap cache and ++ * swap_map, but it will be gone very quickly, worst result is retry jitters. ++ */ ++ if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) { ++ double_pt_unlock(dst_ptl, src_ptl); ++ return -EAGAIN; ++ } + } + + orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); +@@ -1412,7 +1441,7 @@ retry: + } + err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, + orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, +- dst_ptl, src_ptl, src_folio); ++ dst_ptl, src_ptl, src_folio, si, entry); + } + + out: diff --git a/debian/patches/patchset-pf/fixes/0010-dm-raid-fix-variable-in-journal-device-check.patch b/debian/patches/patchset-pf/fixes/0010-dm-raid-fix-variable-in-journal-device-check.patch new file mode 100644 index 0000000..106544b --- /dev/null +++ b/debian/patches/patchset-pf/fixes/0010-dm-raid-fix-variable-in-journal-device-check.patch @@ -0,0 +1,26 @@ +From 222985dcb732fae554af5276f44c30d648a1d05b Mon Sep 17 00:00:00 2001 +From: Heinz Mauelshagen +Date: Tue, 10 Jun 2025 20:53:30 +0200 +Subject: dm-raid: fix variable in journal device check + +Replace "rdev" with correct loop variable name "r". + +Signed-off-by: Heinz Mauelshagen +Cc: stable@vger.kernel.org +Fixes: 63c32ed4afc2 ("dm raid: add raid4/5/6 journaling support") +Signed-off-by: Mikulas Patocka +--- + drivers/md/dm-raid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -2410,7 +2410,7 @@ static int super_init_validation(struct + */ + sb_retrieve_failed_devices(sb, failed_devices); + rdev_for_each(r, mddev) { +- if (test_bit(Journal, &rdev->flags) || ++ if (test_bit(Journal, &r->flags) || + !r->sb_page) + continue; + sb2 = page_address(r->sb_page); diff --git a/debian/patches/patchset-pf/fixes/0015-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch b/debian/patches/patchset-pf/fixes/0015-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch deleted file mode 100644 index 82bc2ce..0000000 --- a/debian/patches/patchset-pf/fixes/0015-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 1e9a258def978a9388a50ae43c85557b0598a7d3 Mon Sep 17 00:00:00 2001 -From: Pu Lehui -Date: Thu, 29 May 2025 15:56:47 +0000 -Subject: mm: fix uprobe pte be overwritten when expanding vma - -Patch series "Fix uprobe pte be overwritten when expanding vma". - - -This patch (of 4): - -We encountered a BUG alert triggered by Syzkaller as follows: - BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1 - -And we can reproduce it with the following steps: -1. register uprobe on file at zero offset -2. mmap the file at zero offset: - addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0); -3. mremap part of vma1 to new vma2: - addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE); -4. mremap back to orig addr1: - mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1); - -In step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2 -with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1 -to vma2, then unmap the vma1 range [addr1, addr1 + 4096]. - -In step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the -addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096, -addr1 + 8192] still maps the file, it will take vma_merge_new_range to -expand the range, and then do uprobe_mmap in vma_complete. Since the -merged vma pgoff is also zero offset, it will install uprobe anon page to -the merged vma. However, the upcomming move_page_tables step, which use -set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite -the newly uprobe pte in the merged vma, and lead that pte to be orphan. - -Since the uprobe pte will be remapped to the merged vma, we can remove the -unnecessary uprobe_mmap upon merged vma. - -This problem was first found in linux-6.6.y and also exists in the -community syzkaller: -https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/ - -Link: https://lkml.kernel.org/r/20250529155650.4017699-1-pulehui@huaweicloud.com -Link: https://lkml.kernel.org/r/20250529155650.4017699-2-pulehui@huaweicloud.com -Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") -Signed-off-by: Pu Lehui -Suggested-by: Lorenzo Stoakes -Reviewed-by: Lorenzo Stoakes -Acked-by: David Hildenbrand -Cc: Jann Horn -Cc: Liam Howlett -Cc: "Masami Hiramatsu (Google)" -Cc: Oleg Nesterov -Cc: Peter Zijlstra -Cc: Vlastimil Babka -Cc: -Signed-off-by: Andrew Morton ---- - mm/vma.c | 20 +++++++++++++++++--- - mm/vma.h | 7 +++++++ - 2 files changed, 24 insertions(+), 3 deletions(-) - ---- a/mm/vma.c -+++ b/mm/vma.c -@@ -144,6 +144,9 @@ static void init_multi_vma_prep(struct v - vp->file = vma->vm_file; - if (vp->file) - vp->mapping = vma->vm_file->f_mapping; -+ -+ if (vmg && vmg->skip_vma_uprobe) -+ vp->skip_vma_uprobe = true; - } - - /* -@@ -333,10 +336,13 @@ static void vma_complete(struct vma_prep - - if (vp->file) { - i_mmap_unlock_write(vp->mapping); -- uprobe_mmap(vp->vma); - -- if (vp->adj_next) -- uprobe_mmap(vp->adj_next); -+ if (!vp->skip_vma_uprobe) { -+ uprobe_mmap(vp->vma); -+ -+ if (vp->adj_next) -+ uprobe_mmap(vp->adj_next); -+ } - } - - if (vp->remove) { -@@ -1783,6 +1789,14 @@ struct vm_area_struct *copy_vma(struct v - faulted_in_anon_vma = false; - } - -+ /* -+ * If the VMA we are copying might contain a uprobe PTE, ensure -+ * that we do not establish one upon merge. Otherwise, when mremap() -+ * moves page tables, it will orphan the newly created PTE. -+ */ -+ if (vma->vm_file) -+ vmg.skip_vma_uprobe = true; -+ - new_vma = find_vma_prev(mm, addr, &vmg.prev); - if (new_vma && new_vma->vm_start < addr + len) - return NULL; /* should never get here */ ---- a/mm/vma.h -+++ b/mm/vma.h -@@ -19,6 +19,8 @@ struct vma_prepare { - struct vm_area_struct *insert; - struct vm_area_struct *remove; - struct vm_area_struct *remove2; -+ -+ bool skip_vma_uprobe :1; - }; - - struct unlink_vma_file_batch { -@@ -120,6 +122,11 @@ struct vma_merge_struct { - */ - bool give_up_on_oom :1; - -+ /* -+ * If set, skip uprobe_mmap upon merged vma. -+ */ -+ bool skip_vma_uprobe :1; -+ - /* Internal flags set during merge process: */ - - /* diff --git a/debian/patches/patchset-pf/fixes/0016-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch b/debian/patches/patchset-pf/fixes/0016-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch deleted file mode 100644 index 0aae85c..0000000 --- a/debian/patches/patchset-pf/fixes/0016-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 2d8c79ec421253aab9560a47a7e73d678c84585c Mon Sep 17 00:00:00 2001 -From: Jann Horn -Date: Tue, 27 May 2025 23:23:53 +0200 -Subject: mm/hugetlb: unshare page tables during VMA split, not before - -Currently, __split_vma() triggers hugetlb page table unsharing through -vm_ops->may_split(). This happens before the VMA lock and rmap locks are -taken - which is too early, it allows racing VMA-locked page faults in our -process and racing rmap walks from other processes to cause page tables to -be shared again before we actually perform the split. - -Fix it by explicitly calling into the hugetlb unshare logic from -__split_vma() in the same place where THP splitting also happens. At that -point, both the VMA and the rmap(s) are write-locked. - -An annoying detail is that we can now call into the helper -hugetlb_unshare_pmds() from two different locking contexts: - -1. from hugetlb_split(), holding: - - mmap lock (exclusively) - - VMA lock - - file rmap lock (exclusively) -2. hugetlb_unshare_all_pmds(), which I think is designed to be able to - call us with only the mmap lock held (in shared mode), but currently - only runs while holding mmap lock (exclusively) and VMA lock - -Backporting note: -This commit fixes a racy protection that was introduced in commit -b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that -commit claimed to fix an issue introduced in 5.13, but it should actually -also go all the way back. - -[jannh@google.com: v2] - Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com -Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com -Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com -Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page") -Signed-off-by: Jann Horn -Cc: Liam Howlett -Reviewed-by: Lorenzo Stoakes -Reviewed-by: Oscar Salvador -Cc: Lorenzo Stoakes -Cc: Vlastimil Babka -Cc: [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs] -Cc: -Signed-off-by: Andrew Morton ---- - include/linux/hugetlb.h | 3 ++ - mm/hugetlb.c | 60 +++++++++++++++++++++++--------- - mm/vma.c | 7 ++++ - tools/testing/vma/vma_internal.h | 2 ++ - 4 files changed, 56 insertions(+), 16 deletions(-) - ---- a/include/linux/hugetlb.h -+++ b/include/linux/hugetlb.h -@@ -276,6 +276,7 @@ bool is_hugetlb_entry_migration(pte_t pt - bool is_hugetlb_entry_hwpoisoned(pte_t pte); - void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); - void fixup_hugetlb_reservations(struct vm_area_struct *vma); -+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); - - #else /* !CONFIG_HUGETLB_PAGE */ - -@@ -473,6 +474,8 @@ static inline void fixup_hugetlb_reserva - { - } - -+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} -+ - #endif /* !CONFIG_HUGETLB_PAGE */ - - #ifndef pgd_write ---- a/mm/hugetlb.c -+++ b/mm/hugetlb.c -@@ -120,7 +120,7 @@ static void hugetlb_vma_lock_free(struct - static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); - static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); - static void hugetlb_unshare_pmds(struct vm_area_struct *vma, -- unsigned long start, unsigned long end); -+ unsigned long start, unsigned long end, bool take_locks); - static struct resv_map *vma_resv_map(struct vm_area_struct *vma); - - static void hugetlb_free_folio(struct folio *folio) -@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm - { - if (addr & ~(huge_page_mask(hstate_vma(vma)))) - return -EINVAL; -+ return 0; -+} - -+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) -+{ - /* - * PMD sharing is only possible for PUD_SIZE-aligned address ranges - * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this - * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. -+ * This function is called in the middle of a VMA split operation, with -+ * MM, VMA and rmap all write-locked to prevent concurrent page table -+ * walks (except hardware and gup_fast()). - */ -+ vma_assert_write_locked(vma); -+ i_mmap_assert_write_locked(vma->vm_file->f_mapping); -+ - if (addr & ~PUD_MASK) { -- /* -- * hugetlb_vm_op_split is called right before we attempt to -- * split the VMA. We will need to unshare PMDs in the old and -- * new VMAs, so let's unshare before we split. -- */ - unsigned long floor = addr & PUD_MASK; - unsigned long ceil = floor + PUD_SIZE; - -- if (floor >= vma->vm_start && ceil <= vma->vm_end) -- hugetlb_unshare_pmds(vma, floor, ceil); -+ if (floor >= vma->vm_start && ceil <= vma->vm_end) { -+ /* -+ * Locking: -+ * Use take_locks=false here. -+ * The file rmap lock is already held. -+ * The hugetlb VMA lock can't be taken when we already -+ * hold the file rmap lock, and we don't need it because -+ * its purpose is to synchronize against concurrent page -+ * table walks, which are not possible thanks to the -+ * locks held by our caller. -+ */ -+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false); -+ } - } -- -- return 0; - } - - static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma) -@@ -7884,9 +7898,16 @@ void move_hugetlb_state(struct folio *ol - spin_unlock_irq(&hugetlb_lock); - } - -+/* -+ * If @take_locks is false, the caller must ensure that no concurrent page table -+ * access can happen (except for gup_fast() and hardware page walks). -+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like -+ * concurrent page fault handling) and the file rmap lock. -+ */ - static void hugetlb_unshare_pmds(struct vm_area_struct *vma, - unsigned long start, -- unsigned long end) -+ unsigned long end, -+ bool take_locks) - { - struct hstate *h = hstate_vma(vma); - unsigned long sz = huge_page_size(h); -@@ -7910,8 +7931,12 @@ static void hugetlb_unshare_pmds(struct - mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, - start, end); - mmu_notifier_invalidate_range_start(&range); -- hugetlb_vma_lock_write(vma); -- i_mmap_lock_write(vma->vm_file->f_mapping); -+ if (take_locks) { -+ hugetlb_vma_lock_write(vma); -+ i_mmap_lock_write(vma->vm_file->f_mapping); -+ } else { -+ i_mmap_assert_write_locked(vma->vm_file->f_mapping); -+ } - for (address = start; address < end; address += PUD_SIZE) { - ptep = hugetlb_walk(vma, address, sz); - if (!ptep) -@@ -7921,8 +7946,10 @@ static void hugetlb_unshare_pmds(struct - spin_unlock(ptl); - } - flush_hugetlb_tlb_range(vma, start, end); -- i_mmap_unlock_write(vma->vm_file->f_mapping); -- hugetlb_vma_unlock_write(vma); -+ if (take_locks) { -+ i_mmap_unlock_write(vma->vm_file->f_mapping); -+ hugetlb_vma_unlock_write(vma); -+ } - /* - * No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see - * Documentation/mm/mmu_notifier.rst. -@@ -7937,7 +7964,8 @@ static void hugetlb_unshare_pmds(struct - void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) - { - hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), -- ALIGN_DOWN(vma->vm_end, PUD_SIZE)); -+ ALIGN_DOWN(vma->vm_end, PUD_SIZE), -+ /* take_locks = */ true); - } - - /* ---- a/mm/vma.c -+++ b/mm/vma.c -@@ -516,7 +516,14 @@ __split_vma(struct vma_iterator *vmi, st - init_vma_prep(&vp, vma); - vp.insert = new; - vma_prepare(&vp); -+ -+ /* -+ * Get rid of huge pages and shared page tables straddling the split -+ * boundary. -+ */ - vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL); -+ if (is_vm_hugetlb_page(vma)) -+ hugetlb_split(vma, addr); - - if (new_below) { - vma->vm_start = addr; ---- a/tools/testing/vma/vma_internal.h -+++ b/tools/testing/vma/vma_internal.h -@@ -793,6 +793,8 @@ static inline void vma_adjust_trans_huge - (void)next; - } - -+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} -+ - static inline void vma_iter_free(struct vma_iterator *vmi) - { - mas_destroy(&vmi->mas); diff --git a/debian/patches/patchset-pf/fixes/0017-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch b/debian/patches/patchset-pf/fixes/0017-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch deleted file mode 100644 index fad6de6..0000000 --- a/debian/patches/patchset-pf/fixes/0017-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e1280358284feaf844db5c6a76078b2c1738c5ae Mon Sep 17 00:00:00 2001 -From: Jann Horn -Date: Tue, 27 May 2025 23:23:54 +0200 -Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race - -huge_pmd_unshare() drops a reference on a page table that may have -previously been shared across processes, potentially turning it into a -normal page table used in another process in which unrelated VMAs can -afterwards be installed. - -If this happens in the middle of a concurrent gup_fast(), gup_fast() could -end up walking the page tables of another process. While I don't see any -way in which that immediately leads to kernel memory corruption, it is -really weird and unexpected. - -Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(), -just like we do in khugepaged when removing page tables for a THP -collapse. - -Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1a@google.com -Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58a@google.com -Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page") -Signed-off-by: Jann Horn -Reviewed-by: Lorenzo Stoakes -Cc: Liam Howlett -Cc: Muchun Song -Cc: Oscar Salvador -Cc: Vlastimil Babka -Cc: -Signed-off-by: Andrew Morton ---- - mm/hugetlb.c | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- a/mm/hugetlb.c -+++ b/mm/hugetlb.c -@@ -7628,6 +7628,13 @@ int huge_pmd_unshare(struct mm_struct *m - return 0; - - pud_clear(pud); -+ /* -+ * Once our caller drops the rmap lock, some other process might be -+ * using this page table as a normal, non-hugetlb page table. -+ * Wait for pending gup_fast() in other threads to finish before letting -+ * that happen. -+ */ -+ tlb_remove_table_sync_one(); - ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep)); - mm_dec_nr_pmds(mm); - return 1; diff --git a/debian/patches/patchset-pf/fixes/0018-mm-madvise-handle-madvise_lock-failure-during-race-u.patch b/debian/patches/patchset-pf/fixes/0018-mm-madvise-handle-madvise_lock-failure-during-race-u.patch deleted file mode 100644 index d3f9746..0000000 --- a/debian/patches/patchset-pf/fixes/0018-mm-madvise-handle-madvise_lock-failure-during-race-u.patch +++ /dev/null @@ -1,48 +0,0 @@ -From b36611870ea72c82eb78d90a017658394bdb9690 Mon Sep 17 00:00:00 2001 -From: SeongJae Park -Date: Mon, 2 Jun 2025 10:49:26 -0700 -Subject: mm/madvise: handle madvise_lock() failure during race unwinding - -When unwinding race on -ERESTARTNOINTR handling of process_madvise(), -madvise_lock() failure is ignored. Check the failure and abort remaining -works in the case. - -Link: https://lkml.kernel.org/r/20250602174926.1074-1-sj@kernel.org -Fixes: 4000e3d0a367 ("mm/madvise: remove redundant mmap_lock operations from process_madvise()") -Signed-off-by: SeongJae Park -Reported-by: Barry Song <21cnbao@gmail.com> -Closes: https://lore.kernel.org/CAGsJ_4xJXXO0G+4BizhohSZ4yDteziPw43_uF8nPXPWxUVChzw@mail.gmail.com -Reviewed-by: Jann Horn -Reviewed-by: Lorenzo Stoakes -Acked-by: David Hildenbrand -Reviewed-by: Shakeel Butt -Reviewed-by: Barry Song -Cc: Liam Howlett -Cc: Vlastimil Babka -Cc: -Signed-off-by: Andrew Morton ---- - mm/madvise.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/mm/madvise.c -+++ b/mm/madvise.c -@@ -1830,7 +1830,9 @@ static ssize_t vector_madvise(struct mm_ - - /* Drop and reacquire lock to unwind race. */ - madvise_unlock(mm, behavior); -- madvise_lock(mm, behavior); -+ ret = madvise_lock(mm, behavior); -+ if (ret) -+ goto out; - continue; - } - if (ret < 0) -@@ -1839,6 +1841,7 @@ static ssize_t vector_madvise(struct mm_ - } - madvise_unlock(mm, behavior); - -+out: - ret = (total_len - iov_iter_count(iter)) ? : ret; - - return ret; diff --git a/debian/patches/patchset-pf/fixes/0019-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch b/debian/patches/patchset-pf/fixes/0019-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch deleted file mode 100644 index 7c07d45..0000000 --- a/debian/patches/patchset-pf/fixes/0019-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch +++ /dev/null @@ -1,164 +0,0 @@ -From f0ab226d0eae3aa7e26524efc040026a65ead640 Mon Sep 17 00:00:00 2001 -From: Thomas Zimmermann -Date: Wed, 28 May 2025 10:02:08 +0200 -Subject: video: screen_info: Relocate framebuffers behind PCI bridges - -Apply PCI host-bridge window offsets to screen_info framebuffers. Fixes -invalid access to I/O memory. - -Resources behind a PCI host bridge can be relocated by a certain offset -in the kernel's CPU address range used for I/O. The framebuffer memory -range stored in screen_info refers to the CPU addresses as seen during -boot (where the offset is 0). During boot up, firmware may assign a -different memory offset to the PCI host bridge and thereby relocating -the framebuffer address of the PCI graphics device as seen by the kernel. -The information in screen_info must be updated as well. - -The helper pcibios_bus_to_resource() performs the relocation of the -screen_info's framebuffer resource (given in PCI bus addresses). The -result matches the I/O-memory resource of the PCI graphics device (given -in CPU addresses). As before, we store away the information necessary to -later update the information in screen_info itself. - -Commit 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated -EFI framebuffers") added the code for updating screen_info. It is based -on similar functionality that pre-existed in efifb. Efifb uses a pointer -to the PCI resource, while the newer code does a memcpy of the region. -Hence efifb sees any updates to the PCI resource and avoids the issue. - -v3: -- Only use struct pci_bus_region for PCI bus addresses (Bjorn) -- Clarify address semantics in commit messages and comments (Bjorn) -v2: -- Fixed tags (Takashi, Ivan) -- Updated information on efifb - -Signed-off-by: Thomas Zimmermann -Reviewed-by: Javier Martinez Canillas -Reported-by: "Ivan T. Ivanov" -Closes: https://bugzilla.suse.com/show_bug.cgi?id=1240696 -Tested-by: "Ivan T. Ivanov" -Fixes: 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated EFI framebuffers") -Cc: dri-devel@lists.freedesktop.org -Cc: # v6.9+ -Link: https://lore.kernel.org/r/20250528080234.7380-1-tzimmermann@suse.de ---- - drivers/video/screen_info_pci.c | 79 +++++++++++++++++++++------------ - 1 file changed, 50 insertions(+), 29 deletions(-) - ---- a/drivers/video/screen_info_pci.c -+++ b/drivers/video/screen_info_pci.c -@@ -7,8 +7,8 @@ - - static struct pci_dev *screen_info_lfb_pdev; - static size_t screen_info_lfb_bar; --static resource_size_t screen_info_lfb_offset; --static struct resource screen_info_lfb_res = DEFINE_RES_MEM(0, 0); -+static resource_size_t screen_info_lfb_res_start; // original start of resource -+static resource_size_t screen_info_lfb_offset; // framebuffer offset within resource - - static bool __screen_info_relocation_is_valid(const struct screen_info *si, struct resource *pr) - { -@@ -31,7 +31,7 @@ void screen_info_apply_fixups(void) - if (screen_info_lfb_pdev) { - struct resource *pr = &screen_info_lfb_pdev->resource[screen_info_lfb_bar]; - -- if (pr->start != screen_info_lfb_res.start) { -+ if (pr->start != screen_info_lfb_res_start) { - if (__screen_info_relocation_is_valid(si, pr)) { - /* - * Only update base if we have an actual -@@ -47,46 +47,67 @@ void screen_info_apply_fixups(void) - } - } - -+static int __screen_info_lfb_pci_bus_region(const struct screen_info *si, unsigned int type, -+ struct pci_bus_region *r) -+{ -+ u64 base, size; -+ -+ base = __screen_info_lfb_base(si); -+ if (!base) -+ return -EINVAL; -+ -+ size = __screen_info_lfb_size(si, type); -+ if (!size) -+ return -EINVAL; -+ -+ r->start = base; -+ r->end = base + size - 1; -+ -+ return 0; -+} -+ - static void screen_info_fixup_lfb(struct pci_dev *pdev) - { - unsigned int type; -- struct resource res[SCREEN_INFO_MAX_RESOURCES]; -- size_t i, numres; -+ struct pci_bus_region bus_region; - int ret; -+ struct resource r = { -+ .flags = IORESOURCE_MEM, -+ }; -+ const struct resource *pr; - const struct screen_info *si = &screen_info; - - if (screen_info_lfb_pdev) - return; // already found - - type = screen_info_video_type(si); -- if (type != VIDEO_TYPE_EFI) -- return; // only applies to EFI -+ if (!__screen_info_has_lfb(type)) -+ return; // only applies to EFI; maybe VESA - -- ret = screen_info_resources(si, res, ARRAY_SIZE(res)); -+ ret = __screen_info_lfb_pci_bus_region(si, type, &bus_region); - if (ret < 0) - return; -- numres = ret; - -- for (i = 0; i < numres; ++i) { -- struct resource *r = &res[i]; -- const struct resource *pr; -- -- if (!(r->flags & IORESOURCE_MEM)) -- continue; -- pr = pci_find_resource(pdev, r); -- if (!pr) -- continue; -- -- /* -- * We've found a PCI device with the framebuffer -- * resource. Store away the parameters to track -- * relocation of the framebuffer aperture. -- */ -- screen_info_lfb_pdev = pdev; -- screen_info_lfb_bar = pr - pdev->resource; -- screen_info_lfb_offset = r->start - pr->start; -- memcpy(&screen_info_lfb_res, r, sizeof(screen_info_lfb_res)); -- } -+ /* -+ * Translate the PCI bus address to resource. Account -+ * for an offset if the framebuffer is behind a PCI host -+ * bridge. -+ */ -+ pcibios_bus_to_resource(pdev->bus, &r, &bus_region); -+ -+ pr = pci_find_resource(pdev, &r); -+ if (!pr) -+ return; -+ -+ /* -+ * We've found a PCI device with the framebuffer -+ * resource. Store away the parameters to track -+ * relocation of the framebuffer aperture. -+ */ -+ screen_info_lfb_pdev = pdev; -+ screen_info_lfb_bar = pr - pdev->resource; -+ screen_info_lfb_offset = r.start - pr->start; -+ screen_info_lfb_res_start = bus_region.start; - } - DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY, 16, - screen_info_fixup_lfb); diff --git a/debian/patches/patchset-pf/fixes/0020-sysfb-Fix-screen_info-type-check-for-VGA.patch b/debian/patches/patchset-pf/fixes/0020-sysfb-Fix-screen_info-type-check-for-VGA.patch deleted file mode 100644 index 1c16ea3..0000000 --- a/debian/patches/patchset-pf/fixes/0020-sysfb-Fix-screen_info-type-check-for-VGA.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 717bcb42b8cd4119c88249fbfc26d08e25a2ca24 Mon Sep 17 00:00:00 2001 -From: Thomas Zimmermann -Date: Tue, 3 Jun 2025 17:48:20 +0200 -Subject: sysfb: Fix screen_info type check for VGA -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Use the helper screen_info_video_type() to get the framebuffer -type from struct screen_info. Handle supported values in sorted -switch statement. - -Reading orig_video_isVGA is unreliable. On most systems it is a -VIDEO_TYPE_ constant. On some systems with VGA it is simply set -to 1 to signal the presence of a VGA output. See vga_probe() for -an example. Retrieving the screen_info type with the helper -screen_info_video_type() detects these cases and returns the -appropriate VIDEO_TYPE_ constant. For VGA, sysfb creates a device -named "vga-framebuffer". - -The sysfb code has been taken from vga16fb, where it likely didn't -work correctly either. With this bugfix applied, vga16fb loads for -compatible vga-framebuffer devices. - -Fixes: 0db5b61e0dc0 ("fbdev/vga16fb: Create EGA/VGA devices in sysfb code") -Cc: Thomas Zimmermann -Cc: Javier Martinez Canillas -Cc: Alex Deucher -Cc: Tzung-Bi Shih -Cc: Helge Deller -Cc: "Uwe Kleine-König" -Cc: Zsolt Kajtar -Cc: # v6.1+ -Signed-off-by: Thomas Zimmermann -Reviewed-by: Tzung-Bi Shih -Reviewed-by: Javier Martinez Canillas -Link: https://lore.kernel.org/r/20250603154838.401882-1-tzimmermann@suse.de ---- - drivers/firmware/sysfb.c | 26 ++++++++++++++++++-------- - 1 file changed, 18 insertions(+), 8 deletions(-) - ---- a/drivers/firmware/sysfb.c -+++ b/drivers/firmware/sysfb.c -@@ -143,6 +143,7 @@ static __init int sysfb_init(void) - { - struct screen_info *si = &screen_info; - struct device *parent; -+ unsigned int type; - struct simplefb_platform_data mode; - const char *name; - bool compatible; -@@ -170,17 +171,26 @@ static __init int sysfb_init(void) - goto put_device; - } - -+ type = screen_info_video_type(si); -+ - /* if the FB is incompatible, create a legacy framebuffer device */ -- if (si->orig_video_isVGA == VIDEO_TYPE_EFI) -- name = "efi-framebuffer"; -- else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) -- name = "vesa-framebuffer"; -- else if (si->orig_video_isVGA == VIDEO_TYPE_VGAC) -- name = "vga-framebuffer"; -- else if (si->orig_video_isVGA == VIDEO_TYPE_EGAC) -+ switch (type) { -+ case VIDEO_TYPE_EGAC: - name = "ega-framebuffer"; -- else -+ break; -+ case VIDEO_TYPE_VGAC: -+ name = "vga-framebuffer"; -+ break; -+ case VIDEO_TYPE_VLFB: -+ name = "vesa-framebuffer"; -+ break; -+ case VIDEO_TYPE_EFI: -+ name = "efi-framebuffer"; -+ break; -+ default: - name = "platform-framebuffer"; -+ break; -+ } - - pd = platform_device_alloc(name, 0); - if (!pd) { diff --git a/debian/patches/patchset-pf/fixes/0021-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch b/debian/patches/patchset-pf/fixes/0021-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch deleted file mode 100644 index d1ef618..0000000 --- a/debian/patches/patchset-pf/fixes/0021-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch +++ /dev/null @@ -1,200 +0,0 @@ -From 08b1e02fc44abc04d813dbc827812db9ebca0dad Mon Sep 17 00:00:00 2001 -From: Luo Gengkun -Date: Mon, 21 Apr 2025 03:50:21 +0000 -Subject: watchdog: fix watchdog may detect false positive of softlockup - -When updating `watchdog_thresh`, there is a race condition between writing -the new `watchdog_thresh` value and stopping the old watchdog timer. If -the old timer triggers during this window, it may falsely detect a -softlockup due to the old interval and the new `watchdog_thresh` value -being used. The problem can be described as follow: - - # We asuume previous watchdog_thresh is 60, so the watchdog timer is - # coming every 24s. -echo 10 > /proc/sys/kernel/watchdog_thresh (User space) -| -+------>+ update watchdog_thresh (We are in kernel now) - | - | # using old interval and new `watchdog_thresh` - +------>+ watchdog hrtimer (irq context: detect softlockup) - | - | - +-------+ - | - | - + softlockup_stop_all - -To fix this problem, introduce a shadow variable for `watchdog_thresh`. -The update to the actual `watchdog_thresh` is delayed until after the old -timer is stopped, preventing false positives. - -The following testcase may help to understand this problem. - ---------------------------------------------- -echo RT_RUNTIME_SHARE > /sys/kernel/debug/sched/features -echo -1 > /proc/sys/kernel/sched_rt_runtime_us -echo 0 > /sys/kernel/debug/sched/fair_server/cpu3/runtime -echo 60 > /proc/sys/kernel/watchdog_thresh -taskset -c 3 chrt -r 99 /bin/bash -c "while true;do true; done" & -echo 10 > /proc/sys/kernel/watchdog_thresh & ---------------------------------------------- - -The test case above first removes the throttling restrictions for -real-time tasks. It then sets watchdog_thresh to 60 and executes a -real-time task ,a simple while(1) loop, on cpu3. Consequently, the final -command gets blocked because the presence of this real-time thread -prevents kworker:3 from being selected by the scheduler. This eventually -triggers a softlockup detection on cpu3 due to watchdog_timer_fn operating -with inconsistent variable - using both the old interval and the updated -watchdog_thresh simultaneously. - -[nysal@linux.ibm.com: fix the SOFTLOCKUP_DETECTOR=n case] - Link: https://lkml.kernel.org/r/20250502111120.282690-1-nysal@linux.ibm.com -Link: https://lkml.kernel.org/r/20250421035021.3507649-1-luogengkun@huaweicloud.com -Signed-off-by: Luo Gengkun -Signed-off-by: Nysal Jan K.A. -Cc: Doug Anderson -Cc: Joel Granados -Cc: Song Liu -Cc: Thomas Gleinxer -Cc: "Nysal Jan K.A." -Cc: Venkat Rao Bagalkote -Cc: -Signed-off-by: Andrew Morton ---- - kernel/watchdog.c | 41 +++++++++++++++++++++++++++-------------- - 1 file changed, 27 insertions(+), 14 deletions(-) - ---- a/kernel/watchdog.c -+++ b/kernel/watchdog.c -@@ -47,6 +47,7 @@ int __read_mostly watchdog_user_enabled - static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT; - static int __read_mostly watchdog_softlockup_user_enabled = 1; - int __read_mostly watchdog_thresh = 10; -+static int __read_mostly watchdog_thresh_next; - static int __read_mostly watchdog_hardlockup_available; - - struct cpumask watchdog_cpumask __read_mostly; -@@ -870,12 +871,20 @@ int lockup_detector_offline_cpu(unsigned - return 0; - } - --static void __lockup_detector_reconfigure(void) -+static void __lockup_detector_reconfigure(bool thresh_changed) - { - cpus_read_lock(); - watchdog_hardlockup_stop(); - - softlockup_stop_all(); -+ /* -+ * To prevent watchdog_timer_fn from using the old interval and -+ * the new watchdog_thresh at the same time, which could lead to -+ * false softlockup reports, it is necessary to update the -+ * watchdog_thresh after the softlockup is completed. -+ */ -+ if (thresh_changed) -+ watchdog_thresh = READ_ONCE(watchdog_thresh_next); - set_sample_period(); - lockup_detector_update_enable(); - if (watchdog_enabled && watchdog_thresh) -@@ -888,7 +897,7 @@ static void __lockup_detector_reconfigur - void lockup_detector_reconfigure(void) - { - mutex_lock(&watchdog_mutex); -- __lockup_detector_reconfigure(); -+ __lockup_detector_reconfigure(false); - mutex_unlock(&watchdog_mutex); - } - -@@ -908,27 +917,29 @@ static __init void lockup_detector_setup - return; - - mutex_lock(&watchdog_mutex); -- __lockup_detector_reconfigure(); -+ __lockup_detector_reconfigure(false); - softlockup_initialized = true; - mutex_unlock(&watchdog_mutex); - } - - #else /* CONFIG_SOFTLOCKUP_DETECTOR */ --static void __lockup_detector_reconfigure(void) -+static void __lockup_detector_reconfigure(bool thresh_changed) - { - cpus_read_lock(); - watchdog_hardlockup_stop(); -+ if (thresh_changed) -+ watchdog_thresh = READ_ONCE(watchdog_thresh_next); - lockup_detector_update_enable(); - watchdog_hardlockup_start(); - cpus_read_unlock(); - } - void lockup_detector_reconfigure(void) - { -- __lockup_detector_reconfigure(); -+ __lockup_detector_reconfigure(false); - } - static inline void lockup_detector_setup(void) - { -- __lockup_detector_reconfigure(); -+ __lockup_detector_reconfigure(false); - } - #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ - -@@ -946,11 +957,11 @@ void lockup_detector_soft_poweroff(void) - #ifdef CONFIG_SYSCTL - - /* Propagate any changes to the watchdog infrastructure */ --static void proc_watchdog_update(void) -+static void proc_watchdog_update(bool thresh_changed) - { - /* Remove impossible cpus to keep sysctl output clean. */ - cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); -- __lockup_detector_reconfigure(); -+ __lockup_detector_reconfigure(thresh_changed); - } - - /* -@@ -984,7 +995,7 @@ static int proc_watchdog_common(int whic - } else { - err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!err && old != READ_ONCE(*param)) -- proc_watchdog_update(); -+ proc_watchdog_update(false); - } - mutex_unlock(&watchdog_mutex); - return err; -@@ -1035,11 +1046,13 @@ static int proc_watchdog_thresh(const st - - mutex_lock(&watchdog_mutex); - -- old = READ_ONCE(watchdog_thresh); -+ watchdog_thresh_next = READ_ONCE(watchdog_thresh); -+ -+ old = watchdog_thresh_next; - err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - -- if (!err && write && old != READ_ONCE(watchdog_thresh)) -- proc_watchdog_update(); -+ if (!err && write && old != READ_ONCE(watchdog_thresh_next)) -+ proc_watchdog_update(true); - - mutex_unlock(&watchdog_mutex); - return err; -@@ -1060,7 +1073,7 @@ static int proc_watchdog_cpumask(const s - - err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); - if (!err && write) -- proc_watchdog_update(); -+ proc_watchdog_update(false); - - mutex_unlock(&watchdog_mutex); - return err; -@@ -1080,7 +1093,7 @@ static const struct ctl_table watchdog_s - }, - { - .procname = "watchdog_thresh", -- .data = &watchdog_thresh, -+ .data = &watchdog_thresh_next, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_watchdog_thresh, diff --git a/debian/patches/patchset-pf/fixes/0022-sched-rt-Fix-race-in-push_rt_task.patch b/debian/patches/patchset-pf/fixes/0022-sched-rt-Fix-race-in-push_rt_task.patch deleted file mode 100644 index e4a7a71..0000000 --- a/debian/patches/patchset-pf/fixes/0022-sched-rt-Fix-race-in-push_rt_task.patch +++ /dev/null @@ -1,288 +0,0 @@ -From ff8503c4997332bb5708c3b77f8a19f334e947a9 Mon Sep 17 00:00:00 2001 -From: Harshit Agarwal -Date: Tue, 25 Feb 2025 18:05:53 +0000 -Subject: sched/rt: Fix race in push_rt_task -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Overview -======== -When a CPU chooses to call push_rt_task and picks a task to push to -another CPU's runqueue then it will call find_lock_lowest_rq method -which would take a double lock on both CPUs' runqueues. If one of the -locks aren't readily available, it may lead to dropping the current -runqueue lock and reacquiring both the locks at once. During this window -it is possible that the task is already migrated and is running on some -other CPU. These cases are already handled. However, if the task is -migrated and has already been executed and another CPU is now trying to -wake it up (ttwu) such that it is queued again on the runqeue -(on_rq is 1) and also if the task was run by the same CPU, then the -current checks will pass even though the task was migrated out and is no -longer in the pushable tasks list. - -Crashes -======= -This bug resulted in quite a few flavors of crashes triggering kernel -panics with various crash signatures such as assert failures, page -faults, null pointer dereferences, and queue corruption errors all -coming from scheduler itself. - -Some of the crashes: --> kernel BUG at kernel/sched/rt.c:1616! BUG_ON(idx >= MAX_RT_PRIO) - Call Trace: - ? __die_body+0x1a/0x60 - ? die+0x2a/0x50 - ? do_trap+0x85/0x100 - ? pick_next_task_rt+0x6e/0x1d0 - ? do_error_trap+0x64/0xa0 - ? pick_next_task_rt+0x6e/0x1d0 - ? exc_invalid_op+0x4c/0x60 - ? pick_next_task_rt+0x6e/0x1d0 - ? asm_exc_invalid_op+0x12/0x20 - ? pick_next_task_rt+0x6e/0x1d0 - __schedule+0x5cb/0x790 - ? update_ts_time_stats+0x55/0x70 - schedule_idle+0x1e/0x40 - do_idle+0x15e/0x200 - cpu_startup_entry+0x19/0x20 - start_secondary+0x117/0x160 - secondary_startup_64_no_verify+0xb0/0xbb - --> BUG: kernel NULL pointer dereference, address: 00000000000000c0 - Call Trace: - ? __die_body+0x1a/0x60 - ? no_context+0x183/0x350 - ? __warn+0x8a/0xe0 - ? exc_page_fault+0x3d6/0x520 - ? asm_exc_page_fault+0x1e/0x30 - ? pick_next_task_rt+0xb5/0x1d0 - ? pick_next_task_rt+0x8c/0x1d0 - __schedule+0x583/0x7e0 - ? update_ts_time_stats+0x55/0x70 - schedule_idle+0x1e/0x40 - do_idle+0x15e/0x200 - cpu_startup_entry+0x19/0x20 - start_secondary+0x117/0x160 - secondary_startup_64_no_verify+0xb0/0xbb - --> BUG: unable to handle page fault for address: ffff9464daea5900 - kernel BUG at kernel/sched/rt.c:1861! BUG_ON(rq->cpu != task_cpu(p)) - --> kernel BUG at kernel/sched/rt.c:1055! BUG_ON(!rq->nr_running) - Call Trace: - ? __die_body+0x1a/0x60 - ? die+0x2a/0x50 - ? do_trap+0x85/0x100 - ? dequeue_top_rt_rq+0xa2/0xb0 - ? do_error_trap+0x64/0xa0 - ? dequeue_top_rt_rq+0xa2/0xb0 - ? exc_invalid_op+0x4c/0x60 - ? dequeue_top_rt_rq+0xa2/0xb0 - ? asm_exc_invalid_op+0x12/0x20 - ? dequeue_top_rt_rq+0xa2/0xb0 - dequeue_rt_entity+0x1f/0x70 - dequeue_task_rt+0x2d/0x70 - __schedule+0x1a8/0x7e0 - ? blk_finish_plug+0x25/0x40 - schedule+0x3c/0xb0 - futex_wait_queue_me+0xb6/0x120 - futex_wait+0xd9/0x240 - do_futex+0x344/0xa90 - ? get_mm_exe_file+0x30/0x60 - ? audit_exe_compare+0x58/0x70 - ? audit_filter_rules.constprop.26+0x65e/0x1220 - __x64_sys_futex+0x148/0x1f0 - do_syscall_64+0x30/0x80 - entry_SYSCALL_64_after_hwframe+0x62/0xc7 - --> BUG: unable to handle page fault for address: ffff8cf3608bc2c0 - Call Trace: - ? __die_body+0x1a/0x60 - ? no_context+0x183/0x350 - ? spurious_kernel_fault+0x171/0x1c0 - ? exc_page_fault+0x3b6/0x520 - ? plist_check_list+0x15/0x40 - ? plist_check_list+0x2e/0x40 - ? asm_exc_page_fault+0x1e/0x30 - ? _cond_resched+0x15/0x30 - ? futex_wait_queue_me+0xc8/0x120 - ? futex_wait+0xd9/0x240 - ? try_to_wake_up+0x1b8/0x490 - ? futex_wake+0x78/0x160 - ? do_futex+0xcd/0xa90 - ? plist_check_list+0x15/0x40 - ? plist_check_list+0x2e/0x40 - ? plist_del+0x6a/0xd0 - ? plist_check_list+0x15/0x40 - ? plist_check_list+0x2e/0x40 - ? dequeue_pushable_task+0x20/0x70 - ? __schedule+0x382/0x7e0 - ? asm_sysvec_reschedule_ipi+0xa/0x20 - ? schedule+0x3c/0xb0 - ? exit_to_user_mode_prepare+0x9e/0x150 - ? irqentry_exit_to_user_mode+0x5/0x30 - ? asm_sysvec_reschedule_ipi+0x12/0x20 - -Above are some of the common examples of the crashes that were observed -due to this issue. - -Details -======= -Let's look at the following scenario to understand this race. - -1) CPU A enters push_rt_task - a) CPU A has chosen next_task = task p. - b) CPU A calls find_lock_lowest_rq(Task p, CPU Z’s rq). - c) CPU A identifies CPU X as a destination CPU (X < Z). - d) CPU A enters double_lock_balance(CPU Z’s rq, CPU X’s rq). - e) Since X is lower than Z, CPU A unlocks CPU Z’s rq. Someone else has - locked CPU X’s rq, and thus, CPU A must wait. - -2) At CPU Z - a) Previous task has completed execution and thus, CPU Z enters - schedule, locks its own rq after CPU A releases it. - b) CPU Z dequeues previous task and begins executing task p. - c) CPU Z unlocks its rq. - d) Task p yields the CPU (ex. by doing IO or waiting to acquire a - lock) which triggers the schedule function on CPU Z. - e) CPU Z enters schedule again, locks its own rq, and dequeues task p. - f) As part of dequeue, it sets p.on_rq = 0 and unlocks its rq. - -3) At CPU B - a) CPU B enters try_to_wake_up with input task p. - b) Since CPU Z dequeued task p, p.on_rq = 0, and CPU B updates - B.state = WAKING. - c) CPU B via select_task_rq determines CPU Y as the target CPU. - -4) The race - a) CPU A acquires CPU X’s lock and relocks CPU Z. - b) CPU A reads task p.cpu = Z and incorrectly concludes task p is - still on CPU Z. - c) CPU A failed to notice task p had been dequeued from CPU Z while - CPU A was waiting for locks in double_lock_balance. If CPU A knew - that task p had been dequeued, it would return NULL forcing - push_rt_task to give up the task p's migration. - d) CPU B updates task p.cpu = Y and calls ttwu_queue. - e) CPU B locks Ys rq. CPU B enqueues task p onto Y and sets task - p.on_rq = 1. - f) CPU B unlocks CPU Y, triggering memory synchronization. - g) CPU A reads task p.on_rq = 1, cementing its assumption that task p - has not migrated. - h) CPU A decides to migrate p to CPU X. - -This leads to A dequeuing p from Y's queue and various crashes down the -line. - -Solution -======== -The solution here is fairly simple. After obtaining the lock (at 4a), -the check is enhanced to make sure that the task is still at the head of -the pushable tasks list. If not, then it is anyway not suitable for -being pushed out. - -Testing -======= -The fix is tested on a cluster of 3 nodes, where the panics due to this -are hit every couple of days. A fix similar to this was deployed on such -cluster and was stable for more than 30 days. - -Co-developed-by: Jon Kohler -Signed-off-by: Jon Kohler -Co-developed-by: Gauri Patwardhan -Signed-off-by: Gauri Patwardhan -Co-developed-by: Rahul Chunduru -Signed-off-by: Rahul Chunduru -Signed-off-by: Harshit Agarwal -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: "Steven Rostedt (Google)" -Reviewed-by: Phil Auld -Tested-by: Will Ton -Cc: stable@vger.kernel.org -Link: https://lore.kernel.org/r/20250225180553.167995-1-harshit@nutanix.com ---- - kernel/sched/rt.c | 54 +++++++++++++++++++++++------------------------ - 1 file changed, 26 insertions(+), 28 deletions(-) - ---- a/kernel/sched/rt.c -+++ b/kernel/sched/rt.c -@@ -1883,6 +1883,27 @@ static int find_lowest_rq(struct task_st - return -1; - } - -+static struct task_struct *pick_next_pushable_task(struct rq *rq) -+{ -+ struct task_struct *p; -+ -+ if (!has_pushable_tasks(rq)) -+ return NULL; -+ -+ p = plist_first_entry(&rq->rt.pushable_tasks, -+ struct task_struct, pushable_tasks); -+ -+ BUG_ON(rq->cpu != task_cpu(p)); -+ BUG_ON(task_current(rq, p)); -+ BUG_ON(task_current_donor(rq, p)); -+ BUG_ON(p->nr_cpus_allowed <= 1); -+ -+ BUG_ON(!task_on_rq_queued(p)); -+ BUG_ON(!rt_task(p)); -+ -+ return p; -+} -+ - /* Will lock the rq it finds */ - static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) - { -@@ -1913,18 +1934,16 @@ static struct rq *find_lock_lowest_rq(st - /* - * We had to unlock the run queue. In - * the mean time, task could have -- * migrated already or had its affinity changed. -- * Also make sure that it wasn't scheduled on its rq. -+ * migrated already or had its affinity changed, -+ * therefore check if the task is still at the -+ * head of the pushable tasks list. - * It is possible the task was scheduled, set - * "migrate_disabled" and then got preempted, so we must - * check the task migration disable flag here too. - */ -- if (unlikely(task_rq(task) != rq || -+ if (unlikely(is_migration_disabled(task) || - !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || -- task_on_cpu(rq, task) || -- !rt_task(task) || -- is_migration_disabled(task) || -- !task_on_rq_queued(task))) { -+ task != pick_next_pushable_task(rq))) { - - double_unlock_balance(rq, lowest_rq); - lowest_rq = NULL; -@@ -1944,27 +1963,6 @@ static struct rq *find_lock_lowest_rq(st - return lowest_rq; - } - --static struct task_struct *pick_next_pushable_task(struct rq *rq) --{ -- struct task_struct *p; -- -- if (!has_pushable_tasks(rq)) -- return NULL; -- -- p = plist_first_entry(&rq->rt.pushable_tasks, -- struct task_struct, pushable_tasks); -- -- BUG_ON(rq->cpu != task_cpu(p)); -- BUG_ON(task_current(rq, p)); -- BUG_ON(task_current_donor(rq, p)); -- BUG_ON(p->nr_cpus_allowed <= 1); -- -- BUG_ON(!task_on_rq_queued(p)); -- BUG_ON(!rt_task(p)); -- -- return p; --} -- - /* - * If the current CPU has more than one RT task, see if the non - * running task can migrate over to a CPU that is running a task diff --git a/debian/patches/patchset-pf/fixes/0023-sched-fair-Adhere-to-place_entity-constraints.patch b/debian/patches/patchset-pf/fixes/0023-sched-fair-Adhere-to-place_entity-constraints.patch deleted file mode 100644 index 37360d5..0000000 --- a/debian/patches/patchset-pf/fixes/0023-sched-fair-Adhere-to-place_entity-constraints.patch +++ /dev/null @@ -1,62 +0,0 @@ -From e02cbdc12bf63da363d7e3391376819241d67fbe Mon Sep 17 00:00:00 2001 -From: Peter Zijlstra -Date: Tue, 28 Jan 2025 15:39:49 +0100 -Subject: sched/fair: Adhere to place_entity() constraints - -Mike reports that commit 6d71a9c61604 ("sched/fair: Fix EEVDF entity -placement bug causing scheduling lag") relies on commit 4423af84b297 -("sched/fair: optimize the PLACE_LAG when se->vlag is zero") to not -trip a WARN in place_entity(). - -What happens is that the lag of the very last entity is 0 per -definition -- the average of one element matches the value of that -element. Therefore place_entity() will match the condition skipping -the lag adjustment: - - if (sched_feat(PLACE_LAG) && cfs_rq->nr_queued && se->vlag) { - -Without the 'se->vlag' condition -- it will attempt to adjust the zero -lag even though we're inserting into an empty tree. - -Notably, we should have failed the 'cfs_rq->nr_queued' condition, but -don't because they didn't get updated. - -Additionally, move update_load_add() after placement() as is -consistent with other place_entity() users -- this change is -non-functional, place_entity() does not use cfs_rq->load. - -Fixes: 6d71a9c61604 ("sched/fair: Fix EEVDF entity placement bug causing scheduling lag") -Signed-off-by: Peter Zijlstra (Intel) -Reported-by: Mike Galbraith -Signed-off-by: "Peter Zijlstra (Intel)" -Signed-off-by: Mike Galbraith -Signed-off-by: Peter Zijlstra (Intel) -Cc: stable@vger.kernel.org -Link: https://lore.kernel.org/r/c216eb4ef0e0e0029c600aefc69d56681cee5581.camel@gmx.de ---- - kernel/sched/fair.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -3808,6 +3808,7 @@ static void reweight_entity(struct cfs_r - update_entity_lag(cfs_rq, se); - se->deadline -= se->vruntime; - se->rel_deadline = 1; -+ cfs_rq->nr_queued--; - if (!curr) - __dequeue_entity(cfs_rq, se); - update_load_sub(&cfs_rq->load, se->load.weight); -@@ -3834,10 +3835,11 @@ static void reweight_entity(struct cfs_r - - enqueue_load_avg(cfs_rq, se); - if (se->on_rq) { -- update_load_add(&cfs_rq->load, se->load.weight); - place_entity(cfs_rq, se, 0); -+ update_load_add(&cfs_rq->load, se->load.weight); - if (!curr) - __enqueue_entity(cfs_rq, se); -+ cfs_rq->nr_queued++; - - /* - * The entity's vruntime has been adjusted, so let's check diff --git a/debian/patches/patchset-pf/fixes/0024-alloc_tag-handle-module-codetag-load-errors-as-modul.patch b/debian/patches/patchset-pf/fixes/0024-alloc_tag-handle-module-codetag-load-errors-as-modul.patch deleted file mode 100644 index 87eb717..0000000 --- a/debian/patches/patchset-pf/fixes/0024-alloc_tag-handle-module-codetag-load-errors-as-modul.patch +++ /dev/null @@ -1,184 +0,0 @@ -From 7257e4f8df6b5783978ab06063fc8529ee2631d5 Mon Sep 17 00:00:00 2001 -From: Suren Baghdasaryan -Date: Wed, 21 May 2025 09:06:02 -0700 -Subject: alloc_tag: handle module codetag load errors as module load failures - -Failures inside codetag_load_module() are currently ignored. As a result -an error there would not cause a module load failure and freeing of the -associated resources. Correct this behavior by propagating the error code -to the caller and handling possible errors. With this change, error to -allocate percpu counters, which happens at this stage, will not be ignored -and will cause a module load failure and freeing of resources. With this -change we also do not need to disable memory allocation profiling when -this error happens, instead we fail to load the module. - -Link: https://lkml.kernel.org/r/20250521160602.1940771-1-surenb@google.com -Fixes: 10075262888b ("alloc_tag: allocate percpu counters for module tags dynamically") -Signed-off-by: Suren Baghdasaryan -Reported-by: Casey Chen -Closes: https://lore.kernel.org/all/20250520231620.15259-1-cachen@purestorage.com/ -Cc: Daniel Gomez -Cc: David Wang <00107082@163.com> -Cc: Kent Overstreet -Cc: Luis Chamberalin -Cc: Petr Pavlu -Cc: Sami Tolvanen -Cc: -Signed-off-by: Andrew Morton ---- - include/linux/codetag.h | 8 ++++---- - kernel/module/main.c | 5 +++-- - lib/alloc_tag.c | 12 +++++++----- - lib/codetag.c | 34 +++++++++++++++++++++++++--------- - 4 files changed, 39 insertions(+), 20 deletions(-) - ---- a/include/linux/codetag.h -+++ b/include/linux/codetag.h -@@ -36,8 +36,8 @@ union codetag_ref { - struct codetag_type_desc { - const char *section; - size_t tag_size; -- void (*module_load)(struct module *mod, -- struct codetag *start, struct codetag *end); -+ int (*module_load)(struct module *mod, -+ struct codetag *start, struct codetag *end); - void (*module_unload)(struct module *mod, - struct codetag *start, struct codetag *end); - #ifdef CONFIG_MODULES -@@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struc - unsigned long align); - void codetag_free_module_sections(struct module *mod); - void codetag_module_replaced(struct module *mod, struct module *new_mod); --void codetag_load_module(struct module *mod); -+int codetag_load_module(struct module *mod); - void codetag_unload_module(struct module *mod); - - #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ -@@ -103,7 +103,7 @@ codetag_alloc_module_section(struct modu - unsigned long align) { return NULL; } - static inline void codetag_free_module_sections(struct module *mod) {} - static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} --static inline void codetag_load_module(struct module *mod) {} -+static inline int codetag_load_module(struct module *mod) { return 0; } - static inline void codetag_unload_module(struct module *mod) {} - - #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ ---- a/kernel/module/main.c -+++ b/kernel/module/main.c -@@ -3399,11 +3399,12 @@ static int load_module(struct load_info - goto sysfs_cleanup; - } - -+ if (codetag_load_module(mod)) -+ goto sysfs_cleanup; -+ - /* Get rid of temporary copy. */ - free_copy(info, flags); - -- codetag_load_module(mod); -- - /* Done! */ - trace_module_load(mod); - ---- a/lib/alloc_tag.c -+++ b/lib/alloc_tag.c -@@ -618,15 +618,16 @@ out: - mas_unlock(&mas); - } - --static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) -+static int load_module(struct module *mod, struct codetag *start, struct codetag *stop) - { - /* Allocate module alloc_tag percpu counters */ - struct alloc_tag *start_tag; - struct alloc_tag *stop_tag; - struct alloc_tag *tag; - -+ /* percpu counters for core allocations are already statically allocated */ - if (!mod) -- return; -+ return 0; - - start_tag = ct_to_alloc_tag(start); - stop_tag = ct_to_alloc_tag(stop); -@@ -638,12 +639,13 @@ static void load_module(struct module *m - free_percpu(tag->counters); - tag->counters = NULL; - } -- shutdown_mem_profiling(true); -- pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n", -+ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n", - mod->name); -- break; -+ return -ENOMEM; - } - } -+ -+ return 0; - } - - static void replace_module(struct module *mod, struct module *new_mod) ---- a/lib/codetag.c -+++ b/lib/codetag.c -@@ -167,6 +167,7 @@ static int codetag_module_init(struct co - { - struct codetag_range range; - struct codetag_module *cmod; -+ int mod_id; - int err; - - range = get_section_range(mod, cttype->desc.section); -@@ -190,11 +191,20 @@ static int codetag_module_init(struct co - cmod->range = range; - - down_write(&cttype->mod_lock); -- err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); -- if (err >= 0) { -- cttype->count += range_size(cttype, &range); -- if (cttype->desc.module_load) -- cttype->desc.module_load(mod, range.start, range.stop); -+ mod_id = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL); -+ if (mod_id >= 0) { -+ if (cttype->desc.module_load) { -+ err = cttype->desc.module_load(mod, range.start, range.stop); -+ if (!err) -+ cttype->count += range_size(cttype, &range); -+ else -+ idr_remove(&cttype->mod_idr, mod_id); -+ } else { -+ cttype->count += range_size(cttype, &range); -+ err = 0; -+ } -+ } else { -+ err = mod_id; - } - up_write(&cttype->mod_lock); - -@@ -295,17 +305,23 @@ void codetag_module_replaced(struct modu - mutex_unlock(&codetag_lock); - } - --void codetag_load_module(struct module *mod) -+int codetag_load_module(struct module *mod) - { - struct codetag_type *cttype; -+ int ret = 0; - - if (!mod) -- return; -+ return 0; - - mutex_lock(&codetag_lock); -- list_for_each_entry(cttype, &codetag_types, link) -- codetag_module_init(cttype, mod); -+ list_for_each_entry(cttype, &codetag_types, link) { -+ ret = codetag_module_init(cttype, mod); -+ if (ret) -+ break; -+ } - mutex_unlock(&codetag_lock); -+ -+ return ret; - } - - void codetag_unload_module(struct module *mod) diff --git a/debian/patches/patchset-pf/fixes/0025-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch b/debian/patches/patchset-pf/fixes/0025-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch deleted file mode 100644 index 7432543..0000000 --- a/debian/patches/patchset-pf/fixes/0025-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 57fdc30dcdad60e3b868682cc1e77083c091aef5 Mon Sep 17 00:00:00 2001 -From: Chuck Lever -Date: Sun, 27 Apr 2025 12:39:59 -0400 -Subject: svcrdma: Unregister the device if svc_rdma_accept() fails - -To handle device removal, svc_rdma_accept() requests removal -notification for the underlying device when accepting a connection. -However svc_rdma_free() is not invoked if svc_rdma_accept() fails. -There needs to be a matching "unregister" in that case; otherwise -the device cannot be removed. - -Fixes: c4de97f7c454 ("svcrdma: Handle device removal outside of the CM event handler") -Cc: stable@vger.kernel.org -Reviewed-by: Zhu Yanjun -Signed-off-by: Chuck Lever ---- - net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/net/sunrpc/xprtrdma/svc_rdma_transport.c -+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c -@@ -577,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept( - if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) - ib_destroy_qp(newxprt->sc_qp); - rdma_destroy_id(newxprt->sc_cm_id); -+ rpcrdma_rn_unregister(dev, &newxprt->sc_rn); - /* This call to put will destroy the transport */ - svc_xprt_put(&newxprt->sc_xprt); - return NULL; diff --git a/debian/patches/patchset-pf/fixes/0026-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch b/debian/patches/patchset-pf/fixes/0026-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch deleted file mode 100644 index cacfc0b..0000000 --- a/debian/patches/patchset-pf/fixes/0026-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 92e99ba55ff0ce68ea7567331beda21861da2028 Mon Sep 17 00:00:00 2001 -From: Chuck Lever -Date: Wed, 21 May 2025 16:34:13 -0400 -Subject: SUNRPC: Prevent hang on NFS mount with xprtsec=[m]tls - -Engineers at Hammerspace noticed that sometimes mounting with -"xprtsec=tls" hangs for a minute or so, and then times out, even -when the NFS server is reachable and responsive. - -kTLS shuts off data_ready callbacks if strp->msg_ready is set to -mitigate data_ready callbacks when a full TLS record is not yet -ready to be read from the socket. - -Normally msg_ready is clear when the first TLS record arrives on -a socket. However, I observed that sometimes tls_setsockopt() sets -strp->msg_ready, and that prevents forward progress because -tls_data_ready() becomes a no-op. - -Moreover, Jakub says: "If there's a full record queued at the time -when [tlshd] passes the socket back to the kernel, it's up to the -reader to read the already queued data out." So SunRPC cannot -expect a data_ready call when ingress data is already waiting. - -Add an explicit poll after SunRPC's upper transport is set up to -pick up any data that arrived after the TLS handshake but before -transport set-up is complete. - -Reported-by: Steve Sears -Suggested-by: Jakub Kacinski -Fixes: 75eb6af7acdf ("SUNRPC: Add a TCP-with-TLS RPC transport class") -Tested-by: Mike Snitzer -Reviewed-by: Mike Snitzer -Cc: stable@vger.kernel.org -Signed-off-by: Chuck Lever -Signed-off-by: Anna Schumaker ---- - net/sunrpc/xprtsock.c | 5 +++++ - 1 file changed, 5 insertions(+) - ---- a/net/sunrpc/xprtsock.c -+++ b/net/sunrpc/xprtsock.c -@@ -2740,6 +2740,11 @@ static void xs_tcp_tls_setup_socket(stru - } - rpc_shutdown_client(lower_clnt); - -+ /* Check for ingress data that arrived before the socket's -+ * ->data_ready callback was set up. -+ */ -+ xs_poll_check_readable(upper_transport); -+ - out_unlock: - current_restore_flags(pflags, PF_MEMALLOC); - upper_transport->clnt = NULL; diff --git a/debian/patches/patchset-pf/fixes/0027-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch b/debian/patches/patchset-pf/fixes/0027-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch deleted file mode 100644 index de03a05..0000000 --- a/debian/patches/patchset-pf/fixes/0027-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch +++ /dev/null @@ -1,89 +0,0 @@ -From ac0c5ac5efecec7f731a1d80ec40ef3d34adc5ee Mon Sep 17 00:00:00 2001 -From: Saurabh Sengar -Date: Thu, 29 May 2025 03:18:30 -0700 -Subject: hv_netvsc: fix potential deadlock in netvsc_vf_setxdp() - -The MANA driver's probe registers netdevice via the following call chain: - -mana_probe() - register_netdev() - register_netdevice() - -register_netdevice() calls notifier callback for netvsc driver, -holding the netdev mutex via netdev_lock_ops(). - -Further this netvsc notifier callback end up attempting to acquire the -same lock again in dev_xdp_propagate() leading to deadlock. - -netvsc_netdev_event() - netvsc_vf_setxdp() - dev_xdp_propagate() - -This deadlock was not observed so far because net_shaper_ops was never set, -and thus the lock was effectively a no-op in this case. Fix this by using -netif_xdp_propagate() instead of dev_xdp_propagate() to avoid recursive -locking in this path. - -And, since no deadlock is observed on the other path which is via -netvsc_probe, add the lock exclusivly for that path. - -Also, clean up the unregistration path by removing the unnecessary call to -netvsc_vf_setxdp(), since unregister_netdevice_many_notify() already -performs this cleanup via dev_xdp_uninstall(). - -Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf") -Cc: stable@vger.kernel.org -Signed-off-by: Saurabh Sengar -Tested-by: Erni Sri Satya Vennela -Reviewed-by: Haiyang Zhang -Reviewed-by: Subbaraya Sundeep -Link: https://patch.msgid.link/1748513910-23963-1-git-send-email-ssengar@linux.microsoft.com -Signed-off-by: Jakub Kicinski ---- - drivers/net/hyperv/netvsc_bpf.c | 2 +- - drivers/net/hyperv/netvsc_drv.c | 4 ++-- - net/core/dev.c | 1 + - 3 files changed, 4 insertions(+), 3 deletions(-) - ---- a/drivers/net/hyperv/netvsc_bpf.c -+++ b/drivers/net/hyperv/netvsc_bpf.c -@@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device * - xdp.command = XDP_SETUP_PROG; - xdp.prog = prog; - -- ret = dev_xdp_propagate(vf_netdev, &xdp); -+ ret = netif_xdp_propagate(vf_netdev, &xdp); - - if (ret && prog) - bpf_prog_put(prog); ---- a/drivers/net/hyperv/netvsc_drv.c -+++ b/drivers/net/hyperv/netvsc_drv.c -@@ -2462,8 +2462,6 @@ static int netvsc_unregister_vf(struct n - - netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - -- netvsc_vf_setxdp(vf_netdev, NULL); -- - reinit_completion(&net_device_ctx->vf_add); - netdev_rx_handler_unregister(vf_netdev); - netdev_upper_dev_unlink(vf_netdev, ndev); -@@ -2631,7 +2629,9 @@ static int netvsc_probe(struct hv_device - continue; - - netvsc_prepare_bonding(vf_netdev); -+ netdev_lock_ops(vf_netdev); - netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); -+ netdev_unlock_ops(vf_netdev); - __netvsc_vf_setup(net, vf_netdev); - break; - } ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -9863,6 +9863,7 @@ int netif_xdp_propagate(struct net_devic - - return dev->netdev_ops->ndo_bpf(dev, bpf); - } -+EXPORT_SYMBOL_GPL(netif_xdp_propagate); - - u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) - { diff --git a/debian/patches/patchset-pf/fixes/0028-net-clear-the-dst-when-changing-skb-protocol.patch b/debian/patches/patchset-pf/fixes/0028-net-clear-the-dst-when-changing-skb-protocol.patch deleted file mode 100644 index f780308..0000000 --- a/debian/patches/patchset-pf/fixes/0028-net-clear-the-dst-when-changing-skb-protocol.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 485c82a86fb97fb86cac303348c85b6cf71fd787 Mon Sep 17 00:00:00 2001 -From: Jakub Kicinski -Date: Mon, 9 Jun 2025 17:12:44 -0700 -Subject: net: clear the dst when changing skb protocol -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -A not-so-careful NAT46 BPF program can crash the kernel -if it indiscriminately flips ingress packets from v4 to v6: - - BUG: kernel NULL pointer dereference, address: 0000000000000000 - ip6_rcv_core (net/ipv6/ip6_input.c:190:20) - ipv6_rcv (net/ipv6/ip6_input.c:306:8) - process_backlog (net/core/dev.c:6186:4) - napi_poll (net/core/dev.c:6906:9) - net_rx_action (net/core/dev.c:7028:13) - do_softirq (kernel/softirq.c:462:3) - netif_rx (net/core/dev.c:5326:3) - dev_loopback_xmit (net/core/dev.c:4015:2) - ip_mc_finish_output (net/ipv4/ip_output.c:363:8) - NF_HOOK (./include/linux/netfilter.h:314:9) - ip_mc_output (net/ipv4/ip_output.c:400:5) - dst_output (./include/net/dst.h:459:9) - ip_local_out (net/ipv4/ip_output.c:130:9) - ip_send_skb (net/ipv4/ip_output.c:1496:8) - udp_send_skb (net/ipv4/udp.c:1040:8) - udp_sendmsg (net/ipv4/udp.c:1328:10) - -The output interface has a 4->6 program attached at ingress. -We try to loop the multicast skb back to the sending socket. -Ingress BPF runs as part of netif_rx(), pushes a valid v6 hdr -and changes skb->protocol to v6. We enter ip6_rcv_core which -tries to use skb_dst(). But the dst is still an IPv4 one left -after IPv4 mcast output. - -Clear the dst in all BPF helpers which change the protocol. -Try to preserve metadata dsts, those may carry non-routing -metadata. - -Cc: stable@vger.kernel.org -Reviewed-by: Maciej Żenczykowski -Acked-by: Daniel Borkmann -Fixes: d219df60a70e ("bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()") -Fixes: 1b00e0dfe7d0 ("bpf: update skb->protocol in bpf_skb_net_grow") -Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") -Reviewed-by: Willem de Bruijn -Link: https://patch.msgid.link/20250610001245.1981782-1-kuba@kernel.org -Signed-off-by: Jakub Kicinski ---- - net/core/filter.c | 19 +++++++++++++------ - 1 file changed, 13 insertions(+), 6 deletions(-) - ---- a/net/core/filter.c -+++ b/net/core/filter.c -@@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_s - .arg1_type = ARG_PTR_TO_CTX, - }; - -+static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) -+{ -+ skb->protocol = htons(proto); -+ if (skb_valid_dst(skb)) -+ skb_dst_drop(skb); -+} -+ - static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) - { - /* Caller already did skb_cow() with len as headroom, -@@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct s - } - } - -- skb->protocol = htons(ETH_P_IPV6); -+ bpf_skb_change_protocol(skb, ETH_P_IPV6); - skb_clear_hash(skb); - - return 0; -@@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct s - } - } - -- skb->protocol = htons(ETH_P_IP); -+ bpf_skb_change_protocol(skb, ETH_P_IP); - skb_clear_hash(skb); - - return 0; -@@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_bu - /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) -- skb->protocol = htons(ETH_P_IPV6); -+ bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) -- skb->protocol = htons(ETH_P_IP); -+ bpf_skb_change_protocol(skb, ETH_P_IP); - } - - if (skb_is_gso(skb)) { -@@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_ - /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) -- skb->protocol = htons(ETH_P_IPV6); -+ bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) -- skb->protocol = htons(ETH_P_IP); -+ bpf_skb_change_protocol(skb, ETH_P_IP); - - if (skb_is_gso(skb)) { - struct skb_shared_info *shinfo = skb_shinfo(skb); diff --git a/debian/patches/patchset-pf/fixes/0029-net_sched-sch_sfq-reject-invalid-perturb-period.patch b/debian/patches/patchset-pf/fixes/0029-net_sched-sch_sfq-reject-invalid-perturb-period.patch deleted file mode 100644 index 1b1bb76..0000000 --- a/debian/patches/patchset-pf/fixes/0029-net_sched-sch_sfq-reject-invalid-perturb-period.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2bf1f4a3adcecc53c1012e460d1412cece3747ce Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Wed, 11 Jun 2025 08:35:01 +0000 -Subject: net_sched: sch_sfq: reject invalid perturb period - -Gerrard Tai reported that SFQ perturb_period has no range check yet, -and this can be used to trigger a race condition fixed in a separate patch. - -We want to make sure ctl->perturb_period * HZ will not overflow -and is positive. - -Tested: - -tc qd add dev lo root sfq perturb -10 # negative value : error -Error: sch_sfq: invalid perturb period. - -tc qd add dev lo root sfq perturb 1000000000 # too big : error -Error: sch_sfq: invalid perturb period. - -tc qd add dev lo root sfq perturb 2000000 # acceptable value -tc -s -d qd sh dev lo -qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec - Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) - backlog 0b 0p requeues 0 - -Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") -Reported-by: Gerrard Tai -Signed-off-by: Eric Dumazet -Cc: stable@vger.kernel.org -Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com -Signed-off-by: Jakub Kicinski ---- - net/sched/sch_sfq.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - ---- a/net/sched/sch_sfq.c -+++ b/net/sched/sch_sfq.c -@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, - NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); - return -EINVAL; - } -+ -+ if (ctl->perturb_period < 0 || -+ ctl->perturb_period > INT_MAX / HZ) { -+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); -+ return -EINVAL; -+ } -+ perturb_period = ctl->perturb_period * HZ; -+ - if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, - ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) - return -EINVAL; -@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, - headdrop = q->headdrop; - maxdepth = q->maxdepth; - maxflows = q->maxflows; -- perturb_period = q->perturb_period; - quantum = q->quantum; - flags = q->flags; - - /* update and validate configuration */ - if (ctl->quantum) - quantum = ctl->quantum; -- perturb_period = ctl->perturb_period * HZ; - if (ctl->flows) - maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); - if (ctl->divisor) { diff --git a/debian/patches/patchset-pf/fixes/0030-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch b/debian/patches/patchset-pf/fixes/0030-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch deleted file mode 100644 index 31f60a2..0000000 --- a/debian/patches/patchset-pf/fixes/0030-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 90a5248443f925040b46e32fcf6715615c73e396 Mon Sep 17 00:00:00 2001 -From: Lorenzo Stoakes -Date: Fri, 6 Jun 2025 13:50:32 +0100 -Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure - -While an OOM failure in commit_merge() isn't really feasible due to the -allocation which might fail (a maple tree pre-allocation) being 'too small -to fail', we do need to handle this case correctly regardless. - -In vma_merge_existing_range(), we can theoretically encounter failures -which result in an OOM error in two ways - firstly dup_anon_vma() might -fail with an OOM error, and secondly commit_merge() failing, ultimately, -to pre-allocate a maple tree node. - -The abort logic for dup_anon_vma() resets the VMA iterator to the initial -range, ensuring that any logic looping on this iterator will correctly -proceed to the next VMA. - -However the commit_merge() abort logic does not do the same thing. This -resulted in a syzbot report occurring because mlockall() iterates through -VMAs, is tolerant of errors, but ended up with an incorrect previous VMA -being specified due to incorrect iterator state. - -While making this change, it became apparent we are duplicating logic - -the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom -option on modify/merge, use in uffd release") duplicates the -vmg->give_up_on_oom check in both abort branches. - -Additionally, we observe that we can perform the anon_dup check safely on -dup_anon_vma() failure, as this will not be modified should this call -fail. - -Finally, we need to reset the iterator in both cases, so now we can simply -use the exact same code to abort for both. - -We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to -be otherwise and it allows us to implement the abort check more neatly. - -Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com -Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") -Signed-off-by: Lorenzo Stoakes -Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com -Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ -Reviewed-by: Pedro Falcato -Reviewed-by: Vlastimil Babka -Reviewed-by: Liam R. Howlett -Cc: Jann Horn -Cc: -Signed-off-by: Andrew Morton ---- - mm/vma.c | 22 ++++------------------ - 1 file changed, 4 insertions(+), 18 deletions(-) - ---- a/mm/vma.c -+++ b/mm/vma.c -@@ -927,26 +927,9 @@ static __must_check struct vm_area_struc - err = dup_anon_vma(next, middle, &anon_dup); - } - -- if (err) -+ if (err || commit_merge(vmg)) - goto abort; - -- err = commit_merge(vmg); -- if (err) { -- VM_WARN_ON(err != -ENOMEM); -- -- if (anon_dup) -- unlink_anon_vmas(anon_dup); -- -- /* -- * We've cleaned up any cloned anon_vma's, no VMAs have been -- * modified, no harm no foul if the user requests that we not -- * report this and just give up, leaving the VMAs unmerged. -- */ -- if (!vmg->give_up_on_oom) -- vmg->state = VMA_MERGE_ERROR_NOMEM; -- return NULL; -- } -- - khugepaged_enter_vma(vmg->target, vmg->flags); - vmg->state = VMA_MERGE_SUCCESS; - return vmg->target; -@@ -955,6 +938,9 @@ abort: - vma_iter_set(vmg->vmi, start); - vma_iter_load(vmg->vmi); - -+ if (anon_dup) -+ unlink_anon_vmas(anon_dup); -+ - /* - * This means we have failed to clone anon_vma's correctly, but no - * actual changes to VMAs have occurred, so no harm no foul - if the diff --git a/debian/patches/patchset-pf/fixes/0031-mm-close-theoretical-race-where-stale-TLB-entries-co.patch b/debian/patches/patchset-pf/fixes/0031-mm-close-theoretical-race-where-stale-TLB-entries-co.patch deleted file mode 100644 index 07677c4..0000000 --- a/debian/patches/patchset-pf/fixes/0031-mm-close-theoretical-race-where-stale-TLB-entries-co.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 7c9d5350d8acfe1b876a8acabdf247b44a803d58 Mon Sep 17 00:00:00 2001 -From: Ryan Roberts -Date: Fri, 6 Jun 2025 10:28:07 +0100 -Subject: mm: close theoretical race where stale TLB entries could linger - -Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a -parallel reclaim leaving stale TLB entries") described a theoretical race -as such: - - -""" -Nadav Amit identified a theoretical race between page reclaim and mprotect -due to TLB flushes being batched outside of the PTL being held. - -He described the race as follows: - - CPU0 CPU1 - ---- ---- - user accesses memory using RW PTE - [PTE now cached in TLB] - try_to_unmap_one() - ==> ptep_get_and_clear() - ==> set_tlb_ubc_flush_pending() - mprotect(addr, PROT_READ) - ==> change_pte_range() - ==> [ PTE non-present - no flush ] - - user writes using cached RW PTE - ... - - try_to_unmap_flush() - -The same type of race exists for reads when protecting for PROT_NONE and -also exists for operations that can leave an old TLB entry behind such as -munmap, mremap and madvise. -""" - -The solution was to introduce flush_tlb_batched_pending() and call it -under the PTL from mprotect/madvise/munmap/mremap to complete any pending -tlb flushes. - -However, while madvise_free_pte_range() and -madvise_cold_or_pageout_pte_range() were both retro-fitted to call -flush_tlb_batched_pending() immediately after initially acquiring the PTL, -they both temporarily release the PTL to split a large folio if they -stumble upon one. In this case, where re-acquiring the PTL -flush_tlb_batched_pending() must be called again, but it previously was -not. Let's fix that. - -There are 2 Fixes: tags here: the first is the commit that fixed -madvise_free_pte_range(). The second is the commit that added -madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the -faulty pattern from madvise_free_pte_range(). - -This is a theoretical bug discovered during code review. - -Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com -Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") -Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") -Signed-off-by: Ryan Roberts -Reviewed-by: Jann Horn -Acked-by: David Hildenbrand -Cc: Liam Howlett -Cc: Lorenzo Stoakes -Cc: Mel Gorman -Cc: Vlastimil Babka -Cc: -Signed-off-by: Andrew Morton ---- - mm/madvise.c | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/mm/madvise.c -+++ b/mm/madvise.c -@@ -503,6 +503,7 @@ restart: - pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!start_pte) - break; -+ flush_tlb_batched_pending(mm); - arch_enter_lazy_mmu_mode(); - if (!err) - nr = 0; -@@ -736,6 +737,7 @@ static int madvise_free_pte_range(pmd_t - start_pte = pte; - if (!start_pte) - break; -+ flush_tlb_batched_pending(mm); - arch_enter_lazy_mmu_mode(); - if (!err) - nr = 0; diff --git a/debian/patches/patchset-pf/fixes/0032-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch b/debian/patches/patchset-pf/fixes/0032-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch deleted file mode 100644 index a384a40..0000000 --- a/debian/patches/patchset-pf/fixes/0032-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 862a81c79f0bea8ede0352b637b44716f02f71b9 Mon Sep 17 00:00:00 2001 -From: Jens Axboe -Date: Fri, 13 Jun 2025 11:01:49 -0600 -Subject: io_uring/kbuf: don't truncate end buffer for multiple buffer peeks - -If peeking a bunch of buffers, normally io_ring_buffers_peek() will -truncate the end buffer. This isn't optimal as presumably more data will -be arriving later, and hence it's better to stop with the last full -buffer rather than truncate the end buffer. - -Cc: stable@vger.kernel.org -Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") -Reported-by: Christian Mazakas -Signed-off-by: Jens Axboe ---- - io_uring/kbuf.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/io_uring/kbuf.c -+++ b/io_uring/kbuf.c -@@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct i - /* truncate end piece, if needed, for non partial buffers */ - if (len > arg->max_len) { - len = arg->max_len; -- if (!(bl->flags & IOBL_INC)) -+ if (!(bl->flags & IOBL_INC)) { -+ if (iov != arg->iovs) -+ break; - buf->len = len; -+ } - } - - iov->iov_base = u64_to_user_ptr(buf->addr); diff --git a/debian/patches/patchset-pf/fixes/0033-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch b/debian/patches/patchset-pf/fixes/0033-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch deleted file mode 100644 index e635d0a..0000000 --- a/debian/patches/patchset-pf/fixes/0033-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch +++ /dev/null @@ -1,54 +0,0 @@ -From bb3d761325a1707c8064a3d7dd556ed6a501a2e7 Mon Sep 17 00:00:00 2001 -From: Jens Axboe -Date: Fri, 13 Jun 2025 13:37:41 -0600 -Subject: nvme: always punt polled uring_cmd end_io work to task_work - -Currently NVMe uring_cmd completions will complete locally, if they are -polled. This is done because those completions are always invoked from -task context. And while that is true, there's no guarantee that it's -invoked under the right ring context, or even task. If someone does -NVMe passthrough via multiple threads and with a limited number of -poll queues, then ringA may find completions from ringB. For that case, -completing the request may not be sound. - -Always just punt the passthrough completions via task_work, which will -redirect the completion, if needed. - -Cc: stable@vger.kernel.org -Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") -Signed-off-by: Jens Axboe ---- - drivers/nvme/host/ioctl.c | 21 +++++++-------------- - 1 file changed, 7 insertions(+), 14 deletions(-) - ---- a/drivers/nvme/host/ioctl.c -+++ b/drivers/nvme/host/ioctl.c -@@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd - pdu->result = le64_to_cpu(nvme_req(req)->result.u64); - - /* -- * For iopoll, complete it directly. Note that using the uring_cmd -- * helper for this is safe only because we check blk_rq_is_poll(). -- * As that returns false if we're NOT on a polled queue, then it's -- * safe to use the polled completion helper. -- * -- * Otherwise, move the completion to task work. -+ * IOPOLL could potentially complete this request directly, but -+ * if multiple rings are polling on the same queue, then it's possible -+ * for one ring to find completions for another ring. Punting the -+ * completion via task_work will always direct it to the right -+ * location, rather than potentially complete requests for ringA -+ * under iopoll invocations from ringB. - */ -- if (blk_rq_is_poll(req)) { -- if (pdu->bio) -- blk_rq_unmap_user(pdu->bio); -- io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); -- } else { -- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); -- } -- -+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); - return RQ_END_IO_FREE; - } - diff --git a/debian/patches/patchset-pf/fixes/0034-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch b/debian/patches/patchset-pf/fixes/0034-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch deleted file mode 100644 index cea29d8..0000000 --- a/debian/patches/patchset-pf/fixes/0034-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch +++ /dev/null @@ -1,33 +0,0 @@ -From a57621608b2cbcbd0c7da184e9012b9b111a8577 Mon Sep 17 00:00:00 2001 -From: Damien Le Moal -Date: Wed, 11 Jun 2025 09:59:15 +0900 -Subject: block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion - -When blk_zone_write_plug_bio_endio() is called for a regular write BIO -used to emulate a zone append operation, that is, a BIO flagged with -BIO_EMULATES_ZONE_APPEND, the BIO operation code is restored to the -original REQ_OP_ZONE_APPEND but the BIO_EMULATES_ZONE_APPEND flag is not -cleared. Clear it to fully return the BIO to its orginal definition. - -Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") -Cc: stable@vger.kernel.org -Signed-off-by: Damien Le Moal -Reviewed-by: Johannes Thumshirn -Reviewed-by: Hannes Reinecke -Reviewed-by: Christoph Hellwig -Link: https://lore.kernel.org/r/20250611005915.89843-1-dlemoal@kernel.org -Signed-off-by: Jens Axboe ---- - block/blk-zoned.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/block/blk-zoned.c -+++ b/block/blk-zoned.c -@@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struc - if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { - bio->bi_opf &= ~REQ_OP_MASK; - bio->bi_opf |= REQ_OP_ZONE_APPEND; -+ bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); - } - - /* diff --git a/debian/patches/patchset-pf/fixes/0035-block-use-plug-request-list-tail-for-one-shot-backme.patch b/debian/patches/patchset-pf/fixes/0035-block-use-plug-request-list-tail-for-one-shot-backme.patch deleted file mode 100644 index 2f42dd9..0000000 --- a/debian/patches/patchset-pf/fixes/0035-block-use-plug-request-list-tail-for-one-shot-backme.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 7fc5a2cbcc8459cab6ae8c5dd1220768027ccb70 Mon Sep 17 00:00:00 2001 -From: Jens Axboe -Date: Wed, 11 Jun 2025 08:48:46 -0600 -Subject: block: use plug request list tail for one-shot backmerge attempt - -Previously, the block layer stored the requests in the plug list in -LIFO order. For this reason, blk_attempt_plug_merge() would check -just the head entry for a back merge attempt, and abort after that -unless requests for multiple queues existed in the plug list. If more -than one request is present in the plug list, this makes the one-shot -back merging less useful than before, as it'll always fail to find a -quick merge candidate. - -Use the tail entry for the one-shot merge attempt, which is the last -added request in the list. If that fails, abort immediately unless -there are multiple queues available. If multiple queues are available, -then scan the list. Ideally the latter scan would be a backwards scan -of the list, but as it currently stands, the plug list is singly linked -and hence this isn't easily feasible. - -Cc: stable@vger.kernel.org -Link: https://lore.kernel.org/linux-block/20250611121626.7252-1-abuehaze@amazon.com/ -Reported-by: Hazem Mohamed Abuelfotoh -Fixes: e70c301faece ("block: don't reorder requests in blk_add_rq_to_plug") -Signed-off-by: Jens Axboe ---- - block/blk-merge.c | 26 +++++++++++++------------- - 1 file changed, 13 insertions(+), 13 deletions(-) - ---- a/block/blk-merge.c -+++ b/block/blk-merge.c -@@ -1127,20 +1127,20 @@ bool blk_attempt_plug_merge(struct reque - if (!plug || rq_list_empty(&plug->mq_list)) - return false; - -- rq_list_for_each(&plug->mq_list, rq) { -- if (rq->q == q) { -- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == -- BIO_MERGE_OK) -- return true; -- break; -- } -+ rq = plug->mq_list.tail; -+ if (rq->q == q) -+ return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == -+ BIO_MERGE_OK; -+ else if (!plug->multiple_queues) -+ return false; - -- /* -- * Only keep iterating plug list for merges if we have multiple -- * queues -- */ -- if (!plug->multiple_queues) -- break; -+ rq_list_for_each(&plug->mq_list, rq) { -+ if (rq->q != q) -+ continue; -+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == -+ BIO_MERGE_OK) -+ return true; -+ break; - } - return false; - } diff --git a/debian/patches/patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch b/debian/patches/patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch deleted file mode 100644 index a915c91..0000000 --- a/debian/patches/patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 8ad4520fc849262ab23adbabebd366d4755035bc Mon Sep 17 00:00:00 2001 -From: "Mike Rapoport (Microsoft)" -Date: Tue, 3 Jun 2025 14:14:45 +0300 -Subject: Revert "mm/execmem: Unify early execmem_cache behaviour" - -The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache -behaviour") changed early behaviour of execemem ROX cache to allow its -usage in early x86 code that allocates text pages when -CONFIG_MITGATION_ITS is enabled. - -The permission management of the pages allocated from execmem for ITS -mitigation is now completely contained in arch/x86/kernel/alternatives.c -and therefore there is no need to special case early allocations in -execmem. - -This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414. - -Signed-off-by: Mike Rapoport (Microsoft) -Signed-off-by: Peter Zijlstra (Intel) -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org ---- - arch/x86/mm/init_32.c | 3 --- - arch/x86/mm/init_64.c | 3 --- - include/linux/execmem.h | 8 +------- - mm/execmem.c | 40 +++------------------------------------- - 4 files changed, 4 insertions(+), 50 deletions(-) - ---- a/arch/x86/mm/init_32.c -+++ b/arch/x86/mm/init_32.c -@@ -30,7 +30,6 @@ - #include - #include - #include --#include - - #include - #include -@@ -756,8 +755,6 @@ void mark_rodata_ro(void) - pr_info("Write protecting kernel text and read-only data: %luk\n", - size >> 10); - -- execmem_cache_make_ro(); -- - kernel_set_to_readonly = 1; - - #ifdef CONFIG_CPA_DEBUG ---- a/arch/x86/mm/init_64.c -+++ b/arch/x86/mm/init_64.c -@@ -34,7 +34,6 @@ - #include - #include - #include --#include - - #include - #include -@@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) - (end - start) >> 10); - set_memory_ro(start, (end - start) >> PAGE_SHIFT); - -- execmem_cache_make_ro(); -- - kernel_set_to_readonly = 1; - - /* ---- a/include/linux/execmem.h -+++ b/include/linux/execmem.h -@@ -54,7 +54,7 @@ enum execmem_range_flags { - EXECMEM_ROX_CACHE = (1 << 1), - }; - --#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) -+#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX - /** - * execmem_fill_trapping_insns - set memory to contain instructions that - * will trap -@@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size - * Return: 0 on success or negative error code on failure. - */ - int execmem_restore_rox(void *ptr, size_t size); -- --/* -- * Called from mark_readonly(), where the system transitions to ROX. -- */ --void execmem_cache_make_ro(void); - #else - static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } - static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } --static inline void execmem_cache_make_ro(void) { } - #endif - - /** ---- a/mm/execmem.c -+++ b/mm/execmem.c -@@ -254,34 +254,6 @@ out_unlock: - return ptr; - } - --static bool execmem_cache_rox = false; -- --void execmem_cache_make_ro(void) --{ -- struct maple_tree *free_areas = &execmem_cache.free_areas; -- struct maple_tree *busy_areas = &execmem_cache.busy_areas; -- MA_STATE(mas_free, free_areas, 0, ULONG_MAX); -- MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); -- struct mutex *mutex = &execmem_cache.mutex; -- void *area; -- -- execmem_cache_rox = true; -- -- mutex_lock(mutex); -- -- mas_for_each(&mas_free, area, ULONG_MAX) { -- unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; -- set_memory_ro(mas_free.index, pages); -- } -- -- mas_for_each(&mas_busy, area, ULONG_MAX) { -- unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; -- set_memory_ro(mas_busy.index, pages); -- } -- -- mutex_unlock(mutex); --} -- - static int execmem_cache_populate(struct execmem_range *range, size_t size) - { - unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; -@@ -302,15 +274,9 @@ static int execmem_cache_populate(struct - /* fill memory with instructions that will trap */ - execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - -- if (execmem_cache_rox) { -- err = set_memory_rox((unsigned long)p, vm->nr_pages); -- if (err) -- goto err_free_mem; -- } else { -- err = set_memory_x((unsigned long)p, vm->nr_pages); -- if (err) -- goto err_free_mem; -- } -+ err = set_memory_rox((unsigned long)p, vm->nr_pages); -+ if (err) -+ goto err_free_mem; - - err = execmem_cache_add(p, alloc_size); - if (err) diff --git a/debian/patches/patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch b/debian/patches/patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch deleted file mode 100644 index c716808..0000000 --- a/debian/patches/patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 85bfdd784bd61df94fd42daca141ed173f647e8c Mon Sep 17 00:00:00 2001 -From: Kai Huang -Date: Sat, 7 Jun 2025 01:07:37 +1200 -Subject: x86/virt/tdx: Avoid indirect calls to TDX assembly functions - -Two 'static inline' TDX helper functions (sc_retry() and -sc_retry_prerr()) take function pointer arguments which refer to -assembly functions. Normally, the compiler inlines the TDX helper, -realizes that the function pointer targets are completely static -- -thus can be resolved at compile time -- and generates direct call -instructions. - -But, other times (like when CONFIG_CC_OPTIMIZE_FOR_SIZE=y), the -compiler declines to inline the helpers and will instead generate -indirect call instructions. - -Indirect calls to assembly functions require special annotation (for -various Control Flow Integrity mechanisms). But TDX assembly -functions lack the special annotations and can only be called -directly. - -Annotate both the helpers as '__always_inline' to prod the compiler -into maintaining the direct calls. There is no guarantee here, but -Peter has volunteered to report the compiler bug if this assumption -ever breaks[1]. - -Fixes: 1e66a7e27539 ("x86/virt/tdx: Handle SEAMCALL no entropy error in common code") -Fixes: df01f5ae07dd ("x86/virt/tdx: Add SEAMCALL error printing for module initialization") -Signed-off-by: Kai Huang -Signed-off-by: Dave Hansen -Cc: stable@vger.kernel.org -Link: https://lore.kernel.org/lkml/20250605145914.GW39944@noisy.programming.kicks-ass.net/ [1] -Link: https://lore.kernel.org/all/20250606130737.30713-1-kai.huang%40intel.com ---- - arch/x86/include/asm/tdx.h | 2 +- - arch/x86/virt/vmx/tdx/tdx.c | 5 +++-- - 2 files changed, 4 insertions(+), 3 deletions(-) - ---- a/arch/x86/include/asm/tdx.h -+++ b/arch/x86/include/asm/tdx.h -@@ -100,7 +100,7 @@ void tdx_init(void); - - typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); - --static inline u64 sc_retry(sc_func_t func, u64 fn, -+static __always_inline u64 sc_retry(sc_func_t func, u64 fn, - struct tdx_module_args *args) - { - int retry = RDRAND_RETRY_LOOPS; ---- a/arch/x86/virt/vmx/tdx/tdx.c -+++ b/arch/x86/virt/vmx/tdx/tdx.c -@@ -69,8 +69,9 @@ static inline void seamcall_err_ret(u64 - args->r9, args->r10, args->r11); - } - --static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, -- u64 fn, struct tdx_module_args *args) -+static __always_inline int sc_retry_prerr(sc_func_t func, -+ sc_err_func_t err_func, -+ u64 fn, struct tdx_module_args *args) - { - u64 sret = sc_retry(func, fn, args); - diff --git a/debian/patches/patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch b/debian/patches/patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch deleted file mode 100644 index c36e705..0000000 --- a/debian/patches/patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch +++ /dev/null @@ -1,31 +0,0 @@ -From a94cf5c6e7e31be9d4788916ce847adb15735d81 Mon Sep 17 00:00:00 2001 -From: Juergen Gross -Date: Tue, 3 Jun 2025 14:14:41 +0300 -Subject: x86/mm/pat: don't collapse pages without PSE set - -Collapsing pages to a leaf PMD or PUD should be done only if -X86_FEATURE_PSE is available, which is not the case when running e.g. -as a Xen PV guest. - -Fixes: 41d88484c71c ("x86/mm/pat: restore large ROX pages after fragmentation") -Signed-off-by: Juergen Gross -Signed-off-by: Mike Rapoport (Microsoft) -Signed-off-by: Peter Zijlstra (Intel) -Cc: stable@vger.kernel.org -Link: https://lore.kernel.org/r/20250528123557.12847-3-jgross@suse.com ---- - arch/x86/mm/pat/set_memory.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/arch/x86/mm/pat/set_memory.c -+++ b/arch/x86/mm/pat/set_memory.c -@@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, - pgprot_t pgprot; - int i = 0; - -+ if (!cpu_feature_enabled(X86_FEATURE_PSE)) -+ return 0; -+ - addr &= PMD_MASK; - pte = pte_offset_kernel(pmd, addr); - first = *pte; diff --git a/debian/patches/patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch b/debian/patches/patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch deleted file mode 100644 index eebe58a..0000000 --- a/debian/patches/patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 8f28d595d167316469bb33b701e27b4b79c1aab1 Mon Sep 17 00:00:00 2001 -From: "Mike Rapoport (Microsoft)" -Date: Tue, 3 Jun 2025 14:14:42 +0300 -Subject: x86/Kconfig: only enable ROX cache in execmem when STRICT_MODULE_RWX - is set - -Currently ROX cache in execmem is enabled regardless of -STRICT_MODULE_RWX setting. This breaks an assumption that module memory -is writable when STRICT_MODULE_RWX is disabled, for instance for kernel -debuggin. - -Only enable ROX cache in execmem when STRICT_MODULE_RWX is set to -restore the original behaviour of module text permissions. - -Fixes: 64f6a4e10c05 ("x86: re-enable EXECMEM_ROX support") -Signed-off-by: Mike Rapoport (Microsoft) -Signed-off-by: Peter Zijlstra (Intel) -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20250603111446.2609381-3-rppt@kernel.org ---- - arch/x86/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -88,7 +88,7 @@ config X86 - select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN - select ARCH_HAS_EARLY_DEBUG if KGDB - select ARCH_HAS_ELF_RANDOMIZE -- select ARCH_HAS_EXECMEM_ROX if X86_64 -+ select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX - select ARCH_HAS_FAST_MULTIPLIER - select ARCH_HAS_FORTIFY_SOURCE - select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/debian/patches/patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch b/debian/patches/patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch deleted file mode 100644 index c24e6b7..0000000 --- a/debian/patches/patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 24fd2e3cef1b98f4417b8015ba24a8a4dcaae0c1 Mon Sep 17 00:00:00 2001 -From: "Mike Rapoport (Microsoft)" -Date: Tue, 3 Jun 2025 14:14:43 +0300 -Subject: x86/its: move its_pages array to struct mod_arch_specific - -The of pages with ITS thunks allocated for modules are tracked by an -array in 'struct module'. - -Since this is very architecture specific data structure, move it to -'struct mod_arch_specific'. - -No functional changes. - -Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") -Suggested-by: Peter Zijlstra (Intel) -Signed-off-by: Mike Rapoport (Microsoft) -Signed-off-by: Peter Zijlstra (Intel) -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org ---- - arch/x86/include/asm/module.h | 8 ++++++++ - arch/x86/kernel/alternative.c | 19 ++++++++++--------- - include/linux/module.h | 5 ----- - 3 files changed, 18 insertions(+), 14 deletions(-) - ---- a/arch/x86/include/asm/module.h -+++ b/arch/x86/include/asm/module.h -@@ -5,12 +5,20 @@ - #include - #include - -+struct its_array { -+#ifdef CONFIG_MITIGATION_ITS -+ void **pages; -+ int num; -+#endif -+}; -+ - struct mod_arch_specific { - #ifdef CONFIG_UNWINDER_ORC - unsigned int num_orcs; - int *orc_unwind_ip; - struct orc_entry *orc_unwind; - #endif -+ struct its_array its_pages; - }; - - #endif /* _ASM_X86_MODULE_H */ ---- a/arch/x86/kernel/alternative.c -+++ b/arch/x86/kernel/alternative.c -@@ -195,8 +195,8 @@ void its_fini_mod(struct module *mod) - its_page = NULL; - mutex_unlock(&text_mutex); - -- for (int i = 0; i < mod->its_num_pages; i++) { -- void *page = mod->its_page_array[i]; -+ for (int i = 0; i < mod->arch.its_pages.num; i++) { -+ void *page = mod->arch.its_pages.pages[i]; - execmem_restore_rox(page, PAGE_SIZE); - } - } -@@ -206,11 +206,11 @@ void its_free_mod(struct module *mod) - if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) - return; - -- for (int i = 0; i < mod->its_num_pages; i++) { -- void *page = mod->its_page_array[i]; -+ for (int i = 0; i < mod->arch.its_pages.num; i++) { -+ void *page = mod->arch.its_pages.pages[i]; - execmem_free(page); - } -- kfree(mod->its_page_array); -+ kfree(mod->arch.its_pages.pages); - } - #endif /* CONFIG_MODULES */ - -@@ -223,14 +223,15 @@ static void *its_alloc(void) - - #ifdef CONFIG_MODULES - if (its_mod) { -- void *tmp = krealloc(its_mod->its_page_array, -- (its_mod->its_num_pages+1) * sizeof(void *), -+ struct its_array *pages = &its_mod->arch.its_pages; -+ void *tmp = krealloc(pages->pages, -+ (pages->num+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; - -- its_mod->its_page_array = tmp; -- its_mod->its_page_array[its_mod->its_num_pages++] = page; -+ pages->pages = tmp; -+ pages->pages[pages->num++] = page; - - execmem_make_temp_rw(page, PAGE_SIZE); - } ---- a/include/linux/module.h -+++ b/include/linux/module.h -@@ -586,11 +586,6 @@ struct module { - atomic_t refcnt; - #endif - --#ifdef CONFIG_MITIGATION_ITS -- int its_num_pages; -- void **its_page_array; --#endif -- - #ifdef CONFIG_CONSTRUCTORS - /* Constructor functions. */ - ctor_fn_t *ctors; diff --git a/debian/patches/patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch b/debian/patches/patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch deleted file mode 100644 index c93a534..0000000 --- a/debian/patches/patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 48d82c4dd03de376a6f673bda0f4f2b97138d855 Mon Sep 17 00:00:00 2001 -From: "Peter Zijlstra (Intel)" -Date: Tue, 3 Jun 2025 14:14:44 +0300 -Subject: x86/its: explicitly manage permissions for ITS pages - -execmem_alloc() sets permissions differently depending on the kernel -configuration, CPU support for PSE and whether a page is allocated -before or after mark_rodata_ro(). - -Add tracking for pages allocated for ITS when patching the core kernel -and make sure the permissions for ITS pages are explicitly managed for -both kernel and module allocations. - -Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") -Signed-off-by: Peter Zijlstra (Intel) -Co-developed-by: Mike Rapoport (Microsoft) -Signed-off-by: Mike Rapoport (Microsoft) -Signed-off-by: Peter Zijlstra (Intel) -Reviewed-by: Nikolay Borisov -Cc: stable@vger.kernel.org -Link: https://lkml.kernel.org/r/20250603111446.2609381-5-rppt@kernel.org ---- - arch/x86/kernel/alternative.c | 74 ++++++++++++++++++++++++----------- - 1 file changed, 52 insertions(+), 22 deletions(-) - ---- a/arch/x86/kernel/alternative.c -+++ b/arch/x86/kernel/alternative.c -@@ -138,6 +138,24 @@ static struct module *its_mod; - #endif - static void *its_page; - static unsigned int its_offset; -+struct its_array its_pages; -+ -+static void *__its_alloc(struct its_array *pages) -+{ -+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); -+ if (!page) -+ return NULL; -+ -+ void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), -+ GFP_KERNEL); -+ if (!tmp) -+ return NULL; -+ -+ pages->pages = tmp; -+ pages->pages[pages->num++] = page; -+ -+ return no_free_ptr(page); -+} - - /* Initialize a thunk with the "jmp *reg; int3" instructions. */ - static void *its_init_thunk(void *thunk, int reg) -@@ -173,6 +191,21 @@ static void *its_init_thunk(void *thunk, - return thunk + offset; - } - -+static void its_pages_protect(struct its_array *pages) -+{ -+ for (int i = 0; i < pages->num; i++) { -+ void *page = pages->pages[i]; -+ execmem_restore_rox(page, PAGE_SIZE); -+ } -+} -+ -+static void its_fini_core(void) -+{ -+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) -+ its_pages_protect(&its_pages); -+ kfree(its_pages.pages); -+} -+ - #ifdef CONFIG_MODULES - void its_init_mod(struct module *mod) - { -@@ -195,10 +228,8 @@ void its_fini_mod(struct module *mod) - its_page = NULL; - mutex_unlock(&text_mutex); - -- for (int i = 0; i < mod->arch.its_pages.num; i++) { -- void *page = mod->arch.its_pages.pages[i]; -- execmem_restore_rox(page, PAGE_SIZE); -- } -+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) -+ its_pages_protect(&mod->arch.its_pages); - } - - void its_free_mod(struct module *mod) -@@ -216,28 +247,23 @@ void its_free_mod(struct module *mod) - - static void *its_alloc(void) - { -- void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); -+ struct its_array *pages = &its_pages; -+ void *page; - -+#ifdef CONFIG_MODULE -+ if (its_mod) -+ pages = &its_mod->arch.its_pages; -+#endif -+ -+ page = __its_alloc(pages); - if (!page) - return NULL; - --#ifdef CONFIG_MODULES -- if (its_mod) { -- struct its_array *pages = &its_mod->arch.its_pages; -- void *tmp = krealloc(pages->pages, -- (pages->num+1) * sizeof(void *), -- GFP_KERNEL); -- if (!tmp) -- return NULL; -- -- pages->pages = tmp; -- pages->pages[pages->num++] = page; -+ execmem_make_temp_rw(page, PAGE_SIZE); -+ if (pages == &its_pages) -+ set_memory_x((unsigned long)page, 1); - -- execmem_make_temp_rw(page, PAGE_SIZE); -- } --#endif /* CONFIG_MODULES */ -- -- return no_free_ptr(page); -+ return page; - } - - static void *its_allocate_thunk(int reg) -@@ -291,7 +317,9 @@ u8 *its_static_thunk(int reg) - return thunk; - } - --#endif -+#else -+static inline void its_fini_core(void) {} -+#endif /* CONFIG_MITIGATION_ITS */ - - /* - * Nomenclature for variable names to simplify and clarify this code and ease -@@ -2368,6 +2396,8 @@ void __init alternative_instructions(voi - apply_retpolines(__retpoline_sites, __retpoline_sites_end); - apply_returns(__return_sites, __return_sites_end); - -+ its_fini_core(); -+ - /* - * Adjust all CALL instructions to point to func()-10, including - * those in .altinstr_replacement. diff --git a/debian/patches/patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch b/debian/patches/patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch deleted file mode 100644 index f9bf3d4..0000000 --- a/debian/patches/patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 9bed8caa4c73f2d524d9600c74e6cbcff71c2456 Mon Sep 17 00:00:00 2001 -From: Yosry Ahmed -Date: Tue, 29 Apr 2025 08:32:15 -0700 -Subject: KVM: SVM: Clear current_vmcb during vCPU free for all *possible* CPUs - -When freeing a vCPU and thus its VMCB, clear current_vmcb for all possible -CPUs, not just online CPUs, as it's theoretically possible a CPU could go -offline and come back online in conjunction with KVM reusing the page for -a new VMCB. - -Link: https://lore.kernel.org/all/20250320013759.3965869-1-yosry.ahmed@linux.dev -Fixes: fd65d3142f73 ("kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb") -Cc: stable@vger.kernel.org -Cc: Jim Mattson -Signed-off-by: Yosry Ahmed -[sean: split to separate patch, write changelog] -Signed-off-by: Sean Christopherson ---- - arch/x86/kvm/svm/svm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/x86/kvm/svm/svm.c -+++ b/arch/x86/kvm/svm/svm.c -@@ -1488,7 +1488,7 @@ static void svm_clear_current_vmcb(struc - { - int i; - -- for_each_online_cpu(i) -+ for_each_possible_cpu(i) - cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); - } - diff --git a/debian/patches/patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch b/debian/patches/patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch deleted file mode 100644 index a031fa1..0000000 --- a/debian/patches/patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch +++ /dev/null @@ -1,43 +0,0 @@ -From d74cb6c8b70d9b5ad8482f4821679b83bad9de63 Mon Sep 17 00:00:00 2001 -From: Chao Gao -Date: Mon, 24 Mar 2025 22:08:48 +0800 -Subject: KVM: VMX: Flush shadow VMCS on emergency reboot - -Ensure the shadow VMCS cache is evicted during an emergency reboot to -prevent potential memory corruption if the cache is evicted after reboot. - -This issue was identified through code inspection, as __loaded_vmcs_clear() -flushes both the normal VMCS and the shadow VMCS. - -Avoid checking the "launched" state during an emergency reboot, unlike the -behavior in __loaded_vmcs_clear(). This is important because reboot NMIs -can interfere with operations like copy_shadow_to_vmcs12(), where shadow -VMCSes are loaded directly using VMPTRLD. In such cases, if NMIs occur -right after the VMCS load, the shadow VMCSes will be active but the -"launched" state may not be set. - -Fixes: 16f5b9034b69 ("KVM: nVMX: Copy processor-specific shadow-vmcs to VMCS12") -Cc: stable@vger.kernel.org -Signed-off-by: Chao Gao -Reviewed-by: Kai Huang -Link: https://lore.kernel.org/r/20250324140849.2099723-1-chao.gao@intel.com -Signed-off-by: Sean Christopherson ---- - arch/x86/kvm/vmx/vmx.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/arch/x86/kvm/vmx/vmx.c -+++ b/arch/x86/kvm/vmx/vmx.c -@@ -769,8 +769,11 @@ void vmx_emergency_disable_virtualizatio - return; - - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), -- loaded_vmcss_on_cpu_link) -+ loaded_vmcss_on_cpu_link) { - vmcs_clear(v->vmcs); -+ if (v->shadow_vmcs) -+ vmcs_clear(v->shadow_vmcs); -+ } - - kvm_cpu_vmxoff(); - } diff --git a/debian/patches/patchset-pf/fixes/0044-cgroup-freezer-fix-incomplete-freezing-when-attachin.patch b/debian/patches/patchset-pf/fixes/0044-cgroup-freezer-fix-incomplete-freezing-when-attachin.patch deleted file mode 100644 index 3cfd6eb..0000000 --- a/debian/patches/patchset-pf/fixes/0044-cgroup-freezer-fix-incomplete-freezing-when-attachin.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 6e492900893c011cbe13fbb881cf1e11df08982b Mon Sep 17 00:00:00 2001 -From: Chen Ridong -Date: Wed, 18 Jun 2025 07:32:17 +0000 -Subject: cgroup,freezer: fix incomplete freezing when attaching tasks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -An issue was found: - - # cd /sys/fs/cgroup/freezer/ - # mkdir test - # echo FROZEN > test/freezer.state - # cat test/freezer.state - FROZEN - # sleep 1000 & - [1] 863 - # echo 863 > test/cgroup.procs - # cat test/freezer.state - FREEZING - -When tasks are migrated to a frozen cgroup, the freezer fails to -immediately freeze the tasks, causing the cgroup to remain in the -"FREEZING". - -The freeze_task() function is called before clearing the CGROUP_FROZEN -flag. This causes the freezing() check to incorrectly return false, -preventing __freeze_task() from being invoked for the migrated task. - -To fix this issue, clear the CGROUP_FROZEN state before calling -freeze_task(). - -Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") -Cc: stable@vger.kernel.org # v6.1+ -Reported-by: Zhong Jiawei -Signed-off-by: Chen Ridong -Acked-by: Michal Koutný -Signed-off-by: Tejun Heo ---- - kernel/cgroup/legacy_freezer.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c -index 039d1eb2f215..507b8f19a262 100644 ---- a/kernel/cgroup/legacy_freezer.c -+++ b/kernel/cgroup/legacy_freezer.c -@@ -188,13 +188,12 @@ static void freezer_attach(struct cgroup_taskset *tset) - if (!(freezer->state & CGROUP_FREEZING)) { - __thaw_task(task); - } else { -- freeze_task(task); -- - /* clear FROZEN and propagate upwards */ - while (freezer && (freezer->state & CGROUP_FROZEN)) { - freezer->state &= ~CGROUP_FROZEN; - freezer = parent_freezer(freezer); - } -+ freeze_task(task); - } - } - --- -2.50.0 - diff --git a/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch b/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch deleted file mode 100644 index 71bb98b..0000000 --- a/debian/patches/patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch +++ /dev/null @@ -1,39 +0,0 @@ -From ef4d2ebb50f1bd0d5b2e3f1aa2280d7d31e4a3c9 Mon Sep 17 00:00:00 2001 -From: Maninder Singh -Date: Thu, 6 Mar 2025 14:50:06 +0530 -Subject: NFSD: unregister filesystem in case genl_register_family() fails - -With rpc_status netlink support, unregister of register_filesystem() -was missed in case of genl_register_family() fails. - -Correcting it by making new label. - -Fixes: bd9d6a3efa97 ("NFSD: add rpc_status netlink support") -Cc: stable@vger.kernel.org -Signed-off-by: Maninder Singh -Reviewed-by: Jeff Layton -Signed-off-by: Chuck Lever ---- - fs/nfsd/nfsctl.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/fs/nfsd/nfsctl.c -+++ b/fs/nfsd/nfsctl.c -@@ -2305,7 +2305,7 @@ static int __init init_nfsd(void) - goto out_free_cld; - retval = register_filesystem(&nfsd_fs_type); - if (retval) -- goto out_free_all; -+ goto out_free_nfsd4; - retval = genl_register_family(&nfsd_nl_family); - if (retval) - goto out_free_all; -@@ -2313,6 +2313,8 @@ static int __init init_nfsd(void) - - return 0; - out_free_all: -+ unregister_filesystem(&nfsd_fs_type); -+out_free_nfsd4: - nfsd4_destroy_laundry_wq(); - out_free_cld: - unregister_cld_notifier(); diff --git a/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch b/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch deleted file mode 100644 index a43962f..0000000 --- a/debian/patches/patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 6c2a6b3e27a3a02fd9f3f92458d4995014dfe69f Mon Sep 17 00:00:00 2001 -From: Maninder Singh -Date: Thu, 6 Mar 2025 14:50:07 +0530 -Subject: NFSD: fix race between nfsd registration and exports_proc - -As of now nfsd calls create_proc_exports_entry() at start of init_nfsd -and cleanup by remove_proc_entry() at last of exit_nfsd. - -Which causes kernel OOPs if there is race between below 2 operations: -(i) exportfs -r -(ii) mount -t nfsd none /proc/fs/nfsd - -for 5.4 kernel ARM64: - -CPU 1: -el1_irq+0xbc/0x180 -arch_counter_get_cntvct+0x14/0x18 -running_clock+0xc/0x18 -preempt_count_add+0x88/0x110 -prep_new_page+0xb0/0x220 -get_page_from_freelist+0x2d8/0x1778 -__alloc_pages_nodemask+0x15c/0xef0 -__vmalloc_node_range+0x28c/0x478 -__vmalloc_node_flags_caller+0x8c/0xb0 -kvmalloc_node+0x88/0xe0 -nfsd_init_net+0x6c/0x108 [nfsd] -ops_init+0x44/0x170 -register_pernet_operations+0x114/0x270 -register_pernet_subsys+0x34/0x50 -init_nfsd+0xa8/0x718 [nfsd] -do_one_initcall+0x54/0x2e0 - -CPU 2 : -Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 - -PC is at : exports_net_open+0x50/0x68 [nfsd] - -Call trace: -exports_net_open+0x50/0x68 [nfsd] -exports_proc_open+0x2c/0x38 [nfsd] -proc_reg_open+0xb8/0x198 -do_dentry_open+0x1c4/0x418 -vfs_open+0x38/0x48 -path_openat+0x28c/0xf18 -do_filp_open+0x70/0xe8 -do_sys_open+0x154/0x248 - -Sometimes it crashes at exports_net_open() and sometimes cache_seq_next_rcu(). - -and same is happening on latest 6.14 kernel as well: - -[ 0.000000] Linux version 6.14.0-rc5-next-20250304-dirty -... -[ 285.455918] Unable to handle kernel paging request at virtual address 00001f4800001f48 -... -[ 285.464902] pc : cache_seq_next_rcu+0x78/0xa4 -... -[ 285.469695] Call trace: -[ 285.470083] cache_seq_next_rcu+0x78/0xa4 (P) -[ 285.470488] seq_read+0xe0/0x11c -[ 285.470675] proc_reg_read+0x9c/0xf0 -[ 285.470874] vfs_read+0xc4/0x2fc -[ 285.471057] ksys_read+0x6c/0xf4 -[ 285.471231] __arm64_sys_read+0x1c/0x28 -[ 285.471428] invoke_syscall+0x44/0x100 -[ 285.471633] el0_svc_common.constprop.0+0x40/0xe0 -[ 285.471870] do_el0_svc_compat+0x1c/0x34 -[ 285.472073] el0_svc_compat+0x2c/0x80 -[ 285.472265] el0t_32_sync_handler+0x90/0x140 -[ 285.472473] el0t_32_sync+0x19c/0x1a0 -[ 285.472887] Code: f9400885 93407c23 937d7c27 11000421 (f86378a3) -[ 285.473422] ---[ end trace 0000000000000000 ]--- - -It reproduced simply with below script: -while [ 1 ] -do -/exportfs -r -done & - -while [ 1 ] -do -insmod /nfsd.ko -mount -t nfsd none /proc/fs/nfsd -umount /proc/fs/nfsd -rmmod nfsd -done & - -So exporting interfaces to user space shall be done at last and -cleanup at first place. - -With change there is no Kernel OOPs. - -Co-developed-by: Shubham Rana -Signed-off-by: Shubham Rana -Signed-off-by: Maninder Singh -Reviewed-by: Jeff Layton -Cc: stable@vger.kernel.org -Signed-off-by: Chuck Lever ---- - fs/nfsd/nfsctl.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - ---- a/fs/nfsd/nfsctl.c -+++ b/fs/nfsd/nfsctl.c -@@ -2291,12 +2291,9 @@ static int __init init_nfsd(void) - if (retval) - goto out_free_pnfs; - nfsd_lockd_init(); /* lockd->nfsd callbacks */ -- retval = create_proc_exports_entry(); -- if (retval) -- goto out_free_lockd; - retval = register_pernet_subsys(&nfsd_net_ops); - if (retval < 0) -- goto out_free_exports; -+ goto out_free_lockd; - retval = register_cld_notifier(); - if (retval) - goto out_free_subsys; -@@ -2308,11 +2305,16 @@ static int __init init_nfsd(void) - goto out_free_nfsd4; - retval = genl_register_family(&nfsd_nl_family); - if (retval) -+ goto out_free_filesystem; -+ retval = create_proc_exports_entry(); -+ if (retval) - goto out_free_all; - nfsd_localio_ops_init(); - - return 0; - out_free_all: -+ genl_unregister_family(&nfsd_nl_family); -+out_free_filesystem: - unregister_filesystem(&nfsd_fs_type); - out_free_nfsd4: - nfsd4_destroy_laundry_wq(); -@@ -2320,9 +2322,6 @@ out_free_cld: - unregister_cld_notifier(); - out_free_subsys: - unregister_pernet_subsys(&nfsd_net_ops); --out_free_exports: -- remove_proc_entry("fs/nfs/exports", NULL); -- remove_proc_entry("fs/nfs", NULL); - out_free_lockd: - nfsd_lockd_shutdown(); - nfsd_drc_slab_free(); -@@ -2335,14 +2334,14 @@ out_free_slabs: - - static void __exit exit_nfsd(void) - { -+ remove_proc_entry("fs/nfs/exports", NULL); -+ remove_proc_entry("fs/nfs", NULL); - genl_unregister_family(&nfsd_nl_family); - unregister_filesystem(&nfsd_fs_type); - nfsd4_destroy_laundry_wq(); - unregister_cld_notifier(); - unregister_pernet_subsys(&nfsd_net_ops); - nfsd_drc_slab_free(); -- remove_proc_entry("fs/nfs/exports", NULL); -- remove_proc_entry("fs/nfs", NULL); - nfsd_lockd_shutdown(); - nfsd4_free_slabs(); - nfsd4_exit_pnfs(); diff --git a/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch b/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch deleted file mode 100644 index 1c2312f..0000000 --- a/debian/patches/patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0d4fc17cb5da09d14dbff91da7e28e50d3f54af2 Mon Sep 17 00:00:00 2001 -From: Olga Kornievskaia -Date: Fri, 21 Mar 2025 20:13:04 -0400 -Subject: nfsd: fix access checking for NLM under XPRTSEC policies - -When an export policy with xprtsec policy is set with "tls" -and/or "mtls", but an NFS client is doing a v3 xprtsec=tls -mount, then NLM locking calls fail with an error because -there is currently no support for NLM with TLS. - -Until such support is added, allow NLM calls under TLS-secured -policy. - -Fixes: 4cc9b9f2bf4d ("nfsd: refine and rename NFSD_MAY_LOCK") -Cc: stable@vger.kernel.org -Signed-off-by: Olga Kornievskaia -Reviewed-by: NeilBrown -Reviewed-by: Jeff Layton -Signed-off-by: Chuck Lever ---- - fs/nfsd/export.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/fs/nfsd/export.c -+++ b/fs/nfsd/export.c -@@ -1124,7 +1124,8 @@ __be32 check_nfsd_access(struct svc_expo - test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) - goto ok; - } -- goto denied; -+ if (!may_bypass_gss) -+ goto denied; - - ok: - /* legacy gss-only clients are always OK: */ diff --git a/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch b/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch deleted file mode 100644 index db09dbc..0000000 --- a/debian/patches/patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 2fa924062a9494772cd997cb8b1ec572cfe6490f Mon Sep 17 00:00:00 2001 -From: NeilBrown -Date: Fri, 28 Mar 2025 11:05:59 +1100 -Subject: nfsd: nfsd4_spo_must_allow() must check this is a v4 compound request - -If the request being processed is not a v4 compound request, then -examining the cstate can have undefined results. - -This patch adds a check that the rpc procedure being executed -(rq_procinfo) is the NFSPROC4_COMPOUND procedure. - -Reported-by: Olga Kornievskaia -Cc: stable@vger.kernel.org -Reviewed-by: Jeff Layton -Signed-off-by: NeilBrown -Signed-off-by: Chuck Lever ---- - fs/nfsd/nfs4proc.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/fs/nfsd/nfs4proc.c -+++ b/fs/nfsd/nfs4proc.c -@@ -3766,7 +3766,8 @@ bool nfsd4_spo_must_allow(struct svc_rqs - struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; - u32 opiter; - -- if (!cstate->minorversion) -+ if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || -+ cstate->minorversion == 0) - return false; - - if (cstate->spo_must_allowed) diff --git a/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch b/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch deleted file mode 100644 index 0a494d1..0000000 --- a/debian/patches/patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch +++ /dev/null @@ -1,47 +0,0 @@ -From c860b8340bf921de66aa7871f40507dd5628926f Mon Sep 17 00:00:00 2001 -From: Li Lingfeng -Date: Mon, 14 Apr 2025 22:38:52 +0800 -Subject: nfsd: Initialize ssc before laundromat_work to prevent NULL - dereference - -In nfs4_state_start_net(), laundromat_work may access nfsd_ssc through -nfs4_laundromat -> nfsd4_ssc_expire_umount. If nfsd_ssc isn't initialized, -this can cause NULL pointer dereference. - -Normally the delayed start of laundromat_work allows sufficient time for -nfsd_ssc initialization to complete. However, when the kernel waits too -long for userspace responses (e.g. in nfs4_state_start_net -> -nfsd4_end_grace -> nfsd4_record_grace_done -> nfsd4_cld_grace_done -> -cld_pipe_upcall -> __cld_pipe_upcall -> wait_for_completion path), the -delayed work may start before nfsd_ssc initialization finishes. - -Fix this by moving nfsd_ssc initialization before starting laundromat_work. - -Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.") -Cc: stable@vger.kernel.org -Reviewed-by: Jeff Layton -Signed-off-by: Li Lingfeng -Signed-off-by: Chuck Lever ---- - fs/nfsd/nfssvc.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/fs/nfsd/nfssvc.c -+++ b/fs/nfsd/nfssvc.c -@@ -396,13 +396,13 @@ static int nfsd_startup_net(struct net * - if (ret) - goto out_filecache; - -+#ifdef CONFIG_NFSD_V4_2_INTER_SSC -+ nfsd4_ssc_init_umount_work(nn); -+#endif - ret = nfs4_state_start_net(net); - if (ret) - goto out_reply_cache; - --#ifdef CONFIG_NFSD_V4_2_INTER_SSC -- nfsd4_ssc_init_umount_work(nn); --#endif - nn->nfsd_net_up = true; - return 0; - diff --git a/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch b/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch deleted file mode 100644 index 17b3b56..0000000 --- a/debian/patches/patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 01089ae8fff5bcc6e9949d50d76b70f2a16abe89 Mon Sep 17 00:00:00 2001 -From: Chuck Lever -Date: Wed, 7 May 2025 10:45:15 -0400 -Subject: NFSD: Implement FATTR4_CLONE_BLKSIZE attribute - -RFC 7862 states that if an NFS server implements a CLONE operation, -it MUST also implement FATTR4_CLONE_BLKSIZE. NFSD implements CLONE, -but does not implement FATTR4_CLONE_BLKSIZE. - -Note that in Section 12.2, RFC 7862 claims that -FATTR4_CLONE_BLKSIZE is RECOMMENDED, not REQUIRED. Likely this is -because a minor version is not permitted to add a REQUIRED -attribute. Confusing. - -We assume this attribute reports a block size as a count of bytes, -as RFC 7862 does not specify a unit. - -Reported-by: Roland Mainz -Suggested-by: Christoph Hellwig -Reviewed-by: Roland Mainz -Cc: stable@vger.kernel.org # v6.7+ -Reviewed-by: Jeff Layton -Signed-off-by: Chuck Lever ---- - fs/nfsd/nfs4xdr.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - ---- a/fs/nfsd/nfs4xdr.c -+++ b/fs/nfsd/nfs4xdr.c -@@ -3391,6 +3391,23 @@ static __be32 nfsd4_encode_fattr4_suppat - return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); - } - -+/* -+ * Copied from generic_remap_checks/generic_remap_file_range_prep. -+ * -+ * These generic functions use the file system's s_blocksize, but -+ * individual file systems aren't required to use -+ * generic_remap_file_range_prep. Until there is a mechanism for -+ * determining a particular file system's (or file's) clone block -+ * size, this is the best NFSD can do. -+ */ -+static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, -+ const struct nfsd4_fattr_args *args) -+{ -+ struct inode *inode = d_inode(args->dentry); -+ -+ return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); -+} -+ - #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, - const struct nfsd4_fattr_args *args) -@@ -3545,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fa - [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, - [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, - [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, -- [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop, -+ [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, - [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, - [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, - diff --git a/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch b/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch deleted file mode 100644 index 002eeb2..0000000 --- a/debian/patches/patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch +++ /dev/null @@ -1,65 +0,0 @@ -From e0246422dfc08dec0fc3c96f3201bab6ceec6774 Mon Sep 17 00:00:00 2001 -From: Max Kellermann -Date: Wed, 23 Apr 2025 15:22:50 +0200 -Subject: fs/nfs/read: fix double-unlock bug in nfs_return_empty_folio() - -Sometimes, when a file was read while it was being truncated by -another NFS client, the kernel could deadlock because folio_unlock() -was called twice, and the second call would XOR back the `PG_locked` -flag. - -Most of the time (depending on the timing of the truncation), nobody -notices the problem because folio_unlock() gets called three times, -which flips `PG_locked` back off: - - 1. vfs_read, nfs_read_folio, ... nfs_read_add_folio, - nfs_return_empty_folio - 2. vfs_read, nfs_read_folio, ... netfs_read_collection, - netfs_unlock_abandoned_read_pages - 3. vfs_read, ... nfs_do_read_folio, nfs_read_add_folio, - nfs_return_empty_folio - -The problem is that nfs_read_add_folio() is not supposed to unlock the -folio if fscache is enabled, and a nfs_netfs_folio_unlock() check is -missing in nfs_return_empty_folio(). - -Rarely this leads to a warning in netfs_read_collection(): - - ------------[ cut here ]------------ - R=0000031c: folio 10 is not locked - WARNING: CPU: 0 PID: 29 at fs/netfs/read_collect.c:133 netfs_read_collection+0x7c0/0xf00 - [...] - Workqueue: events_unbound netfs_read_collection_worker - RIP: 0010:netfs_read_collection+0x7c0/0xf00 - [...] - Call Trace: - - netfs_read_collection_worker+0x67/0x80 - process_one_work+0x12e/0x2c0 - worker_thread+0x295/0x3a0 - -Most of the time, however, processes just get stuck forever in -folio_wait_bit_common(), waiting for `PG_locked` to disappear, which -never happens because nobody is really holding the folio lock. - -Fixes: 000dbe0bec05 ("NFS: Convert buffered read paths to use netfs when fscache is enabled") -Cc: stable@vger.kernel.org -Signed-off-by: Max Kellermann -Reviewed-by: Dave Wysochanski -Signed-off-by: Anna Schumaker ---- - fs/nfs/read.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/fs/nfs/read.c -+++ b/fs/nfs/read.c -@@ -56,7 +56,8 @@ static int nfs_return_empty_folio(struct - { - folio_zero_segment(folio, 0, folio_size(folio)); - folio_mark_uptodate(folio); -- folio_unlock(folio); -+ if (nfs_netfs_folio_unlock(folio)) -+ folio_unlock(folio); - return 0; - } - diff --git a/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch b/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch deleted file mode 100644 index bb70674..0000000 --- a/debian/patches/patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch +++ /dev/null @@ -1,32 +0,0 @@ -From d9f4762296075cc67d9974d093a87064075853e1 Mon Sep 17 00:00:00 2001 -From: Scott Mayhew -Date: Wed, 30 Apr 2025 07:12:29 -0400 -Subject: NFSv4: Don't check for OPEN feature support in v4.1 - -fattr4_open_arguments is a v4.2 recommended attribute, so we shouldn't -be sending it to v4.1 servers. - -Fixes: cb78f9b7d0c0 ("nfs: fix the fetch of FATTR4_OPEN_ARGUMENTS") -Signed-off-by: Scott Mayhew -Reviewed-by: Jeff Layton -Reviewed-by: Benjamin Coddington -Cc: stable@vger.kernel.org # 6.11+ -Signed-off-by: Anna Schumaker ---- - fs/nfs/nfs4proc.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - ---- a/fs/nfs/nfs4proc.c -+++ b/fs/nfs/nfs4proc.c -@@ -3976,8 +3976,9 @@ static int _nfs4_server_capabilities(str - FATTR4_WORD0_CASE_INSENSITIVE | - FATTR4_WORD0_CASE_PRESERVING; - if (minorversion) -- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | -- FATTR4_WORD2_OPEN_ARGUMENTS; -+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; -+ if (minorversion > 1) -+ bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS; - - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); - if (status == 0) { diff --git a/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch b/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch deleted file mode 100644 index 2c75969..0000000 --- a/debian/patches/patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 7147868788966e9032cdeb0cf33bd1ae47785088 Mon Sep 17 00:00:00 2001 -From: Mike Snitzer -Date: Tue, 13 May 2025 12:08:31 -0400 -Subject: NFS: always probe for LOCALIO support asynchronously - -It was reported that NFS client mounts of AWS Elastic File System -(EFS) volumes is slow, this is because the AWS firewall disallows -LOCALIO (because it doesn't consider the use of NFS_LOCALIO_PROGRAM -valid), see: https://bugzilla.redhat.com/show_bug.cgi?id=2335129 - -Switch to performing the LOCALIO probe asynchronously to address the -potential for the NFS LOCALIO protocol being disallowed and/or slowed -by the remote server's response. - -While at it, fix nfs_local_probe_async() to always take/put a -reference on the nfs_client that is using the LOCALIO protocol. -Also, unexport the nfs_local_probe() symbol and make it private to -fs/nfs/localio.c - -This change has the side-effect of initially issuing reads, writes and -commits over the wire via SUNRPC until the LOCALIO probe completes. - -Suggested-by: Jeff Layton # to always probe async -Fixes: 76d4cb6345da ("nfs: probe for LOCALIO when v4 client reconnects to server") -Cc: stable@vger.kernel.org # 6.14+ -Signed-off-by: Mike Snitzer -Reviewed-by: Jeff Layton -Signed-off-by: Anna Schumaker ---- - fs/nfs/client.c | 2 +- - fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- - fs/nfs/internal.h | 1 - - fs/nfs/localio.c | 6 ++++-- - 4 files changed, 6 insertions(+), 5 deletions(-) - ---- a/fs/nfs/client.c -+++ b/fs/nfs/client.c -@@ -439,7 +439,7 @@ struct nfs_client *nfs_get_client(const - spin_unlock(&nn->nfs_client_lock); - new = rpc_ops->init_client(new, cl_init); - if (!IS_ERR(new)) -- nfs_local_probe(new); -+ nfs_local_probe_async(new); - return new; - } - ---- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c -+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c -@@ -400,7 +400,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_la - * keep ds_clp even if DS is local, so that if local IO cannot - * proceed somehow, we can fall back to NFS whenever we want. - */ -- nfs_local_probe(ds->ds_clp); -+ nfs_local_probe_async(ds->ds_clp); - max_payload = - nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), - NULL); ---- a/fs/nfs/internal.h -+++ b/fs/nfs/internal.h -@@ -455,7 +455,6 @@ extern int nfs_wait_bit_killable(struct - - #if IS_ENABLED(CONFIG_NFS_LOCALIO) - /* localio.c */ --extern void nfs_local_probe(struct nfs_client *); - extern void nfs_local_probe_async(struct nfs_client *); - extern void nfs_local_probe_async_work(struct work_struct *); - extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, ---- a/fs/nfs/localio.c -+++ b/fs/nfs/localio.c -@@ -171,7 +171,7 @@ static bool nfs_server_uuid_is_local(str - * - called after alloc_client and init_client (so cl_rpcclient exists) - * - this function is idempotent, it can be called for old or new clients - */ --void nfs_local_probe(struct nfs_client *clp) -+static void nfs_local_probe(struct nfs_client *clp) - { - /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ - if (!localio_enabled || -@@ -191,14 +191,16 @@ void nfs_local_probe(struct nfs_client * - nfs_localio_enable_client(clp); - nfs_uuid_end(&clp->cl_uuid); - } --EXPORT_SYMBOL_GPL(nfs_local_probe); - - void nfs_local_probe_async_work(struct work_struct *work) - { - struct nfs_client *clp = - container_of(work, struct nfs_client, cl_local_probe_work); - -+ if (!refcount_inc_not_zero(&clp->cl_count)) -+ return; - nfs_local_probe(clp); -+ nfs_put_client(clp); - } - - void nfs_local_probe_async(struct nfs_client *clp) diff --git a/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch b/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch deleted file mode 100644 index b6f5185..0000000 --- a/debian/patches/patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 97831e31e43bb023d208b2344546a4e51e580dc6 Mon Sep 17 00:00:00 2001 -From: Ruben Devos -Date: Sun, 1 Jun 2025 19:18:55 +0200 -Subject: smb: client: add NULL check in automount_fullpath - -page is checked for null in __build_path_from_dentry_optional_prefix -when tcon->origin_fullpath is not set. However, the check is missing when -it is set. -Add a check to prevent a potential NULL pointer dereference. - -Signed-off-by: Ruben Devos -Cc: stable@vger.kernel.org -Signed-off-by: Steve French ---- - fs/smb/client/namespace.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/fs/smb/client/namespace.c -+++ b/fs/smb/client/namespace.c -@@ -146,6 +146,9 @@ static char *automount_fullpath(struct d - } - spin_unlock(&tcon->tc_lock); - -+ if (unlikely(!page)) -+ return ERR_PTR(-ENOMEM); -+ - s = dentry_path_raw(dentry, page, PATH_MAX); - if (IS_ERR(s)) - return s; diff --git a/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch b/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch deleted file mode 100644 index f75147c..0000000 --- a/debian/patches/patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 0ca6d39b6d40b868eb6b4021f918de7a0f6a0f2e Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:13 +0530 -Subject: cifs: reset connections for all channels when reconnect requested - -cifs_reconnect can be called with a flag to mark the session as needing -reconnect too. When this is done, we expect the connections of all -channels to be reconnected too, which is not happening today. - -Without doing this, we have seen bad things happen when primary and -secondary channels are connected to different servers (in case of cloud -services like Azure Files SMB). - -This change would force all connections to reconnect as well, not just -the sessions and tcons. - -Cc: -Signed-off-by: Shyam Prasad N -Signed-off-by: Steve French ---- - fs/smb/client/connect.c | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- a/fs/smb/client/connect.c -+++ b/fs/smb/client/connect.c -@@ -377,6 +377,13 @@ static int __cifs_reconnect(struct TCP_S - if (!cifs_tcp_ses_needs_reconnect(server, 1)) - return 0; - -+ /* -+ * if smb session has been marked for reconnect, also reconnect all -+ * connections. This way, the other connections do not end up bad. -+ */ -+ if (mark_smb_session) -+ cifs_signal_cifsd_for_reconnect(server, mark_smb_session); -+ - cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); - - cifs_abort_connection(server); diff --git a/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch b/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch deleted file mode 100644 index 23b12a7..0000000 --- a/debian/patches/patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch +++ /dev/null @@ -1,31 +0,0 @@ -From d1f84c6baebc480106c9558dea4842ecb3059017 Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:14 +0530 -Subject: cifs: update dstaddr whenever channel iface is updated - -When the server interface info changes (more common in clustered -servers like Azure Files), the per-channel iface gets updated. -However, this did not update the corresponding dstaddr. As a result -these channels will still connect (or try connecting) to older addresses. - -Fixes: b54034a73baf ("cifs: during reconnect, update interface if necessary") -Cc: -Signed-off-by: Shyam Prasad N -Signed-off-by: Steve French ---- - fs/smb/client/sess.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/fs/smb/client/sess.c -+++ b/fs/smb/client/sess.c -@@ -445,6 +445,10 @@ cifs_chan_update_iface(struct cifs_ses * - - ses->chans[chan_index].iface = iface; - spin_unlock(&ses->chan_lock); -+ -+ spin_lock(&server->srv_lock); -+ memcpy(&server->dstaddr, &iface->sockaddr, sizeof(server->dstaddr)); -+ spin_unlock(&server->srv_lock); - } - - static int diff --git a/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch b/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch deleted file mode 100644 index 99c65dc..0000000 --- a/debian/patches/patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 2bffd71a70fa4695f62712688a720393cc92032b Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:16 +0530 -Subject: cifs: dns resolution is needed only for primary channel - -When calling cifs_reconnect, before the connection to the -server is reestablished, the code today does a DNS resolution and -updates server->dstaddr. - -However, this is not necessary for secondary channels. Secondary -channels use the interface list returned by the server to decide -which address to connect to. And that happens after tcon is reconnected -and server interfaces are requested. - -Signed-off-by: Shyam Prasad N -Cc: stable@vger.kernel.org -Signed-off-by: Steve French ---- - fs/smb/client/connect.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/fs/smb/client/connect.c -+++ b/fs/smb/client/connect.c -@@ -392,7 +392,8 @@ static int __cifs_reconnect(struct TCP_S - try_to_freeze(); - cifs_server_lock(server); - -- if (!cifs_swn_set_server_dstaddr(server)) { -+ if (!cifs_swn_set_server_dstaddr(server) && -+ !SERVER_IS_CHAN(server)) { - /* resolve the hostname again to make sure that IP address is up-to-date */ - rc = reconn_set_ipaddr_from_hostname(server); - cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc); diff --git a/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch b/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch deleted file mode 100644 index a824997..0000000 --- a/debian/patches/patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 918f494c058028cee8bdff33a4aa613377da61f0 Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:12 +0530 -Subject: cifs: deal with the channel loading lag while picking channels - -Our current approach to select a channel for sending requests is this: -1. iterate all channels to find the min and max queue depth -2. if min and max are not the same, pick the channel with min depth -3. if min and max are same, round robin, as all channels are equally loaded - -The problem with this approach is that there's a lag between selecting -a channel and sending the request (that increases the queue depth on the channel). -While these numbers will eventually catch up, there could be a skew in the -channel usage, depending on the application's I/O parallelism and the server's -speed of handling requests. - -With sufficient parallelism, this lag can artificially increase the queue depth, -thereby impacting the performance negatively. - -This change will change the step 1 above to start the iteration from the last -selected channel. This is to reduce the skew in channel usage even in the presence -of this lag. - -Fixes: ea90708d3cf3 ("cifs: use the least loaded channel for sending requests") -Cc: -Signed-off-by: Shyam Prasad N -Signed-off-by: Steve French ---- - fs/smb/client/transport.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - ---- a/fs/smb/client/transport.c -+++ b/fs/smb/client/transport.c -@@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channe - uint index = 0; - unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; - struct TCP_Server_Info *server = NULL; -- int i; -+ int i, start, cur; - - if (!ses) - return NULL; - - spin_lock(&ses->chan_lock); -+ start = atomic_inc_return(&ses->chan_seq); - for (i = 0; i < ses->chan_count; i++) { -- server = ses->chans[i].server; -+ cur = (start + i) % ses->chan_count; -+ server = ses->chans[cur].server; - if (!server || server->terminate) - continue; - -@@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channe - */ - if (server->in_flight < min_in_flight) { - min_in_flight = server->in_flight; -- index = i; -+ index = cur; - } - if (server->in_flight > max_in_flight) - max_in_flight = server->in_flight; - } - - /* if all channels are equally loaded, fall back to round-robin */ -- if (min_in_flight == max_in_flight) { -- index = (uint)atomic_inc_return(&ses->chan_seq); -- index %= ses->chan_count; -- } -+ if (min_in_flight == max_in_flight) -+ index = (uint)start % ses->chan_count; - - server = ses->chans[index].server; - spin_unlock(&ses->chan_lock); diff --git a/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch b/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch deleted file mode 100644 index 2afe01e..0000000 --- a/debian/patches/patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 2cc6528030c91406031698e047896faa99fc0092 Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:15 +0530 -Subject: cifs: serialize other channels when query server interfaces is - pending - -Today, during smb2_reconnect, session_mutex is released as soon as -the tcon is reconnected and is in a good state. However, in case -multichannel is enabled, there is also a query of server interfaces that -follows. We've seen that this query can race with reconnects of other -channels, causing them to step on each other with reconnects. - -This change extends the hold of session_mutex till after the query of -server interfaces is complete. In order to avoid recursive smb2_reconnect -checks during query ioctl, this change also introduces a session flag -for sessions where such a query is in progress. - -Signed-off-by: Shyam Prasad N -Cc: stable@vger.kernel.org -Signed-off-by: Steve French ---- - fs/smb/client/cifsglob.h | 1 + - fs/smb/client/smb2pdu.c | 24 ++++++++++++++++++------ - 2 files changed, 19 insertions(+), 6 deletions(-) - ---- a/fs/smb/client/cifsglob.h -+++ b/fs/smb/client/cifsglob.h -@@ -1084,6 +1084,7 @@ struct cifs_chan { - }; - - #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) -+#define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) - - /* - * Session structure. One of these for each uid session with a particular host ---- a/fs/smb/client/smb2pdu.c -+++ b/fs/smb/client/smb2pdu.c -@@ -411,14 +411,19 @@ skip_sess_setup: - if (!rc && - (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) && - server->ops->query_server_interfaces) { -- mutex_unlock(&ses->session_mutex); -- - /* -- * query server network interfaces, in case they change -+ * query server network interfaces, in case they change. -+ * Also mark the session as pending this update while the query -+ * is in progress. This will be used to avoid calling -+ * smb2_reconnect recursively. - */ -+ ses->flags |= CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - xid = get_xid(); - rc = server->ops->query_server_interfaces(xid, tcon, false); - free_xid(xid); -+ ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; -+ -+ mutex_unlock(&ses->session_mutex); - - if (rc == -EOPNOTSUPP && ses->chan_count > 1) { - /* -@@ -560,11 +565,18 @@ static int smb2_ioctl_req_init(u32 opcod - struct TCP_Server_Info *server, - void **request_buf, unsigned int *total_len) - { -- /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ -- if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { -+ /* -+ * Skip reconnect in one of the following cases: -+ * 1. For FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs -+ * 2. For FSCTL_QUERY_NETWORK_INTERFACE_INFO IOCTL when called from -+ * smb2_reconnect (indicated by CIFS_SES_FLAG_SCALE_CHANNELS ses flag) -+ */ -+ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO || -+ (opcode == FSCTL_QUERY_NETWORK_INTERFACE_INFO && -+ (tcon->ses->flags & CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES))) - return __smb2_plain_req_init(SMB2_IOCTL, tcon, server, - request_buf, total_len); -- } -+ - return smb2_plain_req_init(SMB2_IOCTL, tcon, server, - request_buf, total_len); - } diff --git a/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch b/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch deleted file mode 100644 index 2e6535f..0000000 --- a/debian/patches/patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 48fd713e7c35aba7a4c3ed327977897909575e3e Mon Sep 17 00:00:00 2001 -From: Shyam Prasad N -Date: Mon, 2 Jun 2025 22:37:17 +0530 -Subject: cifs: do not disable interface polling on failure - -When a server has multichannel enabled, we keep polling the server -for interfaces periodically. However, when this query fails, we -disable the polling. This can be problematic as it takes away the -chance for the server to start advertizing again. - -This change reschedules the delayed work, even if the current call -failed. That way, multichannel sessions can recover. - -Signed-off-by: Shyam Prasad N -Cc: stable@vger.kernel.org -Signed-off-by: Steve French ---- - fs/smb/client/connect.c | 6 +----- - fs/smb/client/smb2pdu.c | 9 +++++---- - 2 files changed, 6 insertions(+), 9 deletions(-) - ---- a/fs/smb/client/connect.c -+++ b/fs/smb/client/connect.c -@@ -116,13 +116,9 @@ static void smb2_query_server_interfaces - rc = server->ops->query_server_interfaces(xid, tcon, false); - free_xid(xid); - -- if (rc) { -- if (rc == -EOPNOTSUPP) -- return; -- -+ if (rc) - cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", - __func__, rc); -- } - - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); ---- a/fs/smb/client/smb2pdu.c -+++ b/fs/smb/client/smb2pdu.c -@@ -423,6 +423,10 @@ skip_sess_setup: - free_xid(xid); - ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - -+ /* regardless of rc value, setup polling */ -+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, -+ (SMB_INTERFACE_POLL_INTERVAL * HZ)); -+ - mutex_unlock(&ses->session_mutex); - - if (rc == -EOPNOTSUPP && ses->chan_count > 1) { -@@ -443,11 +447,8 @@ skip_sess_setup: - if (ses->chan_max > ses->chan_count && - ses->iface_count && - !SERVER_IS_CHAN(server)) { -- if (ses->chan_count == 1) { -+ if (ses->chan_count == 1) - cifs_server_dbg(VFS, "supports multichannel now\n"); -- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, -- (SMB_INTERFACE_POLL_INTERVAL * HZ)); -- } - - cifs_try_adding_channels(ses); - } diff --git a/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch b/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch deleted file mode 100644 index 355bd11..0000000 --- a/debian/patches/patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 17457c5d0fa0b98cef9d2236a1518b1ded25fa5d Mon Sep 17 00:00:00 2001 -From: Bharath SM -Date: Wed, 11 Jun 2025 16:59:02 +0530 -Subject: smb: improve directory cache reuse for readdir operations - -Currently, cached directory contents were not reused across subsequent -'ls' operations because the cache validity check relied on comparing -the ctx pointer, which changes with each readdir invocation. As a -result, the cached dir entries was not marked as valid and the cache was -not utilized for subsequent 'ls' operations. - -This change uses the file pointer, which remains consistent across all -readdir calls for a given directory instance, to associate and validate -the cache. As a result, cached directory contents can now be -correctly reused, improving performance for repeated directory listings. - -Performance gains with local windows SMB server: - -Without the patch and default actimeo=1: - 1000 directory enumeration operations on dir with 10k files took 135.0s - -With this patch and actimeo=0: - 1000 directory enumeration operations on dir with 10k files took just 5.1s - -Signed-off-by: Bharath SM -Reviewed-by: Shyam Prasad N -Cc: stable@vger.kernel.org -Signed-off-by: Steve French ---- - fs/smb/client/cached_dir.h | 8 ++++---- - fs/smb/client/readdir.c | 28 +++++++++++++++------------- - 2 files changed, 19 insertions(+), 17 deletions(-) - ---- a/fs/smb/client/cached_dir.h -+++ b/fs/smb/client/cached_dir.h -@@ -21,10 +21,10 @@ struct cached_dirent { - struct cached_dirents { - bool is_valid:1; - bool is_failed:1; -- struct dir_context *ctx; /* -- * Only used to make sure we only take entries -- * from a single context. Never dereferenced. -- */ -+ struct file *file; /* -+ * Used to associate the cache with a single -+ * open file instance. -+ */ - struct mutex de_mutex; - int pos; /* Expected ctx->pos */ - struct list_head entries; ---- a/fs/smb/client/readdir.c -+++ b/fs/smb/client/readdir.c -@@ -850,9 +850,9 @@ static bool emit_cached_dirents(struct c - } - - static void update_cached_dirents_count(struct cached_dirents *cde, -- struct dir_context *ctx) -+ struct file *file) - { -- if (cde->ctx != ctx) -+ if (cde->file != file) - return; - if (cde->is_valid || cde->is_failed) - return; -@@ -861,9 +861,9 @@ static void update_cached_dirents_count( - } - - static void finished_cached_dirents_count(struct cached_dirents *cde, -- struct dir_context *ctx) -+ struct dir_context *ctx, struct file *file) - { -- if (cde->ctx != ctx) -+ if (cde->file != file) - return; - if (cde->is_valid || cde->is_failed) - return; -@@ -876,11 +876,12 @@ static void finished_cached_dirents_coun - static void add_cached_dirent(struct cached_dirents *cde, - struct dir_context *ctx, - const char *name, int namelen, -- struct cifs_fattr *fattr) -+ struct cifs_fattr *fattr, -+ struct file *file) - { - struct cached_dirent *de; - -- if (cde->ctx != ctx) -+ if (cde->file != file) - return; - if (cde->is_valid || cde->is_failed) - return; -@@ -910,7 +911,8 @@ static void add_cached_dirent(struct cac - static bool cifs_dir_emit(struct dir_context *ctx, - const char *name, int namelen, - struct cifs_fattr *fattr, -- struct cached_fid *cfid) -+ struct cached_fid *cfid, -+ struct file *file) - { - bool rc; - ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); -@@ -922,7 +924,7 @@ static bool cifs_dir_emit(struct dir_con - if (cfid) { - mutex_lock(&cfid->dirents.de_mutex); - add_cached_dirent(&cfid->dirents, ctx, name, namelen, -- fattr); -+ fattr, file); - mutex_unlock(&cfid->dirents.de_mutex); - } - -@@ -1022,7 +1024,7 @@ static int cifs_filldir(char *find_entry - cifs_prime_dcache(file_dentry(file), &name, &fattr); - - return !cifs_dir_emit(ctx, name.name, name.len, -- &fattr, cfid); -+ &fattr, cfid, file); - } - - -@@ -1073,8 +1075,8 @@ int cifs_readdir(struct file *file, stru - * we need to initialize scanning and storing the - * directory content. - */ -- if (ctx->pos == 0 && cfid->dirents.ctx == NULL) { -- cfid->dirents.ctx = ctx; -+ if (ctx->pos == 0 && cfid->dirents.file == NULL) { -+ cfid->dirents.file = file; - cfid->dirents.pos = 2; - } - /* -@@ -1142,7 +1144,7 @@ int cifs_readdir(struct file *file, stru - } else { - if (cfid) { - mutex_lock(&cfid->dirents.de_mutex); -- finished_cached_dirents_count(&cfid->dirents, ctx); -+ finished_cached_dirents_count(&cfid->dirents, ctx, file); - mutex_unlock(&cfid->dirents.de_mutex); - } - cifs_dbg(FYI, "Could not find entry\n"); -@@ -1183,7 +1185,7 @@ int cifs_readdir(struct file *file, stru - ctx->pos++; - if (cfid) { - mutex_lock(&cfid->dirents.de_mutex); -- update_cached_dirents_count(&cfid->dirents, ctx); -+ update_cached_dirents_count(&cfid->dirents, file); - mutex_unlock(&cfid->dirents.de_mutex); - } - diff --git a/debian/patches/patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch b/debian/patches/patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch deleted file mode 100644 index 1a7b7f0..0000000 --- a/debian/patches/patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 9d330e139e9993f2489fcfe3048c8e737085646d Mon Sep 17 00:00:00 2001 -From: Namjae Jeon -Date: Fri, 13 Jun 2025 10:12:43 +0900 -Subject: ksmbd: fix null pointer dereference in destroy_previous_session - -If client set ->PreviousSessionId on kerberos session setup stage, -NULL pointer dereference error will happen. Since sess->user is not -set yet, It can pass the user argument as NULL to destroy_previous_session. -sess->user will be set in ksmbd_krb5_authenticate(). So this patch move -calling destroy_previous_session() after ksmbd_krb5_authenticate(). - -Cc: stable@vger.kernel.org -Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27391 -Signed-off-by: Namjae Jeon -Signed-off-by: Steve French ---- - fs/smb/server/smb2pdu.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - ---- a/fs/smb/server/smb2pdu.c -+++ b/fs/smb/server/smb2pdu.c -@@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmb - out_len = work->response_sz - - (le16_to_cpu(rsp->SecurityBufferOffset) + 4); - -- /* Check previous session */ -- prev_sess_id = le64_to_cpu(req->PreviousSessionId); -- if (prev_sess_id && prev_sess_id != sess->id) -- destroy_previous_session(conn, sess->user, prev_sess_id); -- - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, - out_blob, &out_len); - if (retval) { - ksmbd_debug(SMB, "krb5 authentication failed\n"); - return -EINVAL; - } -+ -+ /* Check previous session */ -+ prev_sess_id = le64_to_cpu(req->PreviousSessionId); -+ if (prev_sess_id && prev_sess_id != sess->id) -+ destroy_previous_session(conn, sess->user, prev_sess_id); -+ - rsp->SecurityBufferLength = cpu_to_le16(out_len); - - if ((conn->sign || server_conf.enforced_signing) || diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch index 3fc123c..e48af1d 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0001-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch @@ -32,7 +32,7 @@ Signed-off-by: Alexandre Frade --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3994,6 +3994,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -4003,6 +4003,7 @@ static int tcp_ack(struct sock *sk, cons prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch index 3569dbc..57064a5 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0005-net-tcp_bbr-v2-export-FLAG_ECE-in-rate_sample.is_ece.patch @@ -28,7 +28,7 @@ Signed-off-by: Alexandre Frade struct tcp_congestion_ops { --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -4084,6 +4084,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -4093,6 +4093,7 @@ static int tcp_ack(struct sock *sk, cons delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch index 33306b6..40b643c 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0006-net-tcp_bbr-v2-introduce-ca_ops-skb_marked_lost-CC-m.patch @@ -42,7 +42,7 @@ Signed-off-by: Alexandre Frade */ --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -1139,7 +1139,12 @@ static void tcp_verify_retransmit_hint(s +@@ -1135,7 +1135,12 @@ static void tcp_verify_retransmit_hint(s */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch index 2ad901a..8a2526c 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0007-net-tcp_bbr-v2-adjust-skb-tx.in_flight-upon-merge-in.patch @@ -39,7 +39,7 @@ Signed-off-by: Alexandre Frade --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -1516,6 +1516,17 @@ static bool tcp_shifted_skb(struct sock +@@ -1512,6 +1512,17 @@ static bool tcp_shifted_skb(struct sock WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch index 4bc995d..35245fb 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0011-net-tcp-add-fast_ack_mode-1-skip-rwin-check-in-tcp_f.patch @@ -54,7 +54,7 @@ Signed-off-by: Alexandre Frade if (tcp_ca_needs_ecn(sk)) --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -5800,13 +5800,14 @@ static void __tcp_ack_snd_check(struct s +@@ -5809,13 +5809,14 @@ static void __tcp_ack_snd_check(struct s /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch index 8173be7..d1185f8 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0013-net-tcp_bbr-v2-inform-CC-module-of-losses-repaired-b.patch @@ -35,7 +35,7 @@ Signed-off-by: Alexandre Frade /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3881,6 +3881,7 @@ static void tcp_process_tlp_ack(struct s +@@ -3890,6 +3890,7 @@ static void tcp_process_tlp_ack(struct s /* ACK advances: there was a loss, so reduce cwnd. Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ diff --git a/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch b/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch index 33fe186..f35b65a 100644 --- a/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch +++ b/debian/patches/patchset-xanmod/net/tcp/bbr3/0014-net-tcp_bbr-v2-introduce-is_acking_tlp_retrans_seq-i.patch @@ -31,7 +31,7 @@ Signed-off-by: Alexandre Frade }; --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -3864,7 +3864,8 @@ static int tcp_replace_ts_recent(struct +@@ -3873,7 +3873,8 @@ static int tcp_replace_ts_recent(struct /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ @@ -41,7 +41,7 @@ Signed-off-by: Alexandre Frade { struct tcp_sock *tp = tcp_sk(sk); -@@ -3892,6 +3893,11 @@ static void tcp_process_tlp_ack(struct s +@@ -3901,6 +3902,11 @@ static void tcp_process_tlp_ack(struct s FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; @@ -53,7 +53,7 @@ Signed-off-by: Alexandre Frade } } -@@ -4077,7 +4083,7 @@ static int tcp_ack(struct sock *sk, cons +@@ -4086,7 +4092,7 @@ static int tcp_ack(struct sock *sk, cons tcp_in_ack_event(sk, flag); if (tp->tlp_high_seq) @@ -62,7 +62,7 @@ Signed-off-by: Alexandre Frade if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -4122,7 +4128,7 @@ no_queue: +@@ -4131,7 +4137,7 @@ no_queue: tcp_ack_probe(sk); if (tp->tlp_high_seq) diff --git a/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch b/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch index 72cfd44..9a25b9c 100644 --- a/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch +++ b/debian/patches/patchset-xanmod/net/tcp/cloudflare/0001-tcp-Add-a-sysctl-to-skip-tcp-collapse-processing-whe.patch @@ -83,7 +83,7 @@ Signed-off-by: Alexandre Frade .maxlen = sizeof(u8), --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c -@@ -5682,6 +5682,7 @@ static bool tcp_prune_ofo_queue(struct s +@@ -5691,6 +5691,7 @@ static bool tcp_prune_ofo_queue(struct s static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -91,7 +91,7 @@ Signed-off-by: Alexandre Frade NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); -@@ -5693,6 +5694,39 @@ static int tcp_prune_queue(struct sock * +@@ -5702,6 +5703,39 @@ static int tcp_prune_queue(struct sock * if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -131,7 +131,7 @@ Signed-off-by: Alexandre Frade tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) tcp_collapse(sk, &sk->sk_receive_queue, NULL, -@@ -5711,6 +5745,8 @@ static int tcp_prune_queue(struct sock * +@@ -5720,6 +5754,8 @@ static int tcp_prune_queue(struct sock * if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; diff --git a/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch b/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch index 872ede6..c8644d2 100644 --- a/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch +++ b/debian/patches/patchset-xanmod/pci_acso/0001-PCI-Enable-overrides-for-missing-ACS-capabilities.patch @@ -180,7 +180,7 @@ Signed-off-by: Alexandre Frade /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. -@@ -5171,6 +5271,8 @@ static const struct pci_dev_acs_enabled +@@ -5194,6 +5294,8 @@ static const struct pci_dev_acs_enabled { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, diff --git a/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch b/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch index 55126fe..8a622da 100644 --- a/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch +++ b/debian/patches/patchset-zen/sauce/0002-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch @@ -94,7 +94,7 @@ Contains: -#endif --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c -@@ -1629,7 +1629,7 @@ static irqreturn_t ahci_thunderx_irq_han +@@ -1662,7 +1662,7 @@ static irqreturn_t ahci_thunderx_irq_han } #endif @@ -103,7 +103,7 @@ Contains: struct ahci_host_priv *hpriv) { int i; -@@ -1642,7 +1642,7 @@ static void ahci_remap_check(struct pci_ +@@ -1675,7 +1675,7 @@ static void ahci_remap_check(struct pci_ pci_resource_len(pdev, bar) < SZ_512K || bar != AHCI_PCI_BAR_STANDARD || !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) @@ -112,7 +112,7 @@ Contains: cap = readq(hpriv->mmio + AHCI_REMAP_CAP); for (i = 0; i < AHCI_MAX_REMAP; i++) { -@@ -1657,18 +1657,11 @@ static void ahci_remap_check(struct pci_ +@@ -1690,18 +1690,11 @@ static void ahci_remap_check(struct pci_ } if (!hpriv->remapped_nvme) @@ -135,7 +135,7 @@ Contains: } static int ahci_get_irq_vector(struct ata_host *host, int port) -@@ -1912,7 +1905,9 @@ static int ahci_init_one(struct pci_dev +@@ -1945,7 +1938,9 @@ static int ahci_init_one(struct pci_dev return -ENOMEM; /* detect remapped nvme devices */ diff --git a/debian/patches/series b/debian/patches/series index 9fce1a7..c962ea4 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -129,9 +129,8 @@ misc-openwrt/0005-mac80211-minstrel_ht-reduce-fluctuations-in-rate-pro.patch misc-openwrt/0006-mac80211-minstrel_ht-rework-rate-downgrade-code-and-.patch misc-openwrt/0007-mac80211-increase-quantum-for-airtime-scheduler.patch misc-openwrt/0008-mac80211-add-AQL-support-for-broadcast-packets.patch -misc-openwrt/0009-mac80211-revert-dynamically-set-codel-parameters-per-station.patch -misc-openwrt/0010-mac80211-txq-tune.patch -misc-openwrt/0011-cfg80211-aql-txq-limit.patch +misc-openwrt/0009-mac80211-txq-tune.patch +misc-openwrt/0010-cfg80211-aql-txq-limit.patch misc-openwrt/0101-sched-sch_cake-fix-bulk-flow-accounting-logic-for-host.patch misc-openwrt/0201-fq-adjust-memory-size.patch @@ -140,26 +139,6 @@ patchset-pf/cpuidle/0001-cpuidle-Prefer-teo-over-menu-governor.patch patchset-pf/kbuild/0001-ice-mark-ice_write_prof_mask_reg-as-noinline.patch patchset-pf/kbuild/0002-wifi-mac80211-mark-copy_mesh_setup-as-noinline.patch -patchset-pf/nfs/0001-NFSD-unregister-filesystem-in-case-genl_register_fam.patch -patchset-pf/nfs/0002-NFSD-fix-race-between-nfsd-registration-and-exports_.patch -patchset-pf/nfs/0003-nfsd-fix-access-checking-for-NLM-under-XPRTSEC-polic.patch -patchset-pf/nfs/0004-nfsd-nfsd4_spo_must_allow-must-check-this-is-a-v4-co.patch -patchset-pf/nfs/0005-nfsd-Initialize-ssc-before-laundromat_work-to-preven.patch -patchset-pf/nfs/0006-NFSD-Implement-FATTR4_CLONE_BLKSIZE-attribute.patch -patchset-pf/nfs/0007-fs-nfs-read-fix-double-unlock-bug-in-nfs_return_empt.patch -patchset-pf/nfs/0008-NFSv4-Don-t-check-for-OPEN-feature-support-in-v4.1.patch -patchset-pf/nfs/0009-NFS-always-probe-for-LOCALIO-support-asynchronously.patch - -patchset-pf/smb/0001-smb-client-add-NULL-check-in-automount_fullpath.patch -patchset-pf/smb/0002-cifs-reset-connections-for-all-channels-when-reconne.patch -patchset-pf/smb/0003-cifs-update-dstaddr-whenever-channel-iface-is-update.patch -patchset-pf/smb/0004-cifs-dns-resolution-is-needed-only-for-primary-chann.patch -patchset-pf/smb/0005-cifs-deal-with-the-channel-loading-lag-while-picking.patch -patchset-pf/smb/0006-cifs-serialize-other-channels-when-query-server-inte.patch -patchset-pf/smb/0007-cifs-do-not-disable-interface-polling-on-failure.patch -patchset-pf/smb/0008-smb-improve-directory-cache-reuse-for-readdir-operat.patch -patchset-pf/smb/0009-ksmbd-fix-null-pointer-dereference-in-destroy_previo.patch - patchset-xanmod/binder/0001-binder-turn-into-module.patch patchset-xanmod/clearlinux/0001-sched-wait-Do-accept-in-LIFO-order-for-cache-efficie.patch @@ -241,50 +220,16 @@ patchset-zen/sauce/0020-ZEN-INTERACTIVE-dm-crypt-Disable-workqueues-for-cryp.pat patchset-zen/sauce/0021-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch patchset-zen/sauce/0022-ZEN-INTERACTIVE-Document-PDS-BMQ-configuration.patch -patchset-pf/fixes/0001-mm-fix-ratelimit_pages-update-error-in-dirty_ratio_h.patch -patchset-pf/fixes/0002-vgacon-Add-check-for-vc_origin-address-range-in-vgac.patch -patchset-pf/fixes/0003-fbdev-Fix-do_register_framebuffer-to-prevent-null-pt.patch -patchset-pf/fixes/0004-fbdev-Fix-fb_set_var-to-prevent-null-ptr-deref-in-fb.patch -patchset-pf/fixes/0005-anon_inode-use-a-proper-mode-internally.patch -patchset-pf/fixes/0006-anon_inode-explicitly-block-setattr.patch -patchset-pf/fixes/0007-anon_inode-raise-SB_I_NODEV-and-SB_I_NOEXEC.patch -patchset-pf/fixes/0008-fs-add-S_ANON_INODE.patch -patchset-pf/fixes/0009-configfs-Do-not-override-creating-attribute-file-fai.patch -patchset-pf/fixes/0010-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch -patchset-pf/fixes/0011-mm-filemap-unify-read-write-dropbehind-naming.patch -patchset-pf/fixes/0012-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch -patchset-pf/fixes/0013-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch -patchset-pf/fixes/0014-mm-add-folio_expected_ref_count-for-reference-count-.patch -patchset-pf/fixes/0015-mm-fix-uprobe-pte-be-overwritten-when-expanding-vma.patch -patchset-pf/fixes/0016-mm-hugetlb-unshare-page-tables-during-VMA-split-not-.patch -patchset-pf/fixes/0017-mm-hugetlb-fix-huge_pmd_unshare-vs-GUP-fast-race.patch -patchset-pf/fixes/0018-mm-madvise-handle-madvise_lock-failure-during-race-u.patch -patchset-pf/fixes/0019-video-screen_info-Relocate-framebuffers-behind-PCI-b.patch -patchset-pf/fixes/0020-sysfb-Fix-screen_info-type-check-for-VGA.patch -patchset-pf/fixes/0021-watchdog-fix-watchdog-may-detect-false-positive-of-s.patch -patchset-pf/fixes/0022-sched-rt-Fix-race-in-push_rt_task.patch -patchset-pf/fixes/0023-sched-fair-Adhere-to-place_entity-constraints.patch -patchset-pf/fixes/0024-alloc_tag-handle-module-codetag-load-errors-as-modul.patch -patchset-pf/fixes/0025-svcrdma-Unregister-the-device-if-svc_rdma_accept-fai.patch -patchset-pf/fixes/0026-SUNRPC-Prevent-hang-on-NFS-mount-with-xprtsec-m-tls.patch -patchset-pf/fixes/0027-hv_netvsc-fix-potential-deadlock-in-netvsc_vf_setxdp.patch -patchset-pf/fixes/0028-net-clear-the-dst-when-changing-skb-protocol.patch -patchset-pf/fixes/0029-net_sched-sch_sfq-reject-invalid-perturb-period.patch -patchset-pf/fixes/0030-mm-vma-reset-VMA-iterator-on-commit_merge-OOM-failur.patch -patchset-pf/fixes/0031-mm-close-theoretical-race-where-stale-TLB-entries-co.patch -patchset-pf/fixes/0032-io_uring-kbuf-don-t-truncate-end-buffer-for-multiple.patch -patchset-pf/fixes/0033-nvme-always-punt-polled-uring_cmd-end_io-work-to-tas.patch -patchset-pf/fixes/0034-block-Clear-BIO_EMULATES_ZONE_APPEND-flag-on-BIO-com.patch -patchset-pf/fixes/0035-block-use-plug-request-list-tail-for-one-shot-backme.patch -patchset-pf/fixes/0036-Revert-mm-execmem-Unify-early-execmem_cache-behaviou.patch -patchset-pf/fixes/0037-x86-virt-tdx-Avoid-indirect-calls-to-TDX-assembly-fu.patch -patchset-pf/fixes/0038-x86-mm-pat-don-t-collapse-pages-without-PSE-set.patch -patchset-pf/fixes/0039-x86-Kconfig-only-enable-ROX-cache-in-execmem-when-ST.patch -patchset-pf/fixes/0040-x86-its-move-its_pages-array-to-struct-mod_arch_spec.patch -patchset-pf/fixes/0041-x86-its-explicitly-manage-permissions-for-ITS-pages.patch -patchset-pf/fixes/0042-KVM-SVM-Clear-current_vmcb-during-vCPU-free-for-all-.patch -patchset-pf/fixes/0043-KVM-VMX-Flush-shadow-VMCS-on-emergency-reboot.patch +patchset-pf/fixes/0001-Revert-Disable-FOP_DONTCACHE-for-now-due-to-bugs.patch +patchset-pf/fixes/0002-mm-filemap-unify-read-write-dropbehind-naming.patch +patchset-pf/fixes/0003-mm-filemap-unify-dropbehind-flag-testing-and-clearin.patch +patchset-pf/fixes/0004-mm-khugepaged-fix-race-with-folio-split-free-using-t.patch +patchset-pf/fixes/0005-mm-add-folio_expected_ref_count-for-reference-count-.patch +patchset-pf/fixes/0006-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch +patchset-pf/fixes/0007-mm-shmem-swap-fix-softlockup-with-mTHP-swapin.patch +patchset-pf/fixes/0008-mm-gup-revert-mm-gup-fix-infinite-loop-within-__get_.patch +patchset-pf/fixes/0009-mm-userfaultfd-fix-race-of-userfaultfd_move-and-swap.patch +patchset-pf/fixes/0010-dm-raid-fix-variable-in-journal-device-check.patch patchset-zen/fixes/0001-drivers-firmware-skip-simpledrm-if-nvidia-drm.modese.patch patchset-zen/fixes/0002-x86-cpu-Help-users-notice-when-running-old-Intel-mic.patch -patchset-zen/fixes/0003-drm-i915-snps_hdmi_pll-Fix-64-bit-divisor-truncation.patch diff --git a/debian/rules.real b/debian/rules.real index 76bc9b3..8dd9c95 100644 --- a/debian/rules.real +++ b/debian/rules.real @@ -225,7 +225,7 @@ define dh_binary_post dh_lintian dh_icons dh_link - dh_compress + dh_compress $(DH_COMPRESS_ARGS) dh_fixperms dh_missing dh_strip $(DH_STRIP_ARGS) -Xvmlinux -Xvmlinuz @@ -491,6 +491,7 @@ binary_perf build_perf: export _PYTHON_SYSCONFIGDATA_NAME = _sysconfigdata__$(DE build_perf: $(STAMPS_DIR)/build-tools-headers $(call make-tools,tools/perf) +binary_perf: DH_COMPRESS_ARGS = -Xtips.txt binary_perf: DH_SHLIBDEPS_ARGS = -Xperf-read-vdso binary_perf: build_perf $(dh_binary_pre)