release 6.16.5
This commit is contained in:
7
debian/changelog
vendored
7
debian/changelog
vendored
@@ -1,3 +1,10 @@
|
||||
linux (6.16.5-1) sid; urgency=medium
|
||||
|
||||
* New upstream stable update:
|
||||
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.16.5
|
||||
|
||||
-- Konstantin Demin <rockdrilla@gmail.com> Thu, 04 Sep 2025 18:03:41 +0300
|
||||
|
||||
linux (6.16.4-1) sid; urgency=medium
|
||||
|
||||
* New upstream stable update:
|
||||
|
3
debian/config/amd64/config.cloud
vendored
3
debian/config/amd64/config.cloud
vendored
@@ -2114,7 +2114,7 @@ CONFIG_MAX_SKB_FRAGS=19
|
||||
##
|
||||
## file: net/9p/Kconfig
|
||||
##
|
||||
# CONFIG_NET_9P is not set
|
||||
CONFIG_NET_9P_RDMA=m
|
||||
|
||||
##
|
||||
## file: net/appletalk/Kconfig
|
||||
@@ -2376,7 +2376,6 @@ CONFIG_LZ4_COMPRESS=m
|
||||
CONFIG_MFD_CORE=m
|
||||
CONFIG_ND_BTT=m
|
||||
CONFIG_ND_PFN=m
|
||||
CONFIG_NETFS_SUPPORT=m
|
||||
# CONFIG_NVMEM_LAYOUTS is not set
|
||||
CONFIG_PLDMFW=y
|
||||
CONFIG_PREEMPT_NONE_BUILD=y
|
||||
|
14
debian/config/amd64/config.mobile
vendored
14
debian/config/amd64/config.mobile
vendored
@@ -7089,14 +7089,6 @@ CONFIG_XEN_SYMS=y
|
||||
##
|
||||
CONFIG_VALIDATE_FS_PARSER=y
|
||||
|
||||
##
|
||||
## file: fs/9p/Kconfig
|
||||
##
|
||||
CONFIG_9P_FS=m
|
||||
CONFIG_9P_FSCACHE=y
|
||||
CONFIG_9P_FS_POSIX_ACL=y
|
||||
CONFIG_9P_FS_SECURITY=y
|
||||
|
||||
##
|
||||
## file: fs/adfs/Kconfig
|
||||
##
|
||||
@@ -7610,12 +7602,7 @@ CONFIG_6LOWPAN_GHC_EXT_HDR_ROUTE=m
|
||||
##
|
||||
## file: net/9p/Kconfig
|
||||
##
|
||||
CONFIG_NET_9P=m
|
||||
CONFIG_NET_9P_FD=m
|
||||
CONFIG_NET_9P_VIRTIO=m
|
||||
CONFIG_NET_9P_XEN=m
|
||||
CONFIG_NET_9P_USBG=m
|
||||
# CONFIG_NET_9P_DEBUG is not set
|
||||
|
||||
##
|
||||
## file: net/appletalk/Kconfig
|
||||
@@ -8960,7 +8947,6 @@ CONFIG_MTK_NET_PHYLIB=m
|
||||
CONFIG_MULTIPLEXER=m
|
||||
CONFIG_ND_BTT=y
|
||||
CONFIG_ND_PFN=y
|
||||
CONFIG_NETFS_SUPPORT=m
|
||||
CONFIG_NET_CLS=y
|
||||
CONFIG_NFC_MICROREAD=m
|
||||
CONFIG_NFC_MRVL=m
|
||||
|
14
debian/config/amd64/config.vm
vendored
14
debian/config/amd64/config.vm
vendored
@@ -3268,14 +3268,6 @@ CONFIG_SOFT_WATCHDOG_PRETIMEOUT=y
|
||||
##
|
||||
# CONFIG_VALIDATE_FS_PARSER is not set
|
||||
|
||||
##
|
||||
## file: fs/9p/Kconfig
|
||||
##
|
||||
CONFIG_9P_FS=y
|
||||
CONFIG_9P_FSCACHE=y
|
||||
CONFIG_9P_FS_POSIX_ACL=y
|
||||
CONFIG_9P_FS_SECURITY=y
|
||||
|
||||
##
|
||||
## file: fs/adfs/Kconfig
|
||||
##
|
||||
@@ -3662,12 +3654,7 @@ CONFIG_NET_PKTGEN=m
|
||||
##
|
||||
## file: net/9p/Kconfig
|
||||
##
|
||||
CONFIG_NET_9P=y
|
||||
CONFIG_NET_9P_FD=y
|
||||
CONFIG_NET_9P_VIRTIO=y
|
||||
CONFIG_NET_9P_XEN=y
|
||||
CONFIG_NET_9P_RDMA=m
|
||||
# CONFIG_NET_9P_DEBUG is not set
|
||||
|
||||
##
|
||||
## file: net/appletalk/Kconfig
|
||||
@@ -4008,7 +3995,6 @@ CONFIG_MFD_CORE=m
|
||||
CONFIG_MOUSE_PS2_SMBUS=y
|
||||
CONFIG_ND_BTT=m
|
||||
CONFIG_ND_PFN=m
|
||||
CONFIG_NETFS_SUPPORT=y
|
||||
CONFIG_NET_CLS=y
|
||||
CONFIG_NLS_UCS2_UTILS=m
|
||||
# CONFIG_NVMEM_LAYOUTS is not set
|
||||
|
29
debian/config/config
vendored
29
debian/config/config
vendored
@@ -169,6 +169,7 @@ CONFIG_GENERIC_CPU=y
|
||||
# CONFIG_MRAPTORLAKE is not set
|
||||
# CONFIG_MMETEORLAKE is not set
|
||||
# CONFIG_MEMERALDRAPIDS is not set
|
||||
# CONFIG_MDIAMONDRAPIDS is not set
|
||||
## end choice
|
||||
# CONFIG_PROCESSOR_SELECT is not set
|
||||
CONFIG_CPU_SUP_INTEL=y
|
||||
@@ -1805,6 +1806,14 @@ CONFIG_BINFMT_SCRIPT=y
|
||||
CONFIG_BINFMT_MISC=m
|
||||
CONFIG_COREDUMP=y
|
||||
|
||||
##
|
||||
## file: fs/9p/Kconfig
|
||||
##
|
||||
CONFIG_9P_FS=y
|
||||
CONFIG_9P_FSCACHE=y
|
||||
CONFIG_9P_FS_POSIX_ACL=y
|
||||
CONFIG_9P_FS_SECURITY=y
|
||||
|
||||
##
|
||||
## file: fs/afs/Kconfig
|
||||
##
|
||||
@@ -2725,6 +2734,15 @@ CONFIG_VLAN_8021Q=m
|
||||
# CONFIG_VLAN_8021Q_GVRP is not set
|
||||
# CONFIG_VLAN_8021Q_MVRP is not set
|
||||
|
||||
##
|
||||
## file: net/9p/Kconfig
|
||||
##
|
||||
CONFIG_NET_9P=y
|
||||
CONFIG_NET_9P_FD=y
|
||||
CONFIG_NET_9P_VIRTIO=y
|
||||
CONFIG_NET_9P_XEN=y
|
||||
# CONFIG_NET_9P_DEBUG is not set
|
||||
|
||||
##
|
||||
## file: net/batman-adv/Kconfig
|
||||
##
|
||||
@@ -3251,11 +3269,11 @@ CONFIG_UNIX_DIAG=y
|
||||
##
|
||||
## file: net/vmw_vsock/Kconfig
|
||||
##
|
||||
CONFIG_VSOCKETS=m
|
||||
CONFIG_VSOCKETS_DIAG=m
|
||||
CONFIG_VSOCKETS_LOOPBACK=m
|
||||
CONFIG_VSOCKETS=y
|
||||
CONFIG_VSOCKETS_DIAG=y
|
||||
CONFIG_VSOCKETS_LOOPBACK=y
|
||||
CONFIG_VMWARE_VMCI_VSOCKETS=m
|
||||
CONFIG_VIRTIO_VSOCKETS=m
|
||||
CONFIG_VIRTIO_VSOCKETS=y
|
||||
CONFIG_HYPERV_VSOCKETS=m
|
||||
|
||||
##
|
||||
@@ -4121,6 +4139,7 @@ CONFIG_NETFILTER_FAMILY_BRIDGE=y
|
||||
CONFIG_NETFILTER_NETLINK=y
|
||||
CONFIG_NETFILTER_SKIP_EGRESS=y
|
||||
CONFIG_NETFILTER_SYNPROXY=m
|
||||
CONFIG_NETFS_SUPPORT=y
|
||||
CONFIG_NETPOLL=y
|
||||
CONFIG_NET_CRC32C=y
|
||||
CONFIG_NET_DEVLINK=y
|
||||
@@ -4342,7 +4361,7 @@ CONFIG_VIRTIO_ANCHOR=y
|
||||
CONFIG_VIRTIO_PCI_ADMIN_LEGACY=y
|
||||
CONFIG_VIRTIO_PCI_LIB=y
|
||||
CONFIG_VIRTIO_PCI_LIB_LEGACY=y
|
||||
CONFIG_VIRTIO_VSOCKETS_COMMON=m
|
||||
CONFIG_VIRTIO_VSOCKETS_COMMON=y
|
||||
CONFIG_VIRT_CPU_ACCOUNTING=y
|
||||
CONFIG_VMCORE_INFO=y
|
||||
CONFIG_VT_CONSOLE_SLEEP=y
|
||||
|
2
debian/config/defines.toml
vendored
2
debian/config/defines.toml
vendored
@@ -11,7 +11,7 @@ name = 'x86'
|
||||
name = 'none'
|
||||
|
||||
[build]
|
||||
compiler = 'gcc-14'
|
||||
compiler = 'gcc-15'
|
||||
|
||||
[relations.image]
|
||||
depends = [
|
||||
|
@@ -1,44 +0,0 @@
|
||||
From: Oscar Maes <oscmaes92@gmail.com>
|
||||
Date: Tue, 26 Aug 2025 14:17:49 +0200
|
||||
Subject: net: ipv4: fix regression in local-broadcast routes
|
||||
Origin: https://lore.kernel.org/regressions/20250826121750.8451-1-oscmaes92@gmail.com/
|
||||
|
||||
Commit 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes")
|
||||
introduced a regression where local-broadcast packets would have their
|
||||
gateway set in __mkroute_output, which was caused by fi = NULL being
|
||||
removed.
|
||||
|
||||
Fix this by resetting the fib_info for local-broadcast packets. This
|
||||
preserves the intended changes for directed-broadcast packets.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Fixes: 9e30ecf23b1b ("net: ipv4: fix incorrect MTU in broadcast routes")
|
||||
Reported-by: Brett A C Sheffield <bacs@librecast.net>
|
||||
Closes: https://lore.kernel.org/regressions/20250822165231.4353-4-bacs@librecast.net
|
||||
Signed-off-by: Oscar Maes <oscmaes92@gmail.com>
|
||||
---
|
||||
net/ipv4/route.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/net/ipv4/route.c
|
||||
+++ b/net/ipv4/route.c
|
||||
@@ -2573,12 +2573,16 @@ static struct rtable *__mkroute_output(c
|
||||
!netif_is_l3_master(dev_out))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
- if (ipv4_is_lbcast(fl4->daddr))
|
||||
+ if (ipv4_is_lbcast(fl4->daddr)) {
|
||||
type = RTN_BROADCAST;
|
||||
- else if (ipv4_is_multicast(fl4->daddr))
|
||||
+
|
||||
+ /* reset fi to prevent gateway resolution */
|
||||
+ fi = NULL;
|
||||
+ } else if (ipv4_is_multicast(fl4->daddr)) {
|
||||
type = RTN_MULTICAST;
|
||||
- else if (ipv4_is_zeronet(fl4->daddr))
|
||||
+ } else if (ipv4_is_zeronet(fl4->daddr)) {
|
||||
return ERR_PTR(-EINVAL);
|
||||
+ }
|
||||
|
||||
if (dev_out->flags & IFF_LOOPBACK)
|
||||
flags |= RTCF_LOCAL;
|
@@ -37,7 +37,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
{
|
||||
--- a/net/rose/af_rose.c
|
||||
+++ b/net/rose/af_rose.c
|
||||
@@ -1638,7 +1638,7 @@ MODULE_PARM_DESC(rose_ndevs, "number of
|
||||
@@ -1639,7 +1639,7 @@ MODULE_PARM_DESC(rose_ndevs, "number of
|
||||
MODULE_AUTHOR("Jonathan Naylor G4KLX <g4klx@g4klx.demon.co.uk>");
|
||||
MODULE_DESCRIPTION("The amateur radio ROSE network layer protocol");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@@ -50,7 +50,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
|
||||
/* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */
|
||||
static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
|
||||
@@ -13376,6 +13381,9 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||
@@ -13382,6 +13387,9 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
@@ -399,7 +399,7 @@ Signed-off-by: Steve French <stfrench@microsoft.com>
|
||||
.close = cifs_close_file,
|
||||
--- a/fs/smb/client/smb2inode.c
|
||||
+++ b/fs/smb/client/smb2inode.c
|
||||
@@ -1320,7 +1320,7 @@ smb2_set_file_info(struct inode *inode,
|
||||
@@ -1323,7 +1323,7 @@ smb2_set_file_info(struct inode *inode,
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@@ -80,7 +80,7 @@ Signed-off-by: Steve French <stfrench@microsoft.com>
|
||||
* storing reparse points, which is available since
|
||||
--- a/fs/smb/client/smb2inode.c
|
||||
+++ b/fs/smb/client/smb2inode.c
|
||||
@@ -1345,9 +1345,8 @@ struct inode *smb2_create_reparse_inode(
|
||||
@@ -1348,9 +1348,8 @@ struct inode *smb2_create_reparse_inode(
|
||||
* attempt to create reparse point. This will prevent creating unusable
|
||||
* empty object on the server.
|
||||
*/
|
||||
|
@@ -0,0 +1,32 @@
|
||||
From 3bcf08cb1e242f8407315c5dea83a79340b1146a Mon Sep 17 00:00:00 2001
|
||||
From: Cryolitia PukNgae <cryolitia@uniontech.com>
|
||||
Date: Fri, 22 Aug 2025 20:58:08 +0800
|
||||
Subject: ALSA: usb-audio: Add mute TLV for playback volumes on some devices
|
||||
|
||||
Applying the quirk of that, the lowest Playback mixer volume setting
|
||||
mutes the audio output, on more devices.
|
||||
|
||||
Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/merge_requests/2514
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Tested-by: Guoli An <anguoli@uniontech.com>
|
||||
Signed-off-by: Cryolitia PukNgae <cryolitia@uniontech.com>
|
||||
Link: https://patch.msgid.link/20250822-mixer-quirk-v1-1-b19252239c1c@uniontech.com
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
---
|
||||
sound/usb/mixer_quirks.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/sound/usb/mixer_quirks.c
|
||||
+++ b/sound/usb/mixer_quirks.c
|
||||
@@ -4339,9 +4339,11 @@ void snd_usb_mixer_fu_apply_quirk(struct
|
||||
snd_dragonfly_quirk_db_scale(mixer, cval, kctl);
|
||||
break;
|
||||
/* lowest playback value is muted on some devices */
|
||||
+ case USB_ID(0x0572, 0x1b09): /* Conexant Systems (Rockwell), Inc. */
|
||||
case USB_ID(0x0d8c, 0x000c): /* C-Media */
|
||||
case USB_ID(0x0d8c, 0x0014): /* C-Media */
|
||||
case USB_ID(0x19f7, 0x0003): /* RODE NT-USB */
|
||||
+ case USB_ID(0x2d99, 0x0026): /* HECATE G2 GAMING HEADSET */
|
||||
if (strstr(kctl->id.name, "Playback"))
|
||||
cval->min_mute = 1;
|
||||
break;
|
@@ -1,74 +0,0 @@
|
||||
From 7a6182b5469b0c09373c8c02517c75305a899291 Mon Sep 17 00:00:00 2001
|
||||
From: Nikolay Kuratov <kniv@yandex-team.ru>
|
||||
Date: Tue, 5 Aug 2025 16:09:17 +0300
|
||||
Subject: vhost/net: Protect ubufs with rcu read lock in vhost_net_ubuf_put()
|
||||
|
||||
When operating on struct vhost_net_ubuf_ref, the following execution
|
||||
sequence is theoretically possible:
|
||||
CPU0 is finalizing DMA operation CPU1 is doing VHOST_NET_SET_BACKEND
|
||||
// ubufs->refcount == 2
|
||||
vhost_net_ubuf_put() vhost_net_ubuf_put_wait_and_free(oldubufs)
|
||||
vhost_net_ubuf_put_and_wait()
|
||||
vhost_net_ubuf_put()
|
||||
int r = atomic_sub_return(1, &ubufs->refcount);
|
||||
// r = 1
|
||||
int r = atomic_sub_return(1, &ubufs->refcount);
|
||||
// r = 0
|
||||
wait_event(ubufs->wait, !atomic_read(&ubufs->refcount));
|
||||
// no wait occurs here because condition is already true
|
||||
kfree(ubufs);
|
||||
if (unlikely(!r))
|
||||
wake_up(&ubufs->wait); // use-after-free
|
||||
|
||||
This leads to use-after-free on ubufs access. This happens because CPU1
|
||||
skips waiting for wake_up() when refcount is already zero.
|
||||
|
||||
To prevent that use a read-side RCU critical section in vhost_net_ubuf_put(),
|
||||
as suggested by Hillf Danton. For this lock to take effect, free ubufs with
|
||||
kfree_rcu().
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Fixes: 0ad8b480d6ee9 ("vhost: fix ref cnt checking deadlock")
|
||||
Reported-by: Andrey Ryabinin <arbn@yandex-team.com>
|
||||
Suggested-by: Hillf Danton <hdanton@sina.com>
|
||||
Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru>
|
||||
Message-Id: <20250805130917.727332-1-kniv@yandex-team.ru>
|
||||
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
||||
---
|
||||
drivers/vhost/net.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/vhost/net.c
|
||||
+++ b/drivers/vhost/net.c
|
||||
@@ -96,6 +96,7 @@ struct vhost_net_ubuf_ref {
|
||||
atomic_t refcount;
|
||||
wait_queue_head_t wait;
|
||||
struct vhost_virtqueue *vq;
|
||||
+ struct rcu_head rcu;
|
||||
};
|
||||
|
||||
#define VHOST_NET_BATCH 64
|
||||
@@ -247,9 +248,13 @@ vhost_net_ubuf_alloc(struct vhost_virtqu
|
||||
|
||||
static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
|
||||
{
|
||||
- int r = atomic_sub_return(1, &ubufs->refcount);
|
||||
+ int r;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ r = atomic_sub_return(1, &ubufs->refcount);
|
||||
if (unlikely(!r))
|
||||
wake_up(&ubufs->wait);
|
||||
+ rcu_read_unlock();
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -262,7 +267,7 @@ static void vhost_net_ubuf_put_and_wait(
|
||||
static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
|
||||
{
|
||||
vhost_net_ubuf_put_and_wait(ubufs);
|
||||
- kfree(ubufs);
|
||||
+ kfree_rcu(ubufs, rcu);
|
||||
}
|
||||
|
||||
static void vhost_net_clear_ubuf_info(struct vhost_net *n)
|
@@ -0,0 +1,27 @@
|
||||
From c97fe55036b080fd5342059e2ba8d6fc7a9157f0 Mon Sep 17 00:00:00 2001
|
||||
From: Aaron Erhardt <aer@tuxedocomputers.com>
|
||||
Date: Tue, 26 Aug 2025 16:10:54 +0200
|
||||
Subject: ALSA: hda/realtek: Fix headset mic for TongFang X6[AF]R5xxY
|
||||
|
||||
Add a PCI quirk to enable microphone detection on the headphone jack of
|
||||
TongFang X6AR5xxY and X6FR5xxY devices.
|
||||
|
||||
Signed-off-by: Aaron Erhardt <aer@tuxedocomputers.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Link: https://patch.msgid.link/20250826141054.1201482-1-aer@tuxedocomputers.com
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
---
|
||||
sound/pci/hda/patch_realtek.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/sound/pci/hda/patch_realtek.c
|
||||
+++ b/sound/pci/hda/patch_realtek.c
|
||||
@@ -11427,6 +11427,8 @@ static const struct hda_quirk alc269_fix
|
||||
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
|
||||
SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
|
||||
SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
|
||||
+ SND_PCI_QUIRK(0x1d05, 0x300f, "TongFang X6AR5xxY", ALC2XX_FIXUP_HEADSET_MIC),
|
||||
+ SND_PCI_QUIRK(0x1d05, 0x3019, "TongFang X6FR5xxY", ALC2XX_FIXUP_HEADSET_MIC),
|
||||
SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
|
||||
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
|
||||
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
|
128
debian/patches/patchset-pf/steady/0010-of_numa-fix-uninitialized-memory-nodes-causing-kerne.patch
vendored
Normal file
128
debian/patches/patchset-pf/steady/0010-of_numa-fix-uninitialized-memory-nodes-causing-kerne.patch
vendored
Normal file
@@ -0,0 +1,128 @@
|
||||
From 488f95db8bbdc251cae8548294a09832840c9333 Mon Sep 17 00:00:00 2001
|
||||
From: Yin Tirui <yintirui@huawei.com>
|
||||
Date: Tue, 19 Aug 2025 15:55:10 +0800
|
||||
Subject: of_numa: fix uninitialized memory nodes causing kernel panic
|
||||
|
||||
When there are memory-only nodes (nodes without CPUs), these nodes are not
|
||||
properly initialized, causing kernel panic during boot.
|
||||
|
||||
of_numa_init
|
||||
of_numa_parse_cpu_nodes
|
||||
node_set(nid, numa_nodes_parsed);
|
||||
of_numa_parse_memory_nodes
|
||||
|
||||
In of_numa_parse_cpu_nodes, numa_nodes_parsed gets updated only for nodes
|
||||
containing CPUs. Memory-only nodes should have been updated in
|
||||
of_numa_parse_memory_nodes, but they weren't.
|
||||
|
||||
Subsequently, when free_area_init() attempts to access NODE_DATA() for
|
||||
these uninitialized memory nodes, the kernel panics due to NULL pointer
|
||||
dereference.
|
||||
|
||||
This can be reproduced on ARM64 QEMU with 1 CPU and 2 memory nodes:
|
||||
|
||||
qemu-system-aarch64 \
|
||||
-cpu host -nographic \
|
||||
-m 4G -smp 1 \
|
||||
-machine virt,accel=kvm,gic-version=3,iommu=smmuv3 \
|
||||
-object memory-backend-ram,size=2G,id=mem0 \
|
||||
-object memory-backend-ram,size=2G,id=mem1 \
|
||||
-numa node,nodeid=0,memdev=mem0 \
|
||||
-numa node,nodeid=1,memdev=mem1 \
|
||||
-kernel $IMAGE \
|
||||
-hda $DISK \
|
||||
-append "console=ttyAMA0 root=/dev/vda rw earlycon"
|
||||
|
||||
[ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x481fd010]
|
||||
[ 0.000000] Linux version 6.17.0-rc1-00001-gabb4b3daf18c-dirty (yintirui@local) (gcc (GCC) 12.3.1, GNU ld (GNU Binutils) 2.41) #52 SMP PREEMPT Mon Aug 18 09:49:40 CST 2025
|
||||
[ 0.000000] KASLR enabled
|
||||
[ 0.000000] random: crng init done
|
||||
[ 0.000000] Machine model: linux,dummy-virt
|
||||
[ 0.000000] efi: UEFI not found.
|
||||
[ 0.000000] earlycon: pl11 at MMIO 0x0000000009000000 (options '')
|
||||
[ 0.000000] printk: legacy bootconsole [pl11] enabled
|
||||
[ 0.000000] OF: reserved mem: Reserved memory: No reserved-memory node in the DT
|
||||
[ 0.000000] NODE_DATA(0) allocated [mem 0xbfffd9c0-0xbfffffff]
|
||||
[ 0.000000] node 1 must be removed before remove section 23
|
||||
[ 0.000000] Zone ranges:
|
||||
[ 0.000000] DMA [mem 0x0000000040000000-0x00000000ffffffff]
|
||||
[ 0.000000] DMA32 empty
|
||||
[ 0.000000] Normal [mem 0x0000000100000000-0x000000013fffffff]
|
||||
[ 0.000000] Movable zone start for each node
|
||||
[ 0.000000] Early memory node ranges
|
||||
[ 0.000000] node 0: [mem 0x0000000040000000-0x00000000bfffffff]
|
||||
[ 0.000000] node 1: [mem 0x00000000c0000000-0x000000013fffffff]
|
||||
[ 0.000000] Initmem setup node 0 [mem 0x0000000040000000-0x00000000bfffffff]
|
||||
[ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0
|
||||
[ 0.000000] Mem abort info:
|
||||
[ 0.000000] ESR = 0x0000000096000004
|
||||
[ 0.000000] EC = 0x25: DABT (current EL), IL = 32 bits
|
||||
[ 0.000000] SET = 0, FnV = 0
|
||||
[ 0.000000] EA = 0, S1PTW = 0
|
||||
[ 0.000000] FSC = 0x04: level 0 translation fault
|
||||
[ 0.000000] Data abort info:
|
||||
[ 0.000000] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
|
||||
[ 0.000000] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
|
||||
[ 0.000000] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
|
||||
[ 0.000000] [00000000000000a0] user address but active_mm is swapper
|
||||
[ 0.000000] Internal error: Oops: 0000000096000004 [#1] SMP
|
||||
[ 0.000000] Modules linked in:
|
||||
[ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.17.0-rc1-00001-g760c6dabf762-dirty #54 PREEMPT
|
||||
[ 0.000000] Hardware name: linux,dummy-virt (DT)
|
||||
[ 0.000000] pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
|
||||
[ 0.000000] pc : free_area_init+0x50c/0xf9c
|
||||
[ 0.000000] lr : free_area_init+0x5c0/0xf9c
|
||||
[ 0.000000] sp : ffffa02ca0f33c00
|
||||
[ 0.000000] x29: ffffa02ca0f33cb0 x28: 0000000000000000 x27: 0000000000000000
|
||||
[ 0.000000] x26: 4ec4ec4ec4ec4ec5 x25: 00000000000c0000 x24: 00000000000c0000
|
||||
[ 0.000000] x23: 0000000000040000 x22: 0000000000000000 x21: ffffa02ca0f3b368
|
||||
[ 0.000000] x20: ffffa02ca14c7b98 x19: 0000000000000000 x18: 0000000000000002
|
||||
[ 0.000000] x17: 000000000000cacc x16: 0000000000000001 x15: 0000000000000001
|
||||
[ 0.000000] x14: 0000000080000000 x13: 0000000000000018 x12: 0000000000000002
|
||||
[ 0.000000] x11: ffffa02ca0fd4f00 x10: ffffa02ca14bab20 x9 : ffffa02ca14bab38
|
||||
[ 0.000000] x8 : 00000000000c0000 x7 : 0000000000000001 x6 : 0000000000000002
|
||||
[ 0.000000] x5 : 0000000140000000 x4 : ffffa02ca0f33c90 x3 : ffffa02ca0f33ca0
|
||||
[ 0.000000] x2 : ffffa02ca0f33c98 x1 : 0000000080000000 x0 : 0000000000000001
|
||||
[ 0.000000] Call trace:
|
||||
[ 0.000000] free_area_init+0x50c/0xf9c (P)
|
||||
[ 0.000000] bootmem_init+0x110/0x1dc
|
||||
[ 0.000000] setup_arch+0x278/0x60c
|
||||
[ 0.000000] start_kernel+0x70/0x748
|
||||
[ 0.000000] __primary_switched+0x88/0x90
|
||||
[ 0.000000] Code: d503201f b98093e0 52800016 f8607a93 (f9405260)
|
||||
[ 0.000000] ---[ end trace 0000000000000000 ]---
|
||||
[ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
|
||||
[ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250819075510.2079961-1-yintirui@huawei.com
|
||||
Fixes: 767507654c22 ("arch_numa: switch over to numa_memblks")
|
||||
Signed-off-by: Yin Tirui <yintirui@huawei.com>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
|
||||
Cc: Chen Jun <chenjun102@huawei.com>
|
||||
Cc: Dan Williams <dan.j.williams@intel.com>
|
||||
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
|
||||
Cc: Rob Herring <robh@kernel.org>
|
||||
Cc: Saravana Kannan <saravanak@google.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
drivers/of/of_numa.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/of/of_numa.c
|
||||
+++ b/drivers/of/of_numa.c
|
||||
@@ -59,8 +59,11 @@ static int __init of_numa_parse_memory_n
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
- for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
|
||||
+ for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++) {
|
||||
r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
|
||||
+ if (!r)
|
||||
+ node_set(nid, numa_nodes_parsed);
|
||||
+ }
|
||||
|
||||
if (!i || r) {
|
||||
of_node_put(np);
|
@@ -0,0 +1,56 @@
|
||||
From d165b8446312b442d98a7072eebdbf98f30cdb11 Mon Sep 17 00:00:00 2001
|
||||
From: Sasha Levin <sashal@kernel.org>
|
||||
Date: Thu, 31 Jul 2025 10:44:31 -0400
|
||||
Subject: mm/userfaultfd: fix kmap_local LIFO ordering for CONFIG_HIGHPTE
|
||||
|
||||
With CONFIG_HIGHPTE on 32-bit ARM, move_pages_pte() maps PTE pages using
|
||||
kmap_local_page(), which requires unmapping in Last-In-First-Out order.
|
||||
|
||||
The current code maps dst_pte first, then src_pte, but unmaps them in the
|
||||
same order (dst_pte, src_pte), violating the LIFO requirement. This
|
||||
causes the warning in kunmap_local_indexed():
|
||||
|
||||
WARNING: CPU: 0 PID: 604 at mm/highmem.c:622 kunmap_local_indexed+0x178/0x17c
|
||||
addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)
|
||||
|
||||
Fix this by reversing the unmap order to respect LIFO ordering.
|
||||
|
||||
This issue follows the same pattern as similar fixes:
|
||||
- commit eca6828403b8 ("crypto: skcipher - fix mismatch between mapping and unmapping order")
|
||||
- commit 8cf57c6df818 ("nilfs2: eliminate staggered calls to kunmap in nilfs_rename")
|
||||
|
||||
Both of which addressed the same fundamental requirement that kmap_local
|
||||
operations must follow LIFO ordering.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250731144431.773923-1-sashal@kernel.org
|
||||
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
|
||||
Signed-off-by: Sasha Levin <sashal@kernel.org>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
|
||||
Cc: Andrea Arcangeli <aarcange@redhat.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
mm/userfaultfd.c | 9 +++++++--
|
||||
1 file changed, 7 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/mm/userfaultfd.c
|
||||
+++ b/mm/userfaultfd.c
|
||||
@@ -1453,10 +1453,15 @@ out:
|
||||
folio_unlock(src_folio);
|
||||
folio_put(src_folio);
|
||||
}
|
||||
- if (dst_pte)
|
||||
- pte_unmap(dst_pte);
|
||||
+ /*
|
||||
+ * Unmap in reverse order (LIFO) to maintain proper kmap_local
|
||||
+ * index ordering when CONFIG_HIGHPTE is enabled. We mapped dst_pte
|
||||
+ * first, then src_pte, so we must unmap src_pte first, then dst_pte.
|
||||
+ */
|
||||
if (src_pte)
|
||||
pte_unmap(src_pte);
|
||||
+ if (dst_pte)
|
||||
+ pte_unmap(dst_pte);
|
||||
mmu_notifier_invalidate_range_end(&range);
|
||||
if (si)
|
||||
put_swap_device(si);
|
@@ -0,0 +1,45 @@
|
||||
From b0eaacb0b9b0412916e3ca0c769f8aab34d82725 Mon Sep 17 00:00:00 2001
|
||||
From: Quanmin Yan <yanquanmin1@huawei.com>
|
||||
Date: Thu, 21 Aug 2025 20:55:55 +0800
|
||||
Subject: mm/damon/core: prevent unnecessary overflow in
|
||||
damos_set_effective_quota()
|
||||
|
||||
On 32-bit systems, the throughput calculation in
|
||||
damos_set_effective_quota() is prone to unnecessary multiplication
|
||||
overflow. Use mult_frac() to fix it.
|
||||
|
||||
Andrew Paniakin also recently found and privately reported this issue, on
|
||||
64-bit systems. This can also happen on 64-bit systems, once the charged
|
||||
size exceeds ~17 TiB. On systems running for long time in production,
|
||||
this issue can actually happen.
|
||||
|
||||
More specifically, when a DAMOS scheme having the time quota run for
|
||||
long time, throughput calculation can overflow and set esz too small. As a
|
||||
result, the speed of the scheme gets unexpectedly slow.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250821125555.3020951-1-yanquanmin1@huawei.com
|
||||
Fixes: 1cd243030059 ("mm/damon/schemes: implement time quota")
|
||||
Signed-off-by: Quanmin Yan <yanquanmin1@huawei.com>
|
||||
Reported-by: Andrew Paniakin <apanyaki@amazon.com>
|
||||
Reviewed-by: SeongJae Park <sj@kernel.org>
|
||||
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
|
||||
Cc: ze zuo <zuoze1@huawei.com>
|
||||
Cc: <stable@vger.kernel.org> [5.16+]
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
mm/damon/core.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/mm/damon/core.c
|
||||
+++ b/mm/damon/core.c
|
||||
@@ -2026,8 +2026,8 @@ static void damos_set_effective_quota(st
|
||||
|
||||
if (quota->ms) {
|
||||
if (quota->total_charged_ns)
|
||||
- throughput = quota->total_charged_sz * 1000000 /
|
||||
- quota->total_charged_ns;
|
||||
+ throughput = mult_frac(quota->total_charged_sz, 1000000,
|
||||
+ quota->total_charged_ns);
|
||||
else
|
||||
throughput = PAGE_SIZE * 1024;
|
||||
esz = min(throughput * quota->ms, esz);
|
104
debian/patches/patchset-pf/steady/0013-mm-fix-accounting-of-memmap-pages.patch
vendored
Normal file
104
debian/patches/patchset-pf/steady/0013-mm-fix-accounting-of-memmap-pages.patch
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
From 3254cafc371c874a98f74f571decff95ca77df76 Mon Sep 17 00:00:00 2001
|
||||
From: Sumanth Korikkar <sumanthk@linux.ibm.com>
|
||||
Date: Thu, 7 Aug 2025 20:35:45 +0200
|
||||
Subject: mm: fix accounting of memmap pages
|
||||
|
||||
For !CONFIG_SPARSEMEM_VMEMMAP, memmap page accounting is currently done
|
||||
upfront in sparse_buffer_init(). However, sparse_buffer_alloc() may
|
||||
return NULL in failure scenario.
|
||||
|
||||
Also, memmap pages may be allocated either from the memblock allocator
|
||||
during early boot or from the buddy allocator. When removed via
|
||||
arch_remove_memory(), accounting of memmap pages must reflect the original
|
||||
allocation source.
|
||||
|
||||
To ensure correctness:
|
||||
* Account memmap pages after successful allocation in sparse_init_nid()
|
||||
and section_activate().
|
||||
* Account memmap pages in section_deactivate() based on allocation
|
||||
source.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250807183545.1424509-1-sumanthk@linux.ibm.com
|
||||
Fixes: 15995a352474 ("mm: report per-page metadata information")
|
||||
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
|
||||
Suggested-by: David Hildenbrand <david@redhat.com>
|
||||
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
|
||||
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
|
||||
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
|
||||
Cc: Heiko Carstens <hca@linux.ibm.com>
|
||||
Cc: Vasily Gorbik <gor@linux.ibm.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
mm/sparse-vmemmap.c | 5 -----
|
||||
mm/sparse.c | 15 +++++++++------
|
||||
2 files changed, 9 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/mm/sparse-vmemmap.c
|
||||
+++ b/mm/sparse-vmemmap.c
|
||||
@@ -578,11 +578,6 @@ struct page * __meminit __populate_secti
|
||||
if (r < 0)
|
||||
return NULL;
|
||||
|
||||
- if (system_state == SYSTEM_BOOTING)
|
||||
- memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
|
||||
- else
|
||||
- memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
|
||||
-
|
||||
return pfn_to_page(pfn);
|
||||
}
|
||||
|
||||
--- a/mm/sparse.c
|
||||
+++ b/mm/sparse.c
|
||||
@@ -454,9 +454,6 @@ static void __init sparse_buffer_init(un
|
||||
*/
|
||||
sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true);
|
||||
sparsemap_buf_end = sparsemap_buf + size;
|
||||
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
|
||||
- memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE));
|
||||
-#endif
|
||||
}
|
||||
|
||||
static void __init sparse_buffer_fini(void)
|
||||
@@ -567,6 +564,8 @@ static void __init sparse_init_nid(int n
|
||||
sparse_buffer_fini();
|
||||
goto failed;
|
||||
}
|
||||
+ memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
|
||||
+ PAGE_SIZE));
|
||||
sparse_init_early_section(nid, map, pnum, 0);
|
||||
}
|
||||
}
|
||||
@@ -680,7 +679,6 @@ static void depopulate_section_memmap(un
|
||||
unsigned long start = (unsigned long) pfn_to_page(pfn);
|
||||
unsigned long end = start + nr_pages * sizeof(struct page);
|
||||
|
||||
- memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE)));
|
||||
vmemmap_free(start, end, altmap);
|
||||
}
|
||||
static void free_map_bootmem(struct page *memmap)
|
||||
@@ -856,10 +854,14 @@ static void section_deactivate(unsigned
|
||||
* The memmap of early sections is always fully populated. See
|
||||
* section_activate() and pfn_valid() .
|
||||
*/
|
||||
- if (!section_is_early)
|
||||
+ if (!section_is_early) {
|
||||
+ memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
|
||||
depopulate_section_memmap(pfn, nr_pages, altmap);
|
||||
- else if (memmap)
|
||||
+ } else if (memmap) {
|
||||
+ memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
|
||||
+ PAGE_SIZE)));
|
||||
free_map_bootmem(memmap);
|
||||
+ }
|
||||
|
||||
if (empty)
|
||||
ms->section_mem_map = (unsigned long)NULL;
|
||||
@@ -904,6 +906,7 @@ static struct page * __meminit section_a
|
||||
section_deactivate(pfn, nr_pages, altmap);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
+ memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
|
||||
|
||||
return memmap;
|
||||
}
|
209
debian/patches/patchset-pf/steady/0014-mm-move-page-table-sync-declarations-to-linux-pgtabl.patch
vendored
Normal file
209
debian/patches/patchset-pf/steady/0014-mm-move-page-table-sync-declarations-to-linux-pgtabl.patch
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
From adfa22dd89640042c0df2d8906781ea74da9166c Mon Sep 17 00:00:00 2001
|
||||
From: Harry Yoo <harry.yoo@oracle.com>
|
||||
Date: Mon, 18 Aug 2025 11:02:04 +0900
|
||||
Subject: mm: move page table sync declarations to linux/pgtable.h
|
||||
|
||||
During our internal testing, we started observing intermittent boot
|
||||
failures when the machine uses 4-level paging and has a large amount of
|
||||
persistent memory:
|
||||
|
||||
BUG: unable to handle page fault for address: ffffe70000000034
|
||||
#PF: supervisor write access in kernel mode
|
||||
#PF: error_code(0x0002) - not-present page
|
||||
PGD 0 P4D 0
|
||||
Oops: 0002 [#1] SMP NOPTI
|
||||
RIP: 0010:__init_single_page+0x9/0x6d
|
||||
Call Trace:
|
||||
<TASK>
|
||||
__init_zone_device_page+0x17/0x5d
|
||||
memmap_init_zone_device+0x154/0x1bb
|
||||
pagemap_range+0x2e0/0x40f
|
||||
memremap_pages+0x10b/0x2f0
|
||||
devm_memremap_pages+0x1e/0x60
|
||||
dev_dax_probe+0xce/0x2ec [device_dax]
|
||||
dax_bus_probe+0x6d/0xc9
|
||||
[... snip ...]
|
||||
</TASK>
|
||||
|
||||
It turns out that the kernel panics while initializing vmemmap (struct
|
||||
page array) when the vmemmap region spans two PGD entries, because the new
|
||||
PGD entry is only installed in init_mm.pgd, but not in the page tables of
|
||||
other tasks.
|
||||
|
||||
And looking at __populate_section_memmap():
|
||||
if (vmemmap_can_optimize(altmap, pgmap))
|
||||
// does not sync top level page tables
|
||||
r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
|
||||
else
|
||||
// sync top level page tables in x86
|
||||
r = vmemmap_populate(start, end, nid, altmap);
|
||||
|
||||
In the normal path, vmemmap_populate() in arch/x86/mm/init_64.c
|
||||
synchronizes the top level page table (See commit 9b861528a801 ("x86-64,
|
||||
mem: Update all PGDs for direct mapping and vmemmap mapping changes")) so
|
||||
that all tasks in the system can see the new vmemmap area.
|
||||
|
||||
However, when vmemmap_can_optimize() returns true, the optimized path
|
||||
skips synchronization of top-level page tables. This is because
|
||||
vmemmap_populate_compound_pages() is implemented in core MM code, which
|
||||
does not handle synchronization of the top-level page tables. Instead,
|
||||
the core MM has historically relied on each architecture to perform this
|
||||
synchronization manually.
|
||||
|
||||
We're not the first party to encounter a crash caused by not-sync'd top
|
||||
level page tables: earlier this year, Gwan-gyeong Mun attempted to address
|
||||
the issue [1] [2] after hitting a kernel panic when x86 code accessed the
|
||||
vmemmap area before the corresponding top-level entries were synced. At
|
||||
that time, the issue was believed to be triggered only when struct page
|
||||
was enlarged for debugging purposes, and the patch did not get further
|
||||
updates.
|
||||
|
||||
It turns out that the current approach of relying on each arch to handle the
|
||||
page table sync manually is fragile because 1) it's easy to forget to sync
|
||||
the top level page table, and 2) it's also easy to overlook that the
|
||||
kernel should not access the vmemmap and direct mapping areas before the
|
||||
sync.
|
||||
|
||||
To address this, Dave Hansen suggested [3] [4] introducing
|
||||
{pgd,p4d}_populate_kernel() for updating kernel portion of the page tables
|
||||
and allow each architecture to explicitly perform synchronization when
|
||||
installing top-level entries. With this approach, we no longer need to
|
||||
worry about missing the sync step, reducing the risk of future
|
||||
regressions.
|
||||
|
||||
The new interface reuses existing ARCH_PAGE_TABLE_SYNC_MASK,
|
||||
PGTBL_P*D_MODIFIED and arch_sync_kernel_mappings() facility used by
|
||||
vmalloc and ioremap to synchronize page tables.
|
||||
|
||||
pgd_populate_kernel() looks like this:
|
||||
static inline void pgd_populate_kernel(unsigned long addr, pgd_t *pgd,
|
||||
p4d_t *p4d)
|
||||
{
|
||||
pgd_populate(&init_mm, pgd, p4d);
|
||||
if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED)
|
||||
arch_sync_kernel_mappings(addr, addr);
|
||||
}
|
||||
|
||||
It is worth noting that vmalloc() and apply_to_range() carefully
|
||||
synchronize page tables by calling p*d_alloc_track() and
|
||||
arch_sync_kernel_mappings(), and thus they are not affected by this patch
|
||||
series.
|
||||
|
||||
This series was hugely inspired by Dave Hansen's suggestion and hence
|
||||
added Suggested-by: Dave Hansen.
|
||||
|
||||
Cc stable because lack of this series opens the door to intermittent
|
||||
boot failures.
|
||||
|
||||
This patch (of 3):
|
||||
|
||||
Move ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to
|
||||
linux/pgtable.h so that they can be used outside of vmalloc and ioremap.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250818020206.4517-1-harry.yoo@oracle.com
|
||||
Link: https://lkml.kernel.org/r/20250818020206.4517-2-harry.yoo@oracle.com
|
||||
Link: https://lore.kernel.org/linux-mm/20250220064105.808339-1-gwan-gyeong.mun@intel.com [1]
|
||||
Link: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [2]
|
||||
Link: https://lore.kernel.org/linux-mm/d1da214c-53d3-45ac-a8b6-51821c5416e4@intel.com [3]
|
||||
Link: https://lore.kernel.org/linux-mm/4d800744-7b88-41aa-9979-b245e8bf794b@intel.com [4]
|
||||
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
|
||||
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
|
||||
Acked-by: Kiryl Shutsemau <kas@kernel.org>
|
||||
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Reviewed-by: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
|
||||
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Cc: Alexander Potapenko <glider@google.com>
|
||||
Cc: Alistair Popple <apopple@nvidia.com>
|
||||
Cc: Andrey Konovalov <andreyknvl@gmail.com>
|
||||
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
|
||||
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: Arnd Bergmann <arnd@arndb.de>
|
||||
Cc: bibo mao <maobibo@loongson.cn>
|
||||
Cc: Borislav Petkov <bp@alien8.de>
|
||||
Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
|
||||
Cc: Dennis Zhou <dennis@kernel.org>
|
||||
Cc: Dev Jain <dev.jain@arm.com>
|
||||
Cc: Dmitry Vyukov <dvyukov@google.com>
|
||||
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
|
||||
Cc: Ingo Molnar <mingo@redhat.com>
|
||||
Cc: Jane Chu <jane.chu@oracle.com>
|
||||
Cc: Joao Martins <joao.m.martins@oracle.com>
|
||||
Cc: Joerg Roedel <joro@8bytes.org>
|
||||
Cc: John Hubbard <jhubbard@nvidia.com>
|
||||
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
|
||||
Cc: Liam Howlett <liam.howlett@oracle.com>
|
||||
Cc: Michal Hocko <mhocko@suse.com>
|
||||
Cc: Oscar Salvador <osalvador@suse.de>
|
||||
Cc: Peter Xu <peterx@redhat.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
|
||||
Cc: Ryan Roberts <ryan.roberts@arm.com>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Cc: Tejun Heo <tj@kernel.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Thomas Huth <thuth@redhat.com>
|
||||
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Cc: Vlastimil Babka <vbabka@suse.cz>
|
||||
Cc: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
include/linux/pgtable.h | 17 +++++++++++++++++
|
||||
include/linux/vmalloc.h | 16 ----------------
|
||||
2 files changed, 17 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -1329,6 +1329,23 @@ static inline void ptep_modify_prot_comm
|
||||
__ptep_modify_prot_commit(vma, addr, ptep, pte);
|
||||
}
|
||||
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
|
||||
+
|
||||
+/*
|
||||
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
|
||||
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
|
||||
+ * needs to be called.
|
||||
+ */
|
||||
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
|
||||
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
|
||||
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
|
||||
+ * is 0.
|
||||
+ */
|
||||
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
|
||||
+
|
||||
#endif /* CONFIG_MMU */
|
||||
|
||||
/*
|
||||
--- a/include/linux/vmalloc.h
|
||||
+++ b/include/linux/vmalloc.h
|
||||
@@ -220,22 +220,6 @@ int vmap_pages_range(unsigned long addr,
|
||||
struct page **pages, unsigned int page_shift);
|
||||
|
||||
/*
|
||||
- * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
|
||||
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
|
||||
- * needs to be called.
|
||||
- */
|
||||
-#ifndef ARCH_PAGE_TABLE_SYNC_MASK
|
||||
-#define ARCH_PAGE_TABLE_SYNC_MASK 0
|
||||
-#endif
|
||||
-
|
||||
-/*
|
||||
- * There is no default implementation for arch_sync_kernel_mappings(). It is
|
||||
- * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
|
||||
- * is 0.
|
||||
- */
|
||||
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
|
||||
-
|
||||
-/*
|
||||
* Lowlevel-APIs (not for driver use!)
|
||||
*/
|
||||
|
278
debian/patches/patchset-pf/steady/0015-mm-introduce-and-use-pgd-p4d-_populate_kernel.patch
vendored
Normal file
278
debian/patches/patchset-pf/steady/0015-mm-introduce-and-use-pgd-p4d-_populate_kernel.patch
vendored
Normal file
@@ -0,0 +1,278 @@
|
||||
From ed7c5f96ef6426a46c255706667dde67063110cb Mon Sep 17 00:00:00 2001
|
||||
From: Harry Yoo <harry.yoo@oracle.com>
|
||||
Date: Mon, 18 Aug 2025 11:02:05 +0900
|
||||
Subject: mm: introduce and use {pgd,p4d}_populate_kernel()
|
||||
|
||||
Introduce and use {pgd,p4d}_populate_kernel() in core MM code when
|
||||
populating PGD and P4D entries for the kernel address space. These
|
||||
helpers ensure proper synchronization of page tables when updating the
|
||||
kernel portion of top-level page tables.
|
||||
|
||||
Until now, the kernel has relied on each architecture to handle
|
||||
synchronization of top-level page tables in an ad-hoc manner. For
|
||||
example, see commit 9b861528a801 ("x86-64, mem: Update all PGDs for direct
|
||||
mapping and vmemmap mapping changes").
|
||||
|
||||
However, this approach has proven fragile for the following reasons:
|
||||
|
||||
1) It is easy to forget to perform the necessary page table
|
||||
synchronization when introducing new changes.
|
||||
For instance, commit 4917f55b4ef9 ("mm/sparse-vmemmap: improve memory
|
||||
savings for compound devmaps") overlooked the need to synchronize
|
||||
page tables for the vmemmap area.
|
||||
|
||||
2) It is also easy to overlook that the vmemmap and direct mapping areas
|
||||
must not be accessed before explicit page table synchronization.
|
||||
For example, commit 8d400913c231 ("x86/vmemmap: handle unpopulated
|
||||
sub-pmd ranges") caused crashes by accessing the vmemmap area
|
||||
before calling sync_global_pgds().
|
||||
|
||||
To address this, as suggested by Dave Hansen, introduce _kernel() variants
|
||||
of the page table population helpers, which invoke architecture-specific
|
||||
hooks to properly synchronize page tables. These are introduced in a new
|
||||
header file, include/linux/pgalloc.h, so they can be called from common
|
||||
code.
|
||||
|
||||
They reuse existing infrastructure for vmalloc and ioremap.
|
||||
Synchronization requirements are determined by ARCH_PAGE_TABLE_SYNC_MASK,
|
||||
and the actual synchronization is performed by
|
||||
arch_sync_kernel_mappings().
|
||||
|
||||
This change currently targets only x86_64, so only PGD and P4D level
|
||||
helpers are introduced. Currently, these helpers are no-ops since no
|
||||
architecture sets PGTBL_{PGD,P4D}_MODIFIED in ARCH_PAGE_TABLE_SYNC_MASK.
|
||||
|
||||
In theory, PUD and PMD level helpers can be added later if needed by other
|
||||
architectures. For now, 32-bit architectures (x86-32 and arm) only handle
|
||||
PGTBL_PMD_MODIFIED, so p*d_populate_kernel() will never affect them unless
|
||||
we introduce a PMD level helper.
|
||||
|
||||
[harry.yoo@oracle.com: fix KASAN build error due to p*d_populate_kernel()]
|
||||
Link: https://lkml.kernel.org/r/20250822020727.202749-1-harry.yoo@oracle.com
|
||||
Link: https://lkml.kernel.org/r/20250818020206.4517-3-harry.yoo@oracle.com
|
||||
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
|
||||
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
|
||||
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Acked-by: Kiryl Shutsemau <kas@kernel.org>
|
||||
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Cc: Alexander Potapenko <glider@google.com>
|
||||
Cc: Alistair Popple <apopple@nvidia.com>
|
||||
Cc: Andrey Konovalov <andreyknvl@gmail.com>
|
||||
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
|
||||
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: Arnd Bergmann <arnd@arndb.de>
|
||||
Cc: bibo mao <maobibo@loongson.cn>
|
||||
Cc: Borislav Petkov <bp@alien8.de>
|
||||
Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
|
||||
Cc: Dennis Zhou <dennis@kernel.org>
|
||||
Cc: Dev Jain <dev.jain@arm.com>
|
||||
Cc: Dmitry Vyukov <dvyukov@google.com>
|
||||
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
|
||||
Cc: Ingo Molnar <mingo@redhat.com>
|
||||
Cc: Jane Chu <jane.chu@oracle.com>
|
||||
Cc: Joao Martins <joao.m.martins@oracle.com>
|
||||
Cc: Joerg Roedel <joro@8bytes.org>
|
||||
Cc: John Hubbard <jhubbard@nvidia.com>
|
||||
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
|
||||
Cc: Liam Howlett <liam.howlett@oracle.com>
|
||||
Cc: Michal Hocko <mhocko@suse.com>
|
||||
Cc: Oscar Salvador <osalvador@suse.de>
|
||||
Cc: Peter Xu <peterx@redhat.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
|
||||
Cc: Ryan Roberts <ryan.roberts@arm.com>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Cc: Tejun Heo <tj@kernel.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Thomas Huth <thuth@redhat.com>
|
||||
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
|
||||
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Cc: Vlastimil Babka <vbabka@suse.cz>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
include/linux/pgalloc.h | 29 +++++++++++++++++++++++++++++
|
||||
include/linux/pgtable.h | 13 +++++++------
|
||||
mm/kasan/init.c | 12 ++++++------
|
||||
mm/percpu.c | 6 +++---
|
||||
mm/sparse-vmemmap.c | 6 +++---
|
||||
5 files changed, 48 insertions(+), 18 deletions(-)
|
||||
create mode 100644 include/linux/pgalloc.h
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/pgalloc.h
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef _LINUX_PGALLOC_H
|
||||
+#define _LINUX_PGALLOC_H
|
||||
+
|
||||
+#include <linux/pgtable.h>
|
||||
+#include <asm/pgalloc.h>
|
||||
+
|
||||
+/*
|
||||
+ * {pgd,p4d}_populate_kernel() are defined as macros to allow
|
||||
+ * compile-time optimization based on the configured page table levels.
|
||||
+ * Without this, linking may fail because callers (e.g., KASAN) may rely
|
||||
+ * on calls to these functions being optimized away when passing symbols
|
||||
+ * that exist only for certain page table levels.
|
||||
+ */
|
||||
+#define pgd_populate_kernel(addr, pgd, p4d) \
|
||||
+ do { \
|
||||
+ pgd_populate(&init_mm, pgd, p4d); \
|
||||
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_PGD_MODIFIED) \
|
||||
+ arch_sync_kernel_mappings(addr, addr); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define p4d_populate_kernel(addr, p4d, pud) \
|
||||
+ do { \
|
||||
+ p4d_populate(&init_mm, p4d, pud); \
|
||||
+ if (ARCH_PAGE_TABLE_SYNC_MASK & PGTBL_P4D_MODIFIED) \
|
||||
+ arch_sync_kernel_mappings(addr, addr); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#endif /* _LINUX_PGALLOC_H */
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -1332,8 +1332,8 @@ static inline void ptep_modify_prot_comm
|
||||
|
||||
/*
|
||||
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
|
||||
- * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
|
||||
- * needs to be called.
|
||||
+ * and let generic vmalloc, ioremap and page table update code know when
|
||||
+ * arch_sync_kernel_mappings() needs to be called.
|
||||
*/
|
||||
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
|
||||
#define ARCH_PAGE_TABLE_SYNC_MASK 0
|
||||
@@ -1832,10 +1832,11 @@ static inline bool arch_has_pfn_modify_c
|
||||
/*
|
||||
* Page Table Modification bits for pgtbl_mod_mask.
|
||||
*
|
||||
- * These are used by the p?d_alloc_track*() set of functions an in the generic
|
||||
- * vmalloc/ioremap code to track at which page-table levels entries have been
|
||||
- * modified. Based on that the code can better decide when vmalloc and ioremap
|
||||
- * mapping changes need to be synchronized to other page-tables in the system.
|
||||
+ * These are used by the p?d_alloc_track*() and p*d_populate_kernel()
|
||||
+ * functions in the generic vmalloc, ioremap and page table update code
|
||||
+ * to track at which page-table levels entries have been modified.
|
||||
+ * Based on that the code can better decide when page table changes need
|
||||
+ * to be synchronized to other page-tables in the system.
|
||||
*/
|
||||
#define __PGTBL_PGD_MODIFIED 0
|
||||
#define __PGTBL_P4D_MODIFIED 1
|
||||
--- a/mm/kasan/init.c
|
||||
+++ b/mm/kasan/init.c
|
||||
@@ -13,9 +13,9 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/slab.h>
|
||||
+#include <linux/pgalloc.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
-#include <asm/pgalloc.h>
|
||||
|
||||
#include "kasan.h"
|
||||
|
||||
@@ -191,7 +191,7 @@ static int __ref zero_p4d_populate(pgd_t
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
- p4d_populate(&init_mm, p4d,
|
||||
+ p4d_populate_kernel(addr, p4d,
|
||||
lm_alias(kasan_early_shadow_pud));
|
||||
pud = pud_offset(p4d, addr);
|
||||
pud_populate(&init_mm, pud,
|
||||
@@ -212,7 +212,7 @@ static int __ref zero_p4d_populate(pgd_t
|
||||
} else {
|
||||
p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
|
||||
pud_init(p);
|
||||
- p4d_populate(&init_mm, p4d, p);
|
||||
+ p4d_populate_kernel(addr, p4d, p);
|
||||
}
|
||||
}
|
||||
zero_pud_populate(p4d, addr, next);
|
||||
@@ -251,10 +251,10 @@ int __ref kasan_populate_early_shadow(co
|
||||
* puds,pmds, so pgd_populate(), pud_populate()
|
||||
* is noops.
|
||||
*/
|
||||
- pgd_populate(&init_mm, pgd,
|
||||
+ pgd_populate_kernel(addr, pgd,
|
||||
lm_alias(kasan_early_shadow_p4d));
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
- p4d_populate(&init_mm, p4d,
|
||||
+ p4d_populate_kernel(addr, p4d,
|
||||
lm_alias(kasan_early_shadow_pud));
|
||||
pud = pud_offset(p4d, addr);
|
||||
pud_populate(&init_mm, pud,
|
||||
@@ -273,7 +273,7 @@ int __ref kasan_populate_early_shadow(co
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
- pgd_populate(&init_mm, pgd,
|
||||
+ pgd_populate_kernel(addr, pgd,
|
||||
early_alloc(PAGE_SIZE, NUMA_NO_NODE));
|
||||
}
|
||||
}
|
||||
--- a/mm/percpu.c
|
||||
+++ b/mm/percpu.c
|
||||
@@ -3108,7 +3108,7 @@ out_free:
|
||||
#endif /* BUILD_EMBED_FIRST_CHUNK */
|
||||
|
||||
#ifdef BUILD_PAGE_FIRST_CHUNK
|
||||
-#include <asm/pgalloc.h>
|
||||
+#include <linux/pgalloc.h>
|
||||
|
||||
#ifndef P4D_TABLE_SIZE
|
||||
#define P4D_TABLE_SIZE PAGE_SIZE
|
||||
@@ -3134,13 +3134,13 @@ void __init __weak pcpu_populate_pte(uns
|
||||
|
||||
if (pgd_none(*pgd)) {
|
||||
p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
|
||||
- pgd_populate(&init_mm, pgd, p4d);
|
||||
+ pgd_populate_kernel(addr, pgd, p4d);
|
||||
}
|
||||
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
if (p4d_none(*p4d)) {
|
||||
pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
|
||||
- p4d_populate(&init_mm, p4d, pud);
|
||||
+ p4d_populate_kernel(addr, p4d, pud);
|
||||
}
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
--- a/mm/sparse-vmemmap.c
|
||||
+++ b/mm/sparse-vmemmap.c
|
||||
@@ -27,9 +27,9 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched.h>
|
||||
+#include <linux/pgalloc.h>
|
||||
|
||||
#include <asm/dma.h>
|
||||
-#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#include "hugetlb_vmemmap.h"
|
||||
@@ -229,7 +229,7 @@ p4d_t * __meminit vmemmap_p4d_populate(p
|
||||
if (!p)
|
||||
return NULL;
|
||||
pud_init(p);
|
||||
- p4d_populate(&init_mm, p4d, p);
|
||||
+ p4d_populate_kernel(addr, p4d, p);
|
||||
}
|
||||
return p4d;
|
||||
}
|
||||
@@ -241,7 +241,7 @@ pgd_t * __meminit vmemmap_pgd_populate(u
|
||||
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
|
||||
if (!p)
|
||||
return NULL;
|
||||
- pgd_populate(&init_mm, pgd, p);
|
||||
+ pgd_populate_kernel(addr, pgd, p);
|
||||
}
|
||||
return pgd;
|
||||
}
|
149
debian/patches/patchset-pf/steady/0016-x86-mm-64-define-ARCH_PAGE_TABLE_SYNC_MASK-and-arch_.patch
vendored
Normal file
149
debian/patches/patchset-pf/steady/0016-x86-mm-64-define-ARCH_PAGE_TABLE_SYNC_MASK-and-arch_.patch
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
From 2bde279ccdb076a93a167ab4a2b7202e46d83a2f Mon Sep 17 00:00:00 2001
|
||||
From: Harry Yoo <harry.yoo@oracle.com>
|
||||
Date: Mon, 18 Aug 2025 11:02:06 +0900
|
||||
Subject: x86/mm/64: define ARCH_PAGE_TABLE_SYNC_MASK and
|
||||
arch_sync_kernel_mappings()
|
||||
|
||||
Define ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() to ensure
|
||||
page tables are properly synchronized when calling p*d_populate_kernel().
|
||||
|
||||
For 5-level paging, synchronization is performed via
|
||||
pgd_populate_kernel(). In 4-level paging, pgd_populate() is a no-op, so
|
||||
synchronization is instead performed at the P4D level via
|
||||
p4d_populate_kernel().
|
||||
|
||||
This fixes intermittent boot failures on systems using 4-level paging and
|
||||
a large amount of persistent memory:
|
||||
|
||||
BUG: unable to handle page fault for address: ffffe70000000034
|
||||
#PF: supervisor write access in kernel mode
|
||||
#PF: error_code(0x0002) - not-present page
|
||||
PGD 0 P4D 0
|
||||
Oops: 0002 [#1] SMP NOPTI
|
||||
RIP: 0010:__init_single_page+0x9/0x6d
|
||||
Call Trace:
|
||||
<TASK>
|
||||
__init_zone_device_page+0x17/0x5d
|
||||
memmap_init_zone_device+0x154/0x1bb
|
||||
pagemap_range+0x2e0/0x40f
|
||||
memremap_pages+0x10b/0x2f0
|
||||
devm_memremap_pages+0x1e/0x60
|
||||
dev_dax_probe+0xce/0x2ec [device_dax]
|
||||
dax_bus_probe+0x6d/0xc9
|
||||
[... snip ...]
|
||||
</TASK>
|
||||
|
||||
It also fixes a crash in vmemmap_set_pmd() caused by accessing vmemmap
|
||||
before sync_global_pgds() [1]:
|
||||
|
||||
BUG: unable to handle page fault for address: ffffeb3ff1200000
|
||||
#PF: supervisor write access in kernel mode
|
||||
#PF: error_code(0x0002) - not-present page
|
||||
PGD 0 P4D 0
|
||||
Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI
|
||||
Tainted: [W]=WARN
|
||||
RIP: 0010:vmemmap_set_pmd+0xff/0x230
|
||||
<TASK>
|
||||
vmemmap_populate_hugepages+0x176/0x180
|
||||
vmemmap_populate+0x34/0x80
|
||||
__populate_section_memmap+0x41/0x90
|
||||
sparse_add_section+0x121/0x3e0
|
||||
__add_pages+0xba/0x150
|
||||
add_pages+0x1d/0x70
|
||||
memremap_pages+0x3dc/0x810
|
||||
devm_memremap_pages+0x1c/0x60
|
||||
xe_devm_add+0x8b/0x100 [xe]
|
||||
xe_tile_init_noalloc+0x6a/0x70 [xe]
|
||||
xe_device_probe+0x48c/0x740 [xe]
|
||||
[... snip ...]
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250818020206.4517-4-harry.yoo@oracle.com
|
||||
Fixes: 8d400913c231 ("x86/vmemmap: handle unpopulated sub-pmd ranges")
|
||||
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
|
||||
Closes: https://lore.kernel.org/linux-mm/20250311114420.240341-1-gwan-gyeong.mun@intel.com [1]
|
||||
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
|
||||
Acked-by: Kiryl Shutsemau <kas@kernel.org>
|
||||
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
|
||||
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
|
||||
Acked-by: David Hildenbrand <david@redhat.com>
|
||||
Cc: Alexander Potapenko <glider@google.com>
|
||||
Cc: Alistair Popple <apopple@nvidia.com>
|
||||
Cc: Andrey Konovalov <andreyknvl@gmail.com>
|
||||
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
|
||||
Cc: Andy Lutomirski <luto@kernel.org>
|
||||
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
|
||||
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
Cc: Ard Biesheuvel <ardb@kernel.org>
|
||||
Cc: Arnd Bergmann <arnd@arndb.de>
|
||||
Cc: bibo mao <maobibo@loongson.cn>
|
||||
Cc: Borislav Petkov <bp@alien8.de>
|
||||
Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
|
||||
Cc: Dennis Zhou <dennis@kernel.org>
|
||||
Cc: Dev Jain <dev.jain@arm.com>
|
||||
Cc: Dmitry Vyukov <dvyukov@google.com>
|
||||
Cc: Ingo Molnar <mingo@redhat.com>
|
||||
Cc: Jane Chu <jane.chu@oracle.com>
|
||||
Cc: Joao Martins <joao.m.martins@oracle.com>
|
||||
Cc: Joerg Roedel <joro@8bytes.org>
|
||||
Cc: John Hubbard <jhubbard@nvidia.com>
|
||||
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
|
||||
Cc: Liam Howlett <liam.howlett@oracle.com>
|
||||
Cc: Michal Hocko <mhocko@suse.com>
|
||||
Cc: Oscar Salvador <osalvador@suse.de>
|
||||
Cc: Peter Xu <peterx@redhat.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
|
||||
Cc: Ryan Roberts <ryan.roberts@arm.com>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Cc: Tejun Heo <tj@kernel.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Thomas Huth <thuth@redhat.com>
|
||||
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
|
||||
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Cc: Vlastimil Babka <vbabka@suse.cz>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
arch/x86/include/asm/pgtable_64_types.h | 3 +++
|
||||
arch/x86/mm/init_64.c | 18 ++++++++++++++++++
|
||||
2 files changed, 21 insertions(+)
|
||||
|
||||
--- a/arch/x86/include/asm/pgtable_64_types.h
|
||||
+++ b/arch/x86/include/asm/pgtable_64_types.h
|
||||
@@ -36,6 +36,9 @@ static inline bool pgtable_l5_enabled(vo
|
||||
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
|
||||
#endif /* USE_EARLY_PGTABLE_L5 */
|
||||
|
||||
+#define ARCH_PAGE_TABLE_SYNC_MASK \
|
||||
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
|
||||
+
|
||||
extern unsigned int pgdir_shift;
|
||||
extern unsigned int ptrs_per_p4d;
|
||||
|
||||
--- a/arch/x86/mm/init_64.c
|
||||
+++ b/arch/x86/mm/init_64.c
|
||||
@@ -224,6 +224,24 @@ static void sync_global_pgds(unsigned lo
|
||||
}
|
||||
|
||||
/*
|
||||
+ * Make kernel mappings visible in all page tables in the system.
|
||||
+ * This is necessary except when the init task populates kernel mappings
|
||||
+ * during the boot process. In that case, all processes originating from
|
||||
+ * the init task copies the kernel mappings, so there is no issue.
|
||||
+ * Otherwise, missing synchronization could lead to kernel crashes due
|
||||
+ * to missing page table entries for certain kernel mappings.
|
||||
+ *
|
||||
+ * Synchronization is performed at the top level, which is the PGD in
|
||||
+ * 5-level paging systems. But in 4-level paging systems, however,
|
||||
+ * pgd_populate() is a no-op, so synchronization is done at the P4D level.
|
||||
+ * sync_global_pgds() handles this difference between paging levels.
|
||||
+ */
|
||||
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
|
||||
+{
|
||||
+ sync_global_pgds(start, end);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
* NOTE: This function is marked __ref because it calls __init function
|
||||
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
|
||||
*/
|
41
debian/patches/patchset-pf/steady/0017-ALSA-hda-tas2781-fix-tas2563-EFI-data-endianness.patch
vendored
Normal file
41
debian/patches/patchset-pf/steady/0017-ALSA-hda-tas2781-fix-tas2563-EFI-data-endianness.patch
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
From 8622915ef6b2bdd5779ebe986d9ad1a360246377 Mon Sep 17 00:00:00 2001
|
||||
From: Gergo Koteles <soyer@irl.hu>
|
||||
Date: Fri, 29 Aug 2025 18:04:49 +0200
|
||||
Subject: ALSA: hda: tas2781: fix tas2563 EFI data endianness
|
||||
|
||||
Before conversion to unify the calibration data management, the
|
||||
tas2563_apply_calib() function performed the big endian conversion and
|
||||
wrote the calibration data to the device. The writing is now done by the
|
||||
common tasdev_load_calibrated_data() function, but without conversion.
|
||||
|
||||
Put the values into the calibration data buffer with the expected
|
||||
endianness.
|
||||
|
||||
Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib")
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Gergo Koteles <soyer@irl.hu>
|
||||
Link: https://patch.msgid.link/20250829160450.66623-1-soyer@irl.hu
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
---
|
||||
sound/pci/hda/tas2781_hda_i2c.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
--- a/sound/pci/hda/tas2781_hda_i2c.c
|
||||
+++ b/sound/pci/hda/tas2781_hda_i2c.c
|
||||
@@ -292,6 +292,7 @@ static int tas2563_save_calibration(stru
|
||||
struct cali_reg *r = &cd->cali_reg_array;
|
||||
unsigned int offset = 0;
|
||||
unsigned char *data;
|
||||
+ __be32 bedata;
|
||||
efi_status_t status;
|
||||
unsigned int attr;
|
||||
int ret, i, j, k;
|
||||
@@ -333,6 +334,8 @@ static int tas2563_save_calibration(stru
|
||||
i, j, status);
|
||||
return -EINVAL;
|
||||
}
|
||||
+ bedata = cpu_to_be32(*(uint32_t *)&data[offset]);
|
||||
+ memcpy(&data[offset], &bedata, sizeof(bedata));
|
||||
offset += TAS2563_CAL_DATA_SIZE;
|
||||
}
|
||||
}
|
@@ -0,0 +1,32 @@
|
||||
From b5891607a373a8585971c9365748382bfdd7dc6f Mon Sep 17 00:00:00 2001
|
||||
From: Gergo Koteles <soyer@irl.hu>
|
||||
Date: Fri, 29 Aug 2025 18:04:50 +0200
|
||||
Subject: ALSA: hda: tas2781: reorder tas2563 calibration variables
|
||||
|
||||
The tasdev_load_calibrated_data() function expects the calibration data
|
||||
values in the cali_data buffer as R0, R0Low, InvR0, Power, TLim which
|
||||
is not the same as what tas2563_save_calibration() writes to the buffer.
|
||||
|
||||
Reorder the EFI variables in the tas2563_save_calibration() function
|
||||
to put the values in the buffer in the correct order.
|
||||
|
||||
Fixes: 4fe238513407 ("ALSA: hda/tas2781: Move and unified the calibrated-data getting function for SPI and I2C into the tas2781_hda lib")
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Gergo Koteles <soyer@irl.hu>
|
||||
Link: https://patch.msgid.link/20250829160450.66623-2-soyer@irl.hu
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
---
|
||||
sound/pci/hda/tas2781_hda_i2c.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/sound/pci/hda/tas2781_hda_i2c.c
|
||||
+++ b/sound/pci/hda/tas2781_hda_i2c.c
|
||||
@@ -282,7 +282,7 @@ static int tas2563_save_calibration(stru
|
||||
{
|
||||
efi_guid_t efi_guid = tasdev_fct_efi_guid[LENOVO];
|
||||
char *vars[TASDEV_CALIB_N] = {
|
||||
- "R0_%d", "InvR0_%d", "R0_Low_%d", "Power_%d", "TLim_%d"
|
||||
+ "R0_%d", "R0_Low_%d", "InvR0_%d", "Power_%d", "TLim_%d"
|
||||
};
|
||||
efi_char16_t efi_name[TAS2563_CAL_VAR_NAME_MAX];
|
||||
unsigned long max_size = TAS2563_CAL_DATA_SIZE;
|
@@ -0,0 +1,26 @@
|
||||
From 25878599a2cfe0b5bd6c6cd5a978aa3f05cb9afd Mon Sep 17 00:00:00 2001
|
||||
From: Takashi Iwai <tiwai@suse.de>
|
||||
Date: Mon, 1 Sep 2025 13:50:08 +0200
|
||||
Subject: ALSA: hda/hdmi: Add pin fix for another HP EliteDesk 800 G4 model
|
||||
|
||||
It was reported that HP EliteDesk 800 G4 DM 65W (SSID 103c:845a) needs
|
||||
the similar quirk for enabling HDMI outputs, too. This patch adds the
|
||||
corresponding quirk entry.
|
||||
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Link: https://patch.msgid.link/20250901115009.27498-1-tiwai@suse.de
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
---
|
||||
sound/pci/hda/patch_hdmi.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
--- a/sound/pci/hda/patch_hdmi.c
|
||||
+++ b/sound/pci/hda/patch_hdmi.c
|
||||
@@ -1991,6 +1991,7 @@ static int hdmi_add_cvt(struct hda_codec
|
||||
static const struct snd_pci_quirk force_connect_list[] = {
|
||||
SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1),
|
||||
SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1),
|
||||
+ SND_PCI_QUIRK(0x103c, 0x845a, "HP EliteDesk 800 G4 DM 65W", 1),
|
||||
SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
|
||||
SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
|
||||
SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1),
|
110
debian/patches/patchset-pf/steady/0020-mm-fix-possible-deadlock-in-kmemleak.patch
vendored
Normal file
110
debian/patches/patchset-pf/steady/0020-mm-fix-possible-deadlock-in-kmemleak.patch
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
From 8b190cfe2c3ec71bbb031dcf4eab072a4c83c289 Mon Sep 17 00:00:00 2001
|
||||
From: Gu Bowen <gubowen5@huawei.com>
|
||||
Date: Fri, 22 Aug 2025 15:35:41 +0800
|
||||
Subject: mm: fix possible deadlock in kmemleak
|
||||
|
||||
There are some AA deadlock issues in kmemleak, similar to the situation
|
||||
reported by Breno [1]. The deadlock path is as follows:
|
||||
|
||||
mem_pool_alloc()
|
||||
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
-> pr_warn()
|
||||
-> netconsole subsystem
|
||||
-> netpoll
|
||||
-> __alloc_skb
|
||||
-> __create_object
|
||||
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
|
||||
To solve this problem, switch to printk_safe mode before printing warning
|
||||
message, this will redirect all printk()-s to a special per-CPU buffer,
|
||||
which will be flushed later from a safe context (irq work), and this
|
||||
deadlock problem can be avoided. The proper API to use should be
|
||||
printk_deferred_enter()/printk_deferred_exit() [2]. Another way is to
|
||||
place the warn print after kmemleak_lock is released.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250822073541.1886469-1-gubowen5@huawei.com
|
||||
Link: https://lore.kernel.org/all/20250731-kmemleak_lock-v1-1-728fd470198f@debian.org/#t [1]
|
||||
Link: https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/ [2]
|
||||
Signed-off-by: Gu Bowen <gubowen5@huawei.com>
|
||||
Reviewed-by: Waiman Long <longman@redhat.com>
|
||||
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
|
||||
Reviewed-by: Breno Leitao <leitao@debian.org>
|
||||
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
|
||||
Cc: John Ogness <john.ogness@linutronix.de>
|
||||
Cc: Lu Jialin <lujialin4@huawei.com>
|
||||
Cc: Petr Mladek <pmladek@suse.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
mm/kmemleak.c | 27 ++++++++++++++++++++-------
|
||||
1 file changed, 20 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/mm/kmemleak.c
|
||||
+++ b/mm/kmemleak.c
|
||||
@@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_
|
||||
else if (untagged_objp == untagged_ptr || alias)
|
||||
return object;
|
||||
else {
|
||||
+ /*
|
||||
+ * Printk deferring due to the kmemleak_lock held.
|
||||
+ * This is done to avoid deadlock.
|
||||
+ */
|
||||
+ printk_deferred_enter();
|
||||
kmemleak_warn("Found object by alias at 0x%08lx\n",
|
||||
ptr);
|
||||
dump_object_info(object);
|
||||
+ printk_deferred_exit();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -736,6 +742,11 @@ static int __link_object(struct kmemleak
|
||||
else if (untagged_objp + parent->size <= untagged_ptr)
|
||||
link = &parent->rb_node.rb_right;
|
||||
else {
|
||||
+ /*
|
||||
+ * Printk deferring due to the kmemleak_lock held.
|
||||
+ * This is done to avoid deadlock.
|
||||
+ */
|
||||
+ printk_deferred_enter();
|
||||
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
|
||||
ptr);
|
||||
/*
|
||||
@@ -743,6 +754,7 @@ static int __link_object(struct kmemleak
|
||||
* be freed while the kmemleak_lock is held.
|
||||
*/
|
||||
dump_object_info(parent);
|
||||
+ printk_deferred_exit();
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
@@ -856,13 +868,8 @@ static void delete_object_part(unsigned
|
||||
|
||||
raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = __find_and_remove_object(ptr, 1, objflags);
|
||||
- if (!object) {
|
||||
-#ifdef DEBUG
|
||||
- kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
|
||||
- ptr, size);
|
||||
-#endif
|
||||
+ if (!object)
|
||||
goto unlock;
|
||||
- }
|
||||
|
||||
/*
|
||||
* Create one or two objects that may result from the memory block
|
||||
@@ -882,8 +889,14 @@ static void delete_object_part(unsigned
|
||||
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
- if (object)
|
||||
+ if (object) {
|
||||
__delete_object(object);
|
||||
+ } else {
|
||||
+#ifdef DEBUG
|
||||
+ kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
|
||||
+ ptr, size);
|
||||
+#endif
|
||||
+ }
|
||||
|
||||
out:
|
||||
if (object_l)
|
69
debian/patches/patchset-pf/steady/0021-kasan-fix-GCC-mem-intrinsic-prefix-with-sw-tags.patch
vendored
Normal file
69
debian/patches/patchset-pf/steady/0021-kasan-fix-GCC-mem-intrinsic-prefix-with-sw-tags.patch
vendored
Normal file
@@ -0,0 +1,69 @@
|
||||
From 50b23170b0c522695761d31faafceb68ccab6d87 Mon Sep 17 00:00:00 2001
|
||||
From: Ada Couprie Diaz <ada.coupriediaz@arm.com>
|
||||
Date: Thu, 21 Aug 2025 13:07:35 +0100
|
||||
Subject: kasan: fix GCC mem-intrinsic prefix with sw tags
|
||||
|
||||
GCC doesn't support "hwasan-kernel-mem-intrinsic-prefix", only
|
||||
"asan-kernel-mem-intrinsic-prefix"[0], while LLVM supports both. This is
|
||||
already taken into account when checking
|
||||
"CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX", but not in the KASAN Makefile
|
||||
adding those parameters when "CONFIG_KASAN_SW_TAGS" is enabled.
|
||||
|
||||
Replace the version check with "CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX",
|
||||
which already validates that mem-intrinsic prefix parameter can be used,
|
||||
and choose the correct name depending on compiler.
|
||||
|
||||
GCC 13 and above trigger "CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX" which
|
||||
prevents `mem{cpy,move,set}()` being redefined in "mm/kasan/shadow.c"
|
||||
since commit 36be5cba99f6 ("kasan: treat meminstrinsic as builtins in
|
||||
uninstrumented files"), as we expect the compiler to prefix those calls
|
||||
with `__(hw)asan_` instead. But as the option passed to GCC has been
|
||||
incorrect, the compiler has not been emitting those prefixes, effectively
|
||||
never calling the instrumented versions of `mem{cpy,move,set}()` with
|
||||
"CONFIG_KASAN_SW_TAGS" enabled.
|
||||
|
||||
If "CONFIG_FORTIFY_SOURCE" is enabled, this issue would be mitigated as
|
||||
it redefines `mem{cpy,move,set}()` and properly aliases the
|
||||
`__underlying_mem*()` that will be called to the instrumented versions.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20250821120735.156244-1-ada.coupriediaz@arm.com
|
||||
Link: https://gcc.gnu.org/onlinedocs/gcc-13.4.0/gcc/Optimize-Options.html [0]
|
||||
Signed-off-by: Ada Couprie Diaz <ada.coupriediaz@arm.com>
|
||||
Fixes: 36be5cba99f6 ("kasan: treat meminstrinsic as builtins in uninstrumented files")
|
||||
Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
|
||||
Cc: Alexander Potapenko <glider@google.com>
|
||||
Cc: Andrey Konovalov <andreyknvl@gmail.com>
|
||||
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
|
||||
Cc: Dmitriy Vyukov <dvyukov@google.com>
|
||||
Cc: Marco Elver <elver@google.com>
|
||||
Cc: Mark Rutland <mark.rutland@arm.com>
|
||||
Cc: Michael Ellerman <mpe@ellerman.id.au>
|
||||
Cc: Nathan Chancellor <nathan@kernel.org>
|
||||
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||
Cc: <stable@vger.kernel.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
---
|
||||
scripts/Makefile.kasan | 12 ++++++++----
|
||||
1 file changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/scripts/Makefile.kasan
|
||||
+++ b/scripts/Makefile.kasan
|
||||
@@ -86,10 +86,14 @@ kasan_params += hwasan-instrument-stack=
|
||||
hwasan-use-short-granules=0 \
|
||||
hwasan-inline-all-checks=0
|
||||
|
||||
-# Instrument memcpy/memset/memmove calls by using instrumented __hwasan_mem*().
|
||||
-ifeq ($(call clang-min-version, 150000)$(call gcc-min-version, 130000),y)
|
||||
- kasan_params += hwasan-kernel-mem-intrinsic-prefix=1
|
||||
-endif
|
||||
+# Instrument memcpy/memset/memmove calls by using instrumented __(hw)asan_mem*().
|
||||
+ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
|
||||
+ ifdef CONFIG_CC_IS_GCC
|
||||
+ kasan_params += asan-kernel-mem-intrinsic-prefix=1
|
||||
+ else
|
||||
+ kasan_params += hwasan-kernel-mem-intrinsic-prefix=1
|
||||
+ endif
|
||||
+endif # CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
|
||||
|
||||
endif # CONFIG_KASAN_SW_TAGS
|
||||
|
105
debian/patches/patchset-pf/steady/0022-sched-Fix-sched_numa_find_nth_cpu-if-mask-offline.patch
vendored
Normal file
105
debian/patches/patchset-pf/steady/0022-sched-Fix-sched_numa_find_nth_cpu-if-mask-offline.patch
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
From a2324e3cf5378205b4a18c3fa2cfe702a26f81d4 Mon Sep 17 00:00:00 2001
|
||||
From: Christian Loehle <christian.loehle@arm.com>
|
||||
Date: Wed, 3 Sep 2025 16:48:32 +0100
|
||||
Subject: sched: Fix sched_numa_find_nth_cpu() if mask offline
|
||||
|
||||
sched_numa_find_nth_cpu() uses a bsearch to look for the 'closest'
|
||||
CPU in sched_domains_numa_masks and given cpus mask. However they
|
||||
might not intersect if all CPUs in the cpus mask are offline. bsearch
|
||||
will return NULL in that case, bail out instead of dereferencing a
|
||||
bogus pointer.
|
||||
|
||||
The previous behaviour led to this bug when using maxcpus=4 on an
|
||||
rk3399 (LLLLbb) (i.e. booting with all big CPUs offline):
|
||||
|
||||
[ 1.422922] Unable to handle kernel paging request at virtual address ffffff8000000000
|
||||
[ 1.423635] Mem abort info:
|
||||
[ 1.423889] ESR = 0x0000000096000006
|
||||
[ 1.424227] EC = 0x25: DABT (current EL), IL = 32 bits
|
||||
[ 1.424715] SET = 0, FnV = 0
|
||||
[ 1.424995] EA = 0, S1PTW = 0
|
||||
[ 1.425279] FSC = 0x06: level 2 translation fault
|
||||
[ 1.425735] Data abort info:
|
||||
[ 1.425998] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
|
||||
[ 1.426499] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
|
||||
[ 1.426952] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
|
||||
[ 1.427428] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000004a9f000
|
||||
[ 1.428038] [ffffff8000000000] pgd=18000000f7fff403, p4d=18000000f7fff403, pud=18000000f7fff403, pmd=0000000000000000
|
||||
[ 1.429014] Internal error: Oops: 0000000096000006 [#1] SMP
|
||||
[ 1.429525] Modules linked in:
|
||||
[ 1.429813] CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-rc4-dirty #343 PREEMPT
|
||||
[ 1.430559] Hardware name: Pine64 RockPro64 v2.1 (DT)
|
||||
[ 1.431012] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
|
||||
[ 1.431634] pc : sched_numa_find_nth_cpu+0x2a0/0x488
|
||||
[ 1.432094] lr : sched_numa_find_nth_cpu+0x284/0x488
|
||||
[ 1.432543] sp : ffffffc084e1b960
|
||||
[ 1.432843] x29: ffffffc084e1b960 x28: ffffff80078a8800 x27: ffffffc0846eb1d0
|
||||
[ 1.433495] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
|
||||
[ 1.434144] x23: 0000000000000000 x22: fffffffffff7f093 x21: ffffffc081de6378
|
||||
[ 1.434792] x20: 0000000000000000 x19: 0000000ffff7f093 x18: 00000000ffffffff
|
||||
[ 1.435441] x17: 3030303866666666 x16: 66663d736b73616d x15: ffffffc104e1b5b7
|
||||
[ 1.436091] x14: 0000000000000000 x13: ffffffc084712860 x12: 0000000000000372
|
||||
[ 1.436739] x11: 0000000000000126 x10: ffffffc08476a860 x9 : ffffffc084712860
|
||||
[ 1.437389] x8 : 00000000ffffefff x7 : ffffffc08476a860 x6 : 0000000000000000
|
||||
[ 1.438036] x5 : 000000000000bff4 x4 : 0000000000000000 x3 : 0000000000000000
|
||||
[ 1.438683] x2 : 0000000000000000 x1 : ffffffc0846eb000 x0 : ffffff8000407b68
|
||||
[ 1.439332] Call trace:
|
||||
[ 1.439559] sched_numa_find_nth_cpu+0x2a0/0x488 (P)
|
||||
[ 1.440016] smp_call_function_any+0xc8/0xd0
|
||||
[ 1.440416] armv8_pmu_init+0x58/0x27c
|
||||
[ 1.440770] armv8_cortex_a72_pmu_init+0x20/0x2c
|
||||
[ 1.441199] arm_pmu_device_probe+0x1e4/0x5e8
|
||||
[ 1.441603] armv8_pmu_device_probe+0x1c/0x28
|
||||
[ 1.442007] platform_probe+0x5c/0xac
|
||||
[ 1.442347] really_probe+0xbc/0x298
|
||||
[ 1.442683] __driver_probe_device+0x78/0x12c
|
||||
[ 1.443087] driver_probe_device+0xdc/0x160
|
||||
[ 1.443475] __driver_attach+0x94/0x19c
|
||||
[ 1.443833] bus_for_each_dev+0x74/0xd4
|
||||
[ 1.444190] driver_attach+0x24/0x30
|
||||
[ 1.444525] bus_add_driver+0xe4/0x208
|
||||
[ 1.444874] driver_register+0x60/0x128
|
||||
[ 1.445233] __platform_driver_register+0x24/0x30
|
||||
[ 1.445662] armv8_pmu_driver_init+0x28/0x4c
|
||||
[ 1.446059] do_one_initcall+0x44/0x25c
|
||||
[ 1.446416] kernel_init_freeable+0x1dc/0x3bc
|
||||
[ 1.446820] kernel_init+0x20/0x1d8
|
||||
[ 1.447151] ret_from_fork+0x10/0x20
|
||||
[ 1.447493] Code: 90022e21 f000e5f5 910de2b5 2a1703e2 (f8767803)
|
||||
[ 1.448040] ---[ end trace 0000000000000000 ]---
|
||||
[ 1.448483] note: swapper/0[1] exited with preempt_count 1
|
||||
[ 1.449047] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
|
||||
[ 1.449741] SMP: stopping secondary CPUs
|
||||
[ 1.450105] Kernel Offset: disabled
|
||||
[ 1.450419] CPU features: 0x000000,00080000,20002001,0400421b
|
||||
[ 1.450935] Memory Limit: none
|
||||
[ 1.451217] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
|
||||
|
||||
Yury: with the fix, the function returns cpu == nr_cpu_ids, and later in
|
||||
|
||||
smp_call_function_any ->
|
||||
smp_call_function_single ->
|
||||
generic_exec_single
|
||||
|
||||
we test the cpu for '>= nr_cpu_ids' and return -ENXIO. So everything is
|
||||
handled correctly.
|
||||
|
||||
Fixes: cd7f55359c90 ("sched: add sched_numa_find_nth_cpu()")
|
||||
Cc: stable@vger.kernel.org
|
||||
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
|
||||
Signed-off-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
|
||||
---
|
||||
kernel/sched/topology.c | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
--- a/kernel/sched/topology.c
|
||||
+++ b/kernel/sched/topology.c
|
||||
@@ -2212,6 +2212,8 @@ int sched_numa_find_nth_cpu(const struct
|
||||
goto unlock;
|
||||
|
||||
hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp);
|
||||
+ if (!hop_masks)
|
||||
+ goto unlock;
|
||||
hop = hop_masks - k.masks;
|
||||
|
||||
ret = hop ?
|
17
debian/patches/series
vendored
17
debian/patches/series
vendored
@@ -68,7 +68,6 @@ features/x86/x86-make-x32-syscall-support-conditional.patch
|
||||
bugfix/all/disable-some-marvell-phys.patch
|
||||
bugfix/all/fs-add-module_softdep-declarations-for-hard-coded-cr.patch
|
||||
bugfix/all/proc-fix-missing-pde_set_flags-for-net-proc-files.patch
|
||||
bugfix/all/net-ipv4-fix-regression-in-local-broadcast-routes.patch
|
||||
|
||||
# Miscellaneous features
|
||||
|
||||
@@ -213,4 +212,18 @@ patchset-pf/steady/0004-fs-proc-task_mmu-remove-conversion-of-seq_file-posit.pat
|
||||
patchset-pf/steady/0005-cifs-Add-support-for-creating-reparse-points-over-SM.patch
|
||||
patchset-pf/steady/0006-smb-client-fix-creating-symlinks-under-POSIX-mounts.patch
|
||||
patchset-pf/steady/0007-watchdog-intel_oc_wdt-Do-not-try-to-write-into-const.patch
|
||||
patchset-pf/steady/0008-vhost-net-Protect-ubufs-with-rcu-read-lock-in-vhost_.patch
|
||||
patchset-pf/steady/0008-ALSA-usb-audio-Add-mute-TLV-for-playback-volumes-on-.patch
|
||||
patchset-pf/steady/0009-ALSA-hda-realtek-Fix-headset-mic-for-TongFang-X6-AF-.patch
|
||||
patchset-pf/steady/0010-of_numa-fix-uninitialized-memory-nodes-causing-kerne.patch
|
||||
patchset-pf/steady/0011-mm-userfaultfd-fix-kmap_local-LIFO-ordering-for-CONF.patch
|
||||
patchset-pf/steady/0012-mm-damon-core-prevent-unnecessary-overflow-in-damos_.patch
|
||||
patchset-pf/steady/0013-mm-fix-accounting-of-memmap-pages.patch
|
||||
patchset-pf/steady/0014-mm-move-page-table-sync-declarations-to-linux-pgtabl.patch
|
||||
patchset-pf/steady/0015-mm-introduce-and-use-pgd-p4d-_populate_kernel.patch
|
||||
patchset-pf/steady/0016-x86-mm-64-define-ARCH_PAGE_TABLE_SYNC_MASK-and-arch_.patch
|
||||
patchset-pf/steady/0017-ALSA-hda-tas2781-fix-tas2563-EFI-data-endianness.patch
|
||||
patchset-pf/steady/0018-ALSA-hda-tas2781-reorder-tas2563-calibration-variabl.patch
|
||||
patchset-pf/steady/0019-ALSA-hda-hdmi-Add-pin-fix-for-another-HP-EliteDesk-8.patch
|
||||
patchset-pf/steady/0020-mm-fix-possible-deadlock-in-kmemleak.patch
|
||||
patchset-pf/steady/0021-kasan-fix-GCC-mem-intrinsic-prefix-with-sw-tags.patch
|
||||
patchset-pf/steady/0022-sched-Fix-sched_numa_find_nth_cpu-if-mask-offline.patch
|
||||
|
@@ -2,6 +2,9 @@ Package: krd-linux-headers-@abiname@-common@localversion@
|
||||
Meta-Rules-Target: headers-common
|
||||
Build-Profiles: <!pkg.linux.nokernel>
|
||||
Architecture: all
|
||||
Build-Depends:
|
||||
# used by debian/rules.real to build linux-headers
|
||||
cpio,
|
||||
Depends: ${misc:Depends}
|
||||
Multi-Arch: foreign
|
||||
Description: Common header files for KrD's Linux kernel @abiname@@localversion@
|
||||
|
17
debian/templates/image.control.in
vendored
17
debian/templates/image.control.in
vendored
@@ -3,6 +3,23 @@ Meta-Rules-Target: image
|
||||
Build-Profiles: <!pkg.linux.nokernel>
|
||||
Build-Depends:
|
||||
kmod,
|
||||
# used by upstream to build include/generated/timeconst.h
|
||||
bc,
|
||||
# used by upstream to build signing tools and to process certificates
|
||||
libssl-dev:native,
|
||||
openssl,
|
||||
# used by upstream to build objtool (native for images; host arch for
|
||||
# linux-kbuild), perf (host arch)
|
||||
libelf-dev:native,
|
||||
# used for BTF debug info
|
||||
pahole,
|
||||
# used by upstream to compress kernel
|
||||
lz4,
|
||||
xz-utils,
|
||||
zstd,
|
||||
# used by upstream to build genksyms, kconfig, and perf
|
||||
bison,
|
||||
flex,
|
||||
Pre-Depends: linux-base (>= 4.3~)
|
||||
Depends: kmod, ${misc:Depends}
|
||||
Suggests: firmware-linux-free, debian-kernel-handbook
|
||||
|
24
debian/templates/source.control.in
vendored
24
debian/templates/source.control.in
vendored
@@ -13,30 +13,6 @@ Build-Depends:
|
||||
quilt,
|
||||
# used by debian/rules.real to build linux-perf
|
||||
dh-python <!pkg.linux.notools>,
|
||||
Build-Depends-Arch:
|
||||
# used by upstream to build include/generated/timeconst.h
|
||||
bc <!pkg.linux.nokernel>,
|
||||
# used by upstream to build signing tools and to process certificates
|
||||
libssl-dev:native <!pkg.linux.nokernel>,
|
||||
libssl-dev <!pkg.linux.notools>,
|
||||
openssl <!pkg.linux.nokernel>,
|
||||
# used by upstream to build objtool (native for images; host arch for
|
||||
# linux-kbuild), perf (host arch)
|
||||
libelf-dev:native <!pkg.linux.nokernel>,
|
||||
libelf-dev <!pkg.linux.notools>,
|
||||
lz4 [amd64 arm64] <!pkg.linux.nokernel>,
|
||||
# used for bft debug info
|
||||
pahole <!pkg.linux.nokernel> | dwarves:native (>= 1.16~) <!pkg.linux.nokernel>,
|
||||
# used by debian/rules.real to build linux-headers
|
||||
cpio <!pkg.linux.nokernel>,
|
||||
# used by upstream to compress kernel
|
||||
xz-utils <!pkg.linux.nokernel>,
|
||||
zstd <!pkg.linux.nokernel>,
|
||||
# used by upstream to build genksyms, kconfig, and perf
|
||||
bison <!pkg.linux.nokernel>,
|
||||
bison <!pkg.linux.notools>,
|
||||
flex <!pkg.linux.nokernel>,
|
||||
flex <!pkg.linux.notools>,
|
||||
Rules-Requires-Root: no
|
||||
Vcs-Git: https://salsa.debian.org/kernel-team/linux.git
|
||||
Vcs-Browser: https://salsa.debian.org/kernel-team/linux
|
||||
|
9
debian/templates/tools-versioned.control.in
vendored
9
debian/templates/tools-versioned.control.in
vendored
@@ -2,6 +2,15 @@ Package: krd-linux-kbuild-@abiname@
|
||||
Meta-Rules-Target: kbuild
|
||||
Build-Profiles: <!pkg.linux.notools>
|
||||
Architecture: linux-any
|
||||
Build-Depends:
|
||||
# used by upstream to build signing tools and to process certificates
|
||||
libssl-dev,
|
||||
# used by upstream to build objtool (native for images; host arch for
|
||||
# linux-kbuild), perf (host arch)
|
||||
libelf-dev,
|
||||
# used by upstream to build genksyms, kconfig, and perf
|
||||
bison,
|
||||
flex,
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, build-essential, pahole
|
||||
Multi-Arch: foreign
|
||||
Description: Kbuild infrastructure for KrD's Linux @abiname@
|
||||
|
Reference in New Issue
Block a user