1
0
This commit is contained in:
Konstantin Demin 2024-12-27 01:06:21 +03:00
parent d1cc1656f5
commit fb44366f3a
12 changed files with 532 additions and 37 deletions

7
debian/changelog vendored
View File

@ -1,3 +1,10 @@
linux (6.12.6-2) sid; urgency=medium
* Cherry-pick some patches.
* Refine/adjust configs.
-- Konstantin Demin <rockdrilla@gmail.com> Fri, 27 Dec 2024 00:47:14 +0300
linux (6.12.6-1) sid; urgency=medium linux (6.12.6-1) sid; urgency=medium
* New upstream stable update: * New upstream stable update:

View File

@ -24,11 +24,10 @@ CONFIG_JAILHOUSE_GUEST=y
CONFIG_ACRN_GUEST=y CONFIG_ACRN_GUEST=y
CONFIG_INTEL_TDX_GUEST=y CONFIG_INTEL_TDX_GUEST=y
# CONFIG_GART_IOMMU is not set # CONFIG_GART_IOMMU is not set
CONFIG_NR_CPUS=512 CONFIG_NR_CPUS=64
# CONFIG_X86_MCE_INJECT is not set # CONFIG_X86_MCE_INJECT is not set
CONFIG_X86_5LEVEL=y
# CONFIG_AMD_NUMA is not set # CONFIG_AMD_NUMA is not set
CONFIG_NODES_SHIFT=10 CONFIG_NODES_SHIFT=6
# CONFIG_X86_PMEM_LEGACY is not set # CONFIG_X86_PMEM_LEGACY is not set
# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set # CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
## choice: TSX enable mode ## choice: TSX enable mode
@ -2110,14 +2109,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
# CONFIG_UCLAMP_TASK is not set # CONFIG_UCLAMP_TASK is not set
# CONFIG_RT_GROUP_SCHED is not set # CONFIG_RT_GROUP_SCHED is not set
##
## file: kernel/Kconfig.hz
##
## choice: Timer frequency
CONFIG_HZ_100=y
# CONFIG_HZ_500 is not set
## end choice
## ##
## file: kernel/Kconfig.kexec ## file: kernel/Kconfig.kexec
## ##
@ -2490,7 +2481,6 @@ CONFIG_CXL_PORT=y
CONFIG_FB_IOMEM_HELPERS_DEFERRED=y CONFIG_FB_IOMEM_HELPERS_DEFERRED=y
CONFIG_HVC_IRQ=y CONFIG_HVC_IRQ=y
CONFIG_HYPERV_TIMER=y CONFIG_HYPERV_TIMER=y
CONFIG_HZ=100
CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y
CONFIG_INFINIBAND_RTRS=m CONFIG_INFINIBAND_RTRS=m
CONFIG_INFINIBAND_USER_MEM=y CONFIG_INFINIBAND_USER_MEM=y
@ -2509,7 +2499,6 @@ CONFIG_NETFS_SUPPORT=m
CONFIG_PARAVIRT_CLOCK=y CONFIG_PARAVIRT_CLOCK=y
CONFIG_PARAVIRT_XXL=y CONFIG_PARAVIRT_XXL=y
CONFIG_PCI_XEN=y CONFIG_PCI_XEN=y
CONFIG_PGTABLE_LEVELS=5
CONFIG_PREEMPT_NONE_BUILD=y CONFIG_PREEMPT_NONE_BUILD=y
CONFIG_RATIONAL=m CONFIG_RATIONAL=m
CONFIG_SCSI_COMMON=m CONFIG_SCSI_COMMON=m

View File

@ -17,7 +17,6 @@ CONFIG_GART_IOMMU=y
CONFIG_NR_CPUS=64 CONFIG_NR_CPUS=64
CONFIG_X86_MCE_INJECT=m CONFIG_X86_MCE_INJECT=m
CONFIG_X86_16BIT=y CONFIG_X86_16BIT=y
# CONFIG_X86_5LEVEL is not set
CONFIG_AMD_NUMA=y CONFIG_AMD_NUMA=y
CONFIG_NODES_SHIFT=6 CONFIG_NODES_SHIFT=6
CONFIG_X86_PMEM_LEGACY=y CONFIG_X86_PMEM_LEGACY=y
@ -7240,14 +7239,6 @@ CONFIG_UCLAMP_BUCKETS_COUNT=10
CONFIG_RT_GROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y
CONFIG_UCLAMP_TASK_GROUP=y CONFIG_UCLAMP_TASK_GROUP=y
##
## file: kernel/Kconfig.hz
##
## choice: Timer frequency
CONFIG_HZ_100=y
# CONFIG_HZ_500 is not set
## end choice
## ##
## file: kernel/Kconfig.kexec ## file: kernel/Kconfig.kexec
## ##
@ -8646,7 +8637,6 @@ CONFIG_HID_VIVALDI_COMMON=m
CONFIG_HSI_BOARDINFO=y CONFIG_HSI_BOARDINFO=y
CONFIG_HSU_DMA=y CONFIG_HSU_DMA=y
CONFIG_HWMON_VID=m CONFIG_HWMON_VID=m
CONFIG_HZ=100
CONFIG_I2C_BOARDINFO=y CONFIG_I2C_BOARDINFO=y
CONFIG_I2C_CCGX_UCSI=m CONFIG_I2C_CCGX_UCSI=m
CONFIG_I2C_HID_CORE=m CONFIG_I2C_HID_CORE=m
@ -8789,7 +8779,6 @@ CONFIG_PCIE_PLDA_HOST=y
CONFIG_PCI_ECAM=y CONFIG_PCI_ECAM=y
CONFIG_PCI_HOST_COMMON=y CONFIG_PCI_HOST_COMMON=y
CONFIG_PCS_LYNX=m CONFIG_PCS_LYNX=m
CONFIG_PGTABLE_LEVELS=4
CONFIG_PHYLIB_LEDS=y CONFIG_PHYLIB_LEDS=y
CONFIG_PINCTRL_CS47L15=y CONFIG_PINCTRL_CS47L15=y
CONFIG_PINCTRL_CS47L35=y CONFIG_PINCTRL_CS47L35=y

View File

@ -28,11 +28,10 @@ CONFIG_JAILHOUSE_GUEST=y
CONFIG_ACRN_GUEST=y CONFIG_ACRN_GUEST=y
CONFIG_INTEL_TDX_GUEST=y CONFIG_INTEL_TDX_GUEST=y
# CONFIG_GART_IOMMU is not set # CONFIG_GART_IOMMU is not set
CONFIG_NR_CPUS=16 CONFIG_NR_CPUS=256
# CONFIG_X86_MCE_INJECT is not set # CONFIG_X86_MCE_INJECT is not set
# CONFIG_X86_5LEVEL is not set
CONFIG_AMD_NUMA=y CONFIG_AMD_NUMA=y
CONFIG_NODES_SHIFT=6 CONFIG_NODES_SHIFT=8
# CONFIG_X86_PMEM_LEGACY is not set # CONFIG_X86_PMEM_LEGACY is not set
# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set # CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
## choice: TSX enable mode ## choice: TSX enable mode
@ -3573,14 +3572,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
# CONFIG_UCLAMP_TASK is not set # CONFIG_UCLAMP_TASK is not set
# CONFIG_RT_GROUP_SCHED is not set # CONFIG_RT_GROUP_SCHED is not set
##
## file: kernel/Kconfig.hz
##
## choice: Timer frequency
# CONFIG_HZ_100 is not set
CONFIG_HZ_500=y
## end choice
## ##
## file: kernel/Kconfig.kexec ## file: kernel/Kconfig.kexec
## ##
@ -4056,7 +4047,6 @@ CONFIG_HDMI=y
CONFIG_HSI_BOARDINFO=y CONFIG_HSI_BOARDINFO=y
CONFIG_HVC_IRQ=y CONFIG_HVC_IRQ=y
CONFIG_HYPERV_TIMER=y CONFIG_HYPERV_TIMER=y
CONFIG_HZ=500
CONFIG_I2C_BOARDINFO=y CONFIG_I2C_BOARDINFO=y
CONFIG_I2C_HID_CORE=m CONFIG_I2C_HID_CORE=m
CONFIG_I2C_I801_MUX=y CONFIG_I2C_I801_MUX=y
@ -4085,7 +4075,6 @@ CONFIG_NLS_UCS2_UTILS=m
CONFIG_PARAVIRT_CLOCK=y CONFIG_PARAVIRT_CLOCK=y
CONFIG_PARAVIRT_XXL=y CONFIG_PARAVIRT_XXL=y
CONFIG_PCI_XEN=y CONFIG_PCI_XEN=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_PM_CLK=y CONFIG_PM_CLK=y
CONFIG_PM_OPP=y CONFIG_PM_OPP=y
CONFIG_PM_SLEEP_DEBUG=y CONFIG_PM_SLEEP_DEBUG=y

View File

@ -50,6 +50,7 @@ CONFIG_X86_VSYSCALL_EMULATION=y
# CONFIG_MICROCODE_LATE_LOADING is not set # CONFIG_MICROCODE_LATE_LOADING is not set
CONFIG_X86_MSR=m CONFIG_X86_MSR=m
CONFIG_X86_CPUID=m CONFIG_X86_CPUID=m
# CONFIG_X86_5LEVEL is not set
# CONFIG_X86_CPA_STATISTICS is not set # CONFIG_X86_CPA_STATISTICS is not set
CONFIG_AMD_MEM_ENCRYPT=y CONFIG_AMD_MEM_ENCRYPT=y
CONFIG_NUMA=y CONFIG_NUMA=y
@ -2110,8 +2111,10 @@ CONFIG_PROFILING=y
## file: kernel/Kconfig.hz ## file: kernel/Kconfig.hz
## ##
## choice: Timer frequency ## choice: Timer frequency
CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set # CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set # CONFIG_HZ_300 is not set
# CONFIG_HZ_500 is not set
# CONFIG_HZ_1000 is not set # CONFIG_HZ_1000 is not set
## end choice ## end choice
@ -3840,6 +3843,7 @@ CONFIG_HUGETLB_PAGE=y
CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y
CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING=y CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING=y
CONFIG_HVC_DRIVER=y CONFIG_HVC_DRIVER=y
CONFIG_HZ=100
CONFIG_IA32_FEAT_CTL=y CONFIG_IA32_FEAT_CTL=y
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set # CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set
@ -4035,6 +4039,7 @@ CONFIG_PCI_LABEL=y
CONFIG_PCI_LOCKLESS_CONFIG=y CONFIG_PCI_LOCKLESS_CONFIG=y
CONFIG_PER_VMA_LOCK=y CONFIG_PER_VMA_LOCK=y
CONFIG_PGTABLE_HAS_HUGE_LEAVES=y CONFIG_PGTABLE_HAS_HUGE_LEAVES=y
CONFIG_PGTABLE_LEVELS=4
CONFIG_PHYLINK=m CONFIG_PHYLINK=m
CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_PHYS_ADDR_T_64BIT=y
CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP=y

View File

@ -0,0 +1,138 @@
From ce0dd337e839cfa3033b9035a37de98e28abde1c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Mon, 9 Dec 2024 12:52:48 -0600
Subject: cpufreq/amd-pstate: Drop boost_state variable
Currently boost_state is cached for every processor in cpudata structure
and driver boost state is set for every processor.
Both of these aren't necessary as the driver only needs to set once and
the policy stores whether boost is enabled.
Move the driver boost setting to registration and adjust all references
to cached value to pull from the policy instead.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 26 +++++++++++++-------------
drivers/cpufreq/amd-pstate.h | 1 -
2 files changed, 13 insertions(+), 14 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -345,9 +345,10 @@ static int shmem_set_epp(struct amd_cpud
return ret;
}
-static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
- int pref_index)
+static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
+ int pref_index)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
int epp;
if (!pref_index)
@@ -365,7 +366,7 @@ static int amd_pstate_set_energy_pref_in
epp,
AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached),
AMD_CPPC_MAX_PERF(cpudata->cppc_req_cached),
- cpudata->boost_state);
+ policy->boost_enabled);
}
return amd_pstate_set_epp(cpudata, epp);
@@ -776,7 +777,6 @@ static int amd_pstate_set_boost(struct c
guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_cpu_boost_update(policy, state);
- WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
policy->boost_enabled = !ret ? state : false;
refresh_frequency_limits(policy);
@@ -798,9 +798,6 @@ static int amd_pstate_init_boost_support
goto exit_err;
}
- /* at least one CPU supports CPB, even if others fail later on to set up */
- current_pstate_driver->boost_enabled = true;
-
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
if (ret) {
pr_err_once("failed to read initial CPU boost state!\n");
@@ -1206,7 +1203,6 @@ static ssize_t show_energy_performance_a
static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
- struct amd_cpudata *cpudata = policy->driver_data;
char str_preference[21];
ssize_t ret;
@@ -1220,7 +1216,7 @@ static ssize_t store_energy_performance_
guard(mutex)(&amd_pstate_limits_lock);
- ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+ ret = amd_pstate_set_energy_pref_index(policy, ret);
return ret ? ret : count;
}
@@ -1295,6 +1291,9 @@ static int amd_pstate_register_driver(in
return ret;
}
+ /* at least one CPU supports CPB */
+ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
+
ret = cpufreq_register_driver(current_pstate_driver);
if (ret) {
amd_pstate_driver_cleanup();
@@ -1636,8 +1635,9 @@ static int amd_pstate_epp_set_policy(str
return 0;
}
-static int amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
u64 max_perf;
int ret;
@@ -1651,7 +1651,7 @@ static int amd_pstate_epp_reenable(struc
trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
cpudata->epp_cached,
AMD_CPPC_MIN_PERF(cpudata->cppc_req_cached),
- max_perf, cpudata->boost_state);
+ max_perf, policy->boost_enabled);
}
return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
@@ -1664,7 +1664,7 @@ static int amd_pstate_epp_cpu_online(str
pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
- ret = amd_pstate_epp_reenable(cpudata);
+ ret = amd_pstate_epp_reenable(policy);
if (ret)
return ret;
cpudata->suspended = false;
@@ -1722,7 +1722,7 @@ static int amd_pstate_epp_resume(struct
guard(mutex)(&amd_pstate_limits_lock);
/* enable amd pstate from suspend state*/
- amd_pstate_epp_reenable(cpudata);
+ amd_pstate_epp_reenable(policy);
cpudata->suspended = false;
}
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -98,7 +98,6 @@ struct amd_cpudata {
u64 cppc_cap1_cached;
bool suspended;
s16 epp_default;
- bool boost_state;
};
/*

View File

@ -0,0 +1,104 @@
From 52ec78c86a07b3d36a51cd877695b16fe86b94dc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 18 Dec 2024 11:50:52 -0800
Subject: xfs: fix off-by-one error in fsmap's end_daddr usage
In commit ca6448aed4f10a, we created an "end_daddr" variable to fix
fsmap reporting when the end of the range requested falls in the middle
of an unknown (aka free on the rmapbt) region. Unfortunately, I didn't
notice that the code sets end_daddr to the last sector of the device
but then uses that quantity to compute the length of the synthesized
mapping.
Zizhi Wo later observed that when end_daddr isn't set, we still don't
report the last fsblock on a device because in that case (aka when
info->last is true), the info->high mapping that we pass to
xfs_getfsmap_group_helper has a startblock that points to the last
fsblock. This is also wrong because the code uses startblock to
compute the length of the synthesized mapping.
Fix the second problem by setting end_daddr unconditionally, and fix the
first problem by setting start_daddr to one past the end of the range to
query.
Cc: <stable@vger.kernel.org> # v6.11
Fixes: ca6448aed4f10a ("xfs: Fix missing interval for missing_owner in xfs fsmap")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reported-by: Zizhi Wo <wozizhi@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_fsmap.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -162,7 +162,8 @@ struct xfs_getfsmap_info {
xfs_daddr_t next_daddr; /* next daddr we expect */
/* daddr of low fsmap key when we're using the rtbitmap */
xfs_daddr_t low_daddr;
- xfs_daddr_t end_daddr; /* daddr of high fsmap key */
+ /* daddr of high fsmap key, or the last daddr on the device */
+ xfs_daddr_t end_daddr;
u64 missing_owner; /* owner of holes */
u32 dev; /* device id */
/*
@@ -306,7 +307,7 @@ xfs_getfsmap_helper(
* Note that if the btree query found a mapping, there won't be a gap.
*/
if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
- rec_daddr = info->end_daddr;
+ rec_daddr = info->end_daddr + 1;
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
@@ -898,7 +899,10 @@ xfs_getfsmap(
struct xfs_trans *tp = NULL;
struct xfs_fsmap dkeys[2]; /* per-dev keys */
struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
- struct xfs_getfsmap_info info = { NULL };
+ struct xfs_getfsmap_info info = {
+ .fsmap_recs = fsmap_recs,
+ .head = head,
+ };
bool use_rmap;
int i;
int error = 0;
@@ -963,9 +967,6 @@ xfs_getfsmap(
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
- info.end_daddr = XFS_BUF_DADDR_NULL;
- info.fsmap_recs = fsmap_recs;
- info.head = head;
/* For each device we support... */
for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
@@ -978,17 +979,23 @@ xfs_getfsmap(
break;
/*
- * If this device number matches the high key, we have
- * to pass the high key to the handler to limit the
- * query results. If the device number exceeds the
- * low key, zero out the low key so that we get
- * everything from the beginning.
+ * If this device number matches the high key, we have to pass
+ * the high key to the handler to limit the query results, and
+ * set the end_daddr so that we can synthesize records at the
+ * end of the query range or device.
*/
if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
dkeys[1] = head->fmh_keys[1];
info.end_daddr = min(handlers[i].nr_sectors - 1,
dkeys[1].fmr_physical);
+ } else {
+ info.end_daddr = handlers[i].nr_sectors - 1;
}
+
+ /*
+ * If the device number exceeds the low key, zero out the low
+ * key so that we get everything from the beginning.
+ */
if (handlers[i].dev > head->fmh_keys[0].fmr_device)
memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));

View File

@ -0,0 +1,39 @@
From f5637450735989298399da1ee312d46b073e4e04 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 24 Oct 2024 13:51:05 +1100
Subject: xfs: sb_spino_align is not verified
It's just read in from the superblock and used without doing any
validity checks at all on the value.
Fixes: fb4f2b4e5a82 ("xfs: add sparse inode chunk alignment superblock field")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -391,6 +391,20 @@ xfs_validate_sb_common(
sbp->sb_inoalignmt, align);
return -EINVAL;
}
+
+ if (!sbp->sb_spino_align ||
+ sbp->sb_spino_align > sbp->sb_inoalignmt ||
+ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) is invalid.",
+ sbp->sb_spino_align);
+ return -EINVAL;
+ }
+ } else if (sbp->sb_spino_align) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) should be zero.",
+ sbp->sb_spino_align);
+ return -EINVAL;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {

View File

@ -0,0 +1,79 @@
From 55ff504694e71727d485b27b4ae189954343238e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 2 Dec 2024 10:57:39 -0800
Subject: xfs: fix sb_spino_align checks for large fsblock sizes
For a sparse inodes filesystem, mkfs.xfs computes the values of
sb_spino_align and sb_inoalignmt with the following code:
int cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
if (cfg->sb_feat.crcs_enabled)
cluster_size *= cfg->inodesize / XFS_DINODE_MIN_SIZE;
sbp->sb_spino_align = cluster_size >> cfg->blocklog;
sbp->sb_inoalignmt = XFS_INODES_PER_CHUNK *
cfg->inodesize >> cfg->blocklog;
On a V5 filesystem with 64k fsblocks and 512 byte inodes, this results
in cluster_size = 8192 * (512 / 256) = 16384. As a result,
sb_spino_align and sb_inoalignmt are both set to zero. Unfortunately,
this trips the new sb_spino_align check that was just added to
xfs_validate_sb_common, and the mkfs fails:
# mkfs.xfs -f -b size=64k, /dev/sda
meta-data=/dev/sda isize=512 agcount=4, agsize=81136 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=1, sparse=1, rmapbt=1
= reflink=1 bigtime=1 inobtcount=1 nrext64=1
= exchange=0 metadir=0
data = bsize=65536 blocks=324544, imaxpct=25
= sunit=0 swidth=0 blks
naming =version 2 bsize=65536 ascii-ci=0, ftype=1, parent=0
log =internal log bsize=65536 blocks=5006, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=65536 blocks=0, rtextents=0
= rgcount=0 rgsize=0 extents
Discarding blocks...Sparse inode alignment (0) is invalid.
Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
mkfs.xfs: Releasing dirty buffer to free list!
found dirty buffer (bulk) on free list!
Sparse inode alignment (0) is invalid.
Metadata corruption detected at 0x560ac5a80bbe, xfs_sb block 0x0/0x200
libxfs_bwrite: write verifier failed on xfs_sb bno 0x0/0x1
mkfs.xfs: writing AG headers failed, err=22
Prior to commit 59e43f5479cce1 this all worked fine, even if "sparse"
inodes are somewhat meaningless when everything fits in a single
fsblock. Adjust the checks to handle existing filesystems.
Cc: <stable@vger.kernel.org> # v6.13-rc1
Fixes: 59e43f5479cce1 ("xfs: sb_spino_align is not verified")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_sb.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -392,12 +392,13 @@ xfs_validate_sb_common(
return -EINVAL;
}
- if (!sbp->sb_spino_align ||
- sbp->sb_spino_align > sbp->sb_inoalignmt ||
- (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
+ if (sbp->sb_spino_align &&
+ (sbp->sb_spino_align > sbp->sb_inoalignmt ||
+ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) {
xfs_warn(mp,
- "Sparse inode alignment (%u) is invalid.",
- sbp->sb_spino_align);
+"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).",
+ sbp->sb_spino_align,
+ sbp->sb_inoalignmt);
return -EINVAL;
}
} else if (sbp->sb_spino_align) {

View File

@ -0,0 +1,76 @@
From 26933a27fa749433c4a11034c9fb77971fc65049 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 18 Dec 2024 11:50:36 -0800
Subject: xfs: fix sparse inode limits on runt AG
The runt AG at the end of a filesystem is almost always smaller than
the mp->m_sb.sb_agblocks. Unfortunately, when setting the max_agbno
limit for the inode chunk allocation, we do not take this into
account. This means we can allocate a sparse inode chunk that
overlaps beyond the end of an AG. When we go to allocate an inode
from that sparse chunk, the irec fails validation because the
agbno of the start of the irec is beyond valid limits for the runt
AG.
Prevent this from happening by taking into account the size of the
runt AG when allocating inode chunks. Also convert the various
checks for valid inode chunk agbnos to use xfs_ag_block_count()
so that they will also catch such issues in the future.
Fixes: 56d1115c9bc7 ("xfs: allocate sparse inode chunks on full chunk allocation failure")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
---
fs/xfs/libxfs/xfs_ialloc.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -855,7 +855,8 @@ sparse_alloc:
* the end of the AG.
*/
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
- args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
+ args.max_agbno = round_down(xfs_ag_block_count(args.mp,
+ pag->pag_agno),
args.mp->m_sb.sb_inoalignmt) -
igeo->ialloc_blks;
@@ -2332,9 +2333,9 @@ xfs_difree(
return -EINVAL;
}
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks) {
- xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
- __func__, agbno, mp->m_sb.sb_agblocks);
+ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
+ xfs_warn(mp, "%s: agbno >= xfs_ag_block_count (%d >= %d).",
+ __func__, agbno, xfs_ag_block_count(mp, pag->pag_agno));
ASSERT(0);
return -EINVAL;
}
@@ -2457,7 +2458,7 @@ xfs_imap(
*/
agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
- if (agbno >= mp->m_sb.sb_agblocks ||
+ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno) ||
ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
error = -EINVAL;
#ifdef DEBUG
@@ -2467,11 +2468,12 @@ xfs_imap(
*/
if (flags & XFS_IGET_UNTRUSTED)
return error;
- if (agbno >= mp->m_sb.sb_agblocks) {
+ if (agbno >= xfs_ag_block_count(mp, pag->pag_agno)) {
xfs_alert(mp,
"%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
__func__, (unsigned long long)agbno,
- (unsigned long)mp->m_sb.sb_agblocks);
+ (unsigned long)xfs_ag_block_count(mp,
+ pag->pag_agno));
}
if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
xfs_alert(mp,

View File

@ -0,0 +1,74 @@
From 51e3cc7d8642f4127493ee1e907f6c0abdae295f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 2 Dec 2024 10:57:42 -0800
Subject: xfs: fix zero byte checking in the superblock scrubber
The logic to check that the region past the end of the superblock is all
zeroes is wrong -- we don't want to check only the bytes past the end of
the maximally sized ondisk superblock structure as currently defined in
xfs_format.h; we want to check the bytes beyond the end of the ondisk as
defined by the feature bits.
Port the superblock size logic from xfs_repair and then put it to use in
xfs_scrub.
Cc: <stable@vger.kernel.org> # v4.15
Fixes: 21fb4cb1981ef7 ("xfs: scrub the secondary superblocks")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/scrub/agheader.c | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -60,6 +60,30 @@ xchk_superblock_xref(
}
/*
+ * Calculate the ondisk superblock size in bytes given the feature set of the
+ * mounted filesystem (aka the primary sb). This is subtly different from
+ * the logic in xfs_repair, which computes the size of a secondary sb given the
+ * featureset listed in the secondary sb.
+ */
+STATIC size_t
+xchk_superblock_ondisk_size(
+ struct xfs_mount *mp)
+{
+ if (xfs_has_metauuid(mp))
+ return offsetofend(struct xfs_dsb, sb_meta_uuid);
+ if (xfs_has_crc(mp))
+ return offsetofend(struct xfs_dsb, sb_lsn);
+ if (xfs_sb_version_hasmorebits(&mp->m_sb))
+ return offsetofend(struct xfs_dsb, sb_bad_features2);
+ if (xfs_has_logv2(mp))
+ return offsetofend(struct xfs_dsb, sb_logsunit);
+ if (xfs_has_sector(mp))
+ return offsetofend(struct xfs_dsb, sb_logsectsize);
+ /* only support dirv2 or more recent */
+ return offsetofend(struct xfs_dsb, sb_dirblklog);
+}
+
+/*
* Scrub the filesystem superblock.
*
* Note: We do /not/ attempt to check AG 0's superblock. Mount is
@@ -75,6 +99,7 @@ xchk_superblock(
struct xfs_buf *bp;
struct xfs_dsb *sb;
struct xfs_perag *pag;
+ size_t sblen;
xfs_agnumber_t agno;
uint32_t v2_ok;
__be32 features_mask;
@@ -350,8 +375,8 @@ xchk_superblock(
}
/* Everything else must be zero. */
- if (memchr_inv(sb + 1, 0,
- BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+ sblen = xchk_superblock_ondisk_size(mp);
+ if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen))
xchk_block_set_corrupt(sc, bp);
xchk_superblock_xref(sc, bp);

View File

@ -209,6 +209,7 @@ patchset-pf/amd-pstate/0041-cpufreq-amd-pstate-Always-write-EPP-value-when-updat
patchset-pf/amd-pstate/0042-cpufreq-amd-pstate-Check-if-CPPC-request-has-changed.patch patchset-pf/amd-pstate/0042-cpufreq-amd-pstate-Check-if-CPPC-request-has-changed.patch
patchset-pf/amd-pstate/0043-cpufreq-amd-pstate-Drop-ret-variable-from-amd_pstate.patch patchset-pf/amd-pstate/0043-cpufreq-amd-pstate-Drop-ret-variable-from-amd_pstate.patch
patchset-pf/amd-pstate/0044-cpufreq-amd-pstate-Set-different-default-EPP-policy-.patch patchset-pf/amd-pstate/0044-cpufreq-amd-pstate-Set-different-default-EPP-policy-.patch
patchset-pf/amd-pstate/0045-cpufreq-amd-pstate-Drop-boost_state-variable.patch
patchset-pf/amd-rapl/0001-perf-x86-rapl-Move-the-pmu-allocation-out-of-CPU-hot.patch patchset-pf/amd-rapl/0001-perf-x86-rapl-Move-the-pmu-allocation-out-of-CPU-hot.patch
patchset-pf/amd-rapl/0002-perf-x86-rapl-Clean-up-cpumask-and-hotplug.patch patchset-pf/amd-rapl/0002-perf-x86-rapl-Clean-up-cpumask-and-hotplug.patch
@ -234,6 +235,11 @@ patchset-pf/crypto/0003-crypto-x86-crc32c-eliminate-jump-table-and-excessive.pat
patchset-pf/pksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch patchset-pf/pksm/0001-mm-expose-per-process-KSM-control-via-syscalls.patch
patchset-pf/xfs/0001-xfs-fix-chown-with-rt-quota.patch patchset-pf/xfs/0001-xfs-fix-chown-with-rt-quota.patch
patchset-pf/xfs/0002-xfs-fix-off-by-one-error-in-fsmap-s-end_daddr-usage.patch
patchset-pf/xfs/0003-xfs-sb_spino_align-is-not-verified.patch
patchset-pf/xfs/0004-xfs-fix-sb_spino_align-checks-for-large-fsblock-size.patch
patchset-pf/xfs/0005-xfs-fix-sparse-inode-limits-on-runt-AG.patch
patchset-pf/xfs/0006-xfs-fix-zero-byte-checking-in-the-superblock-scrubbe.patch
patchset-pf/zstd/0001-zstd-import-upstream-v1.5.6.patch patchset-pf/zstd/0001-zstd-import-upstream-v1.5.6.patch
patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch patchset-pf/zstd/0002-lib-zstd-Refactor-intentional-wrap-around-test.patch