1
0

release 6.15.2 (preliminary)

This commit is contained in:
2025-06-18 10:16:56 +03:00
parent 4d2691343a
commit c6ba88d5bc
230 changed files with 6762 additions and 32303 deletions

View File

@@ -5,9 +5,9 @@ export GIT_OPTIONAL_LOCKS=0
w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
dst='debian/patches/pf-tmp'
dst='debian/patches/tmp-pf'
src='../linux-extras'
branches='amd-pstate cpuidle crypto exfat fixes fuse invlpgb kbuild nfs smb zstd'
branches='fixes archlinux cpuidle kbuild nfs smb xfs'
if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi
mkdir -p "${dst}"
@@ -34,19 +34,17 @@ for b in ${branches} ; do
git switch --detach "${ref}"
git switch -C "$r"
rm -rf "$w/${dst}/$b" ; mkdir -p "$w/${dst}/$b"
if git rebase "${from}" ; then
[ -d "$w/${dst}/$b/" ] || mkdir -p "$w/${dst}/$b"
set +e
env -C "$w" git ls-files -z | grep -zF "${dst}/$b/" | grep -zFv '/.' | env -C "$w" -u GIT_OPTIONAL_LOCKS xargs -r -0 git rm -f
find "$w/${dst}/$b/" -name '*.patch' -type f -exec rm -f {} +
set -e
git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${from}..$r"
else
echo >&2
git rebase --abort
echo >&2
touch "$w/${dst}/$b/0000-rebase-failed"
base=$(git merge-base "${from}" "${ref}")
git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${base}..${ref}"
fi
git switch -q --detach "${ref}"
@@ -57,6 +55,6 @@ done
cd "$w" ; rm -rf "$t"
echo >&2
echo 'put in debian/patches/series' >&2
echo 'output:' >&2
echo >&2
find "${dst}/" -type f -name '*.patch' | sed -E 's#^debian/patches/##' | sort -V

60
debian/bin/genpatch-zen vendored Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/sh
set -ef
export GIT_OPTIONAL_LOCKS=0
w=$(git rev-parse --path-format=absolute --show-toplevel) ; : "${w:?}" ; cd "$w"
dst='debian/patches/tmp-zen'
src='../linux-extras'
branches='zen-sauce fixes'
if [ -d "${dst}" ] ; then rm -rf "${dst}" ; fi
mkdir -p "${dst}"
kver=
if [ -n "$1" ] ; then
kver="$1"
else
kver=$(dpkg-parsechangelog --show-field=Version | sed -E 's/^[0-9]+://;s/-[^-]*$//' | cut -d. -f1-2)
fi
from="upstream/linux-${kver}.y"
t=$(mktemp -d) ; : "${t:?}"
cp -ar "${src}" "$t/"
cd "$t/${src##*/}"
git config advice.skippedCherryPicks false
for b in ${branches} ; do
ref="zen/${kver}/$b"
r="tmp-rebase-$b"
git switch --detach "${ref}"
git switch -C "$r"
rm -rf "$w/${dst}/$b" ; mkdir -p "$w/${dst}/$b"
if git rebase "${from}" ; then
git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${from}..$r"
else
echo >&2
git rebase --abort
touch "$w/${dst}/$b/0000-rebase-failed"
base=$(git merge-base "${from}" "${ref}")
git format-patch -N --subject-prefix='' --output-directory "$w/${dst}/$b" "${base}..${ref}"
fi
git switch -q --detach "${ref}"
git branch -D "$r"
echo >&2
done
cd "$w" ; rm -rf "$t"
echo >&2
echo 'output:' >&2
echo >&2
find "${dst}/" -type f -name '*.patch' | sed -E 's#^debian/patches/##' | sort -V

88
debian/changelog vendored
View File

@@ -1,86 +1,8 @@
linux (6.14.11-1) sid; urgency=medium
linux (6.15.2-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.11
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.15.1
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.15.2
* New upstream release: https://kernelnewbies.org/Linux_6.15
-- Konstantin Demin <rockdrilla@gmail.com> Tue, 10 Jun 2025 15:40:46 +0300
linux (6.14.10-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.10
-- Konstantin Demin <rockdrilla@gmail.com> Wed, 04 Jun 2025 16:09:02 +0300
linux (6.14.9-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.9
-- Konstantin Demin <rockdrilla@gmail.com> Thu, 29 May 2025 15:08:18 +0300
linux (6.14.8-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.8
-- Konstantin Demin <rockdrilla@gmail.com> Thu, 22 May 2025 17:02:41 +0300
linux (6.14.7-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.7
-- Konstantin Demin <rockdrilla@gmail.com> Sun, 18 May 2025 11:56:49 +0300
linux (6.14.6-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.6
-- Konstantin Demin <rockdrilla@gmail.com> Fri, 09 May 2025 12:23:42 +0300
linux (6.14.5-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.5
-- Konstantin Demin <rockdrilla@gmail.com> Fri, 02 May 2025 16:25:21 +0300
linux (6.14.4-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.4
-- Konstantin Demin <rockdrilla@gmail.com> Fri, 25 Apr 2025 20:05:32 +0300
linux (6.14.3-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.3
-- Konstantin Demin <rockdrilla@gmail.com> Mon, 21 Apr 2025 01:31:34 +0300
linux (6.14.2-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.2
-- Konstantin Demin <rockdrilla@gmail.com> Fri, 11 Apr 2025 00:21:57 +0300
linux (6.14.1-1) sid; urgency=medium
* New upstream stable update:
https://www.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.14.1
-- Konstantin Demin <rockdrilla@gmail.com> Mon, 07 Apr 2025 12:41:44 +0300
linux (6.14-1) sid; urgency=medium
* Sync with Debian.
* Refresh patches.
* Refine configs.
* New upstream release: https://kernelnewbies.org/Linux_6.13
* New upstream release: https://kernelnewbies.org/Linux_6.14
-- Konstantin Demin <rockdrilla@gmail.com> Thu, 27 Mar 2025 01:51:03 +0300
-- Konstantin Demin <rockdrilla@gmail.com> Tue, 17 Jun 2025 12:18:45 +0300

View File

@@ -92,8 +92,6 @@ CONFIG_IO_DELAY_NONE=y
## file: crypto/Kconfig
##
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
@@ -305,6 +303,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
##
CONFIG_CXL_BUS=y
# CONFIG_CXL_MEM_RAW_COMMANDS is not set
# CONFIG_CXL_FEATURES is not set
##
## file: drivers/devfreq/Kconfig
@@ -397,6 +396,11 @@ CONFIG_GOOGLE_COREBOOT_TABLE=m
CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m
CONFIG_GOOGLE_VPD=m
##
## file: drivers/fwctl/Kconfig
##
# CONFIG_FWCTL is not set
##
## file: drivers/gnss/Kconfig
##
@@ -437,6 +441,11 @@ CONFIG_GOOGLE_VPD=m
##
# CONFIG_HTE is not set
##
## file: drivers/hv/Kconfig
##
# CONFIG_MSHV_ROOT is not set
##
## file: drivers/hwmon/Kconfig
##
@@ -1376,6 +1385,11 @@ CONFIG_PCIEASPM_DEFAULT=y
# CONFIG_PCIEASPM_POWERSAVE is not set
## end choice
##
## file: drivers/pci/pwrctrl/Kconfig
##
# CONFIG_PCI_PWRCTL_SLOT is not set
##
## file: drivers/pci/switch/Kconfig
##
@@ -1465,6 +1479,11 @@ CONFIG_PCIEASPM_DEFAULT=y
##
CONFIG_PPS=m
##
## file: drivers/pps/generators/Kconfig
##
# CONFIG_PPS_GENERATOR_TIO is not set
##
## file: drivers/ptp/Kconfig
##
@@ -1655,6 +1674,7 @@ CONFIG_SCSI_MPI3MR=m
# CONFIG_SERIAL_8250_RSA is not set
# CONFIG_SERIAL_8250_RT288X is not set
# CONFIG_SERIAL_8250_MID is not set
# CONFIG_SERIAL_8250_NI is not set
##
## file: drivers/ufs/Kconfig
@@ -1721,6 +1741,7 @@ CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m
CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=m
CONFIG_SOFT_WATCHDOG_PRETIMEOUT=y
# CONFIG_LENOVO_SE10_WDT is not set
# CONFIG_LENOVO_SE30_WDT is not set
# CONFIG_XILINX_WATCHDOG is not set
# CONFIG_CADENCE_WATCHDOG is not set
# CONFIG_DW_WATCHDOG is not set
@@ -1945,11 +1966,6 @@ CONFIG_PROC_VMCORE=y
##
# CONFIG_CIFS is not set
##
## file: fs/sysv/Kconfig
##
# CONFIG_SYSV_FS is not set
##
## file: fs/ufs/Kconfig
##
@@ -2049,6 +2065,7 @@ CONFIG_PANIC_TIMEOUT=5
## file: mm/Kconfig
##
# CONFIG_ZSWAP is not set
CONFIG_ZSMALLOC=m
# CONFIG_HWPOISON_INJECT is not set
# CONFIG_NUMA_EMU is not set
@@ -2310,6 +2327,7 @@ CONFIG_IPE_PROP_DM_VERITY_SIGNATURE=y
## file: security/keys/Kconfig
##
# CONFIG_KEYS_REQUEST_CACHE is not set
# CONFIG_BIG_KEYS is not set
# CONFIG_TRUSTED_KEYS is not set
# CONFIG_USER_DECRYPTED_DATA is not set
@@ -2327,7 +2345,16 @@ CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION=y
CONFIG_ARCH_SELECTS_KEXEC_FILE=y
CONFIG_BLK_DEV_RNBD=y
CONFIG_CRASH_RESERVE=y
CONFIG_CRYPTO_CHACHA20_X86_64=m
CONFIG_CRYPTO_LIB_AESCFB=m
CONFIG_CRYPTO_LIB_CHACHA=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m
CONFIG_CRYPTO_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m
CONFIG_CRYPTO_POLY1305_X86_64=m
CONFIG_CXL_PORT=y
CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y
CONFIG_INFINIBAND_RTRS=m

View File

@@ -108,8 +108,6 @@ CONFIG_SYSTEM_BLACKLIST_AUTH_UPDATE=y
## file: crypto/Kconfig
##
CONFIG_CRYPTO_ECDH=y
CONFIG_CRYPTO_CTS=y
CONFIG_CRYPTO_XTS=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_842=y
CONFIG_CRYPTO_LZ4=y
@@ -557,6 +555,7 @@ CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION=y
##
CONFIG_CXL_BUS=m
CONFIG_CXL_MEM_RAW_COMMANDS=y
CONFIG_CXL_FEATURES=y
##
## file: drivers/devfreq/Kconfig
@@ -639,6 +638,9 @@ CONFIG_EDAC_LEGACY_SYSFS=y
# CONFIG_EDAC_DEBUG is not set
CONFIG_EDAC_DECODE_MCE=y
CONFIG_EDAC_GHES=y
# CONFIG_EDAC_SCRUB is not set
# CONFIG_EDAC_ECS is not set
# CONFIG_EDAC_MEM_REPAIR is not set
CONFIG_EDAC_AMD64=m
CONFIG_EDAC_E752X=m
CONFIG_EDAC_I82975X=m
@@ -723,6 +725,13 @@ CONFIG_FSI_SBEFIFO=m
CONFIG_FSI_OCC=m
CONFIG_I2CR_SCOM=m
##
## file: drivers/fwctl/Kconfig
##
CONFIG_FWCTL=m
CONFIG_FWCTL_MLX5=m
CONFIG_FWCTL_PDS=m
##
## file: drivers/gnss/Kconfig
##
@@ -899,6 +908,7 @@ CONFIG_DRM_AST=m
CONFIG_DRM_CHIPONE_ICN6211=m
CONFIG_DRM_CHRONTEL_CH7033=m
CONFIG_DRM_DISPLAY_CONNECTOR=m
CONFIG_DRM_I2C_NXP_TDA998X=m
CONFIG_DRM_ITE_IT6263=m
CONFIG_DRM_ITE_IT6505=m
CONFIG_DRM_LONTIUM_LT8912B=m
@@ -988,14 +998,6 @@ CONFIG_DRM_GUD=m
##
CONFIG_DRM_HISI_HIBMC=m
##
## file: drivers/gpu/drm/i2c/Kconfig
##
CONFIG_DRM_I2C_CH7006=m
CONFIG_DRM_I2C_SIL164=m
CONFIG_DRM_I2C_NXP_TDA998X=m
CONFIG_DRM_I2C_NXP_TDA9950=m
##
## file: drivers/gpu/drm/i915/Kconfig
##
@@ -1058,6 +1060,8 @@ CONFIG_NOUVEAU_DEBUG_DEFAULT=3
CONFIG_DRM_NOUVEAU_BACKLIGHT=y
# CONFIG_DRM_NOUVEAU_SVM is not set
# CONFIG_DRM_NOUVEAU_GSP_DEFAULT is not set
CONFIG_DRM_NOUVEAU_CH7006=m
CONFIG_DRM_NOUVEAU_SIL164=m
##
## file: drivers/gpu/drm/panel/Kconfig
@@ -1119,6 +1123,7 @@ CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS=m
CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m
# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
CONFIG_DRM_PANEL_RAYDIUM_RM67200=m
CONFIG_DRM_PANEL_RAYDIUM_RM68200=m
CONFIG_DRM_PANEL_RAYDIUM_RM692E5=m
CONFIG_DRM_PANEL_RAYDIUM_RM69380=m
@@ -1156,6 +1161,7 @@ CONFIG_DRM_PANEL_SONY_TULIP_TRULY_NT35521=m
CONFIG_DRM_PANEL_STARTEK_KD070FHFID015=m
CONFIG_DRM_PANEL_EDP=m
CONFIG_DRM_PANEL_SIMPLE=m
CONFIG_DRM_PANEL_SUMMIT=m
CONFIG_DRM_PANEL_SYNAPTICS_R63353=m
CONFIG_DRM_PANEL_TDO_TL070WSH30=m
CONFIG_DRM_PANEL_TPO_TD028TTEC1=m
@@ -1164,6 +1170,7 @@ CONFIG_DRM_PANEL_TPO_TPG110=m
# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set
CONFIG_DRM_PANEL_VISIONOX_R66451=m
# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set
CONFIG_DRM_PANEL_VISIONOX_RM692E5=m
CONFIG_DRM_PANEL_VISIONOX_VTDR6130=m
CONFIG_DRM_PANEL_WIDECHIPS_WS2401=m
CONFIG_DRM_PANEL_XINPENG_XPP055C272=m
@@ -1189,6 +1196,7 @@ CONFIG_DRM_SSD130X_SPI=m
##
## file: drivers/gpu/drm/tiny/Kconfig
##
CONFIG_DRM_APPLETBDRM=m
CONFIG_DRM_ARCPGU=m
CONFIG_DRM_BOCHS=m
CONFIG_DRM_CIRRUS_QEMU=m
@@ -1238,6 +1246,8 @@ CONFIG_DRM_VMWGFX=m
##
CONFIG_DRM_XE=m
CONFIG_DRM_XE_DISPLAY=y
CONFIG_DRM_XE_DP_TUNNEL=y
CONFIG_DRM_XE_DEVMEM_MIRROR=y
CONFIG_DRM_XE_FORCE_PROBE=""
##
@@ -1296,6 +1306,8 @@ CONFIG_HID_ACRUX=m
CONFIG_HID_ACRUX_FF=y
CONFIG_HID_APPLE=m
CONFIG_HID_APPLEIR=m
CONFIG_HID_APPLETB_BL=m
CONFIG_HID_APPLETB_KBD=m
CONFIG_HID_ASUS=m
CONFIG_HID_AUREAL=m
CONFIG_HID_BELKIN=m
@@ -1481,6 +1493,11 @@ CONFIG_HSI_CHAR=m
##
CONFIG_HTE=y
##
## file: drivers/hv/Kconfig
##
CONFIG_MSHV_ROOT=m
##
## file: drivers/hwmon/Kconfig
##
@@ -1515,6 +1532,7 @@ CONFIG_SENSORS_FAM15H_POWER=m
CONFIG_SENSORS_APPLESMC=m
CONFIG_SENSORS_ASB100=m
CONFIG_SENSORS_ATXP1=m
CONFIG_SENSORS_CGBC=m
CONFIG_SENSORS_CHIPCAP2=m
CONFIG_SENSORS_CORSAIR_CPRO=m
CONFIG_SENSORS_CORSAIR_PSU=m
@@ -1541,6 +1559,7 @@ CONFIG_SENSORS_G762=m
CONFIG_SENSORS_GPIO_FAN=m
CONFIG_SENSORS_HIH6130=m
CONFIG_SENSORS_HS3001=m
CONFIG_SENSORS_HTU31=m
CONFIG_SENSORS_IBMAEM=m
CONFIG_SENSORS_IBMPEX=m
CONFIG_SENSORS_I5500=m
@@ -1713,6 +1732,7 @@ CONFIG_SENSORS_DELTA_AHE50DC_FAN=m
CONFIG_SENSORS_FSP_3Y=m
CONFIG_SENSORS_IBM_CFFPS=m
CONFIG_SENSORS_DPS920AB=m
CONFIG_SENSORS_INA233=m
CONFIG_SENSORS_INSPUR_IPSPS=m
CONFIG_SENSORS_IR35221=m
CONFIG_SENSORS_IR36021=m
@@ -2043,7 +2063,6 @@ CONFIG_INPUT_TWL4030_VIBRA=m
CONFIG_INPUT_TWL6040_VIBRA=m
CONFIG_INPUT_UINPUT=m
CONFIG_INPUT_PALMAS_PWRBUTTON=m
CONFIG_INPUT_PCF50633_PMU=m
CONFIG_INPUT_PCF8574=m
CONFIG_INPUT_PWM_BEEPER=m
CONFIG_INPUT_PWM_VIBRA=m
@@ -2355,6 +2374,7 @@ CONFIG_LEDS_MC13783=m
CONFIG_LEDS_TCA6507=m
CONFIG_LEDS_TLC591XX=m
CONFIG_LEDS_MAX77650=m
CONFIG_LEDS_MAX77705=m
CONFIG_LEDS_MAX8997=m
CONFIG_LEDS_LM355x=m
CONFIG_LEDS_MENF21BMC=m
@@ -2406,7 +2426,7 @@ CONFIG_LEDS_QCOM_LPG=m
CONFIG_LEDS_MT6370_RGB=m
##
## file: drivers/leds/simple/Kconfig
## file: drivers/leds/simatic/Kconfig
##
CONFIG_LEDS_SIEMENS_SIMATIC_IPC=m
CONFIG_LEDS_SIEMENS_SIMATIC_IPC_APOLLOLAKE=m
@@ -2496,6 +2516,7 @@ CONFIG_MEDIA_CEC_SUPPORT=y
## file: drivers/media/cec/i2c/Kconfig
##
CONFIG_CEC_CH7322=m
CONFIG_CEC_NXP_TDA9950=m
##
## file: drivers/media/cec/platform/Kconfig
@@ -2558,6 +2579,7 @@ CONFIG_VIDEO_MSP3400=m
# CONFIG_VIDEO_BT856 is not set
# CONFIG_VIDEO_BT866 is not set
# CONFIG_VIDEO_ISL7998X is not set
CONFIG_VIDEO_LT6911UXE=m
# CONFIG_VIDEO_KS0127 is not set
# CONFIG_VIDEO_MAX9286 is not set
# CONFIG_VIDEO_ML86V7667 is not set
@@ -2883,6 +2905,7 @@ CONFIG_MFD_MAX77620=y
CONFIG_MFD_MAX77650=m
CONFIG_MFD_MAX77686=m
CONFIG_MFD_MAX77693=m
CONFIG_MFD_MAX77705=m
CONFIG_MFD_MAX77714=m
CONFIG_MFD_MAX77843=y
CONFIG_MFD_MAX8907=m
@@ -2899,9 +2922,6 @@ CONFIG_MFD_CPCAP=m
CONFIG_MFD_VIPERBOARD=m
CONFIG_MFD_NTXEC=m
CONFIG_MFD_RETU=m
CONFIG_MFD_PCF50633=m
CONFIG_PCF50633_ADC=m
CONFIG_PCF50633_GPIO=m
CONFIG_MFD_SY7636A=m
CONFIG_MFD_RDC321X=m
CONFIG_MFD_RT4831=m
@@ -3933,6 +3953,7 @@ CONFIG_8139TOO_TUNE_TWISTER=y
CONFIG_8139TOO_8129=y
# CONFIG_8139_OLD_RX_RESET is not set
CONFIG_R8169=m
CONFIG_R8169_LEDS=y
CONFIG_RTASE=m
##
@@ -4120,6 +4141,7 @@ CONFIG_IEEE802154_HWSIM=m
CONFIG_MCTP_SERIAL=m
CONFIG_MCTP_TRANSPORT_I2C=m
CONFIG_MCTP_TRANSPORT_I3C=m
CONFIG_MCTP_TRANSPORT_USB=m
##
## file: drivers/net/mdio/Kconfig
@@ -4220,6 +4242,7 @@ CONFIG_QCA807X_PHY=m
## file: drivers/net/phy/realtek/Kconfig
##
CONFIG_REALTEK_PHY=m
CONFIG_REALTEK_PHY_HWMON=y
##
## file: drivers/net/plip/Kconfig
@@ -4546,6 +4569,7 @@ CONFIG_IWLEGACY_DEBUGFS=y
CONFIG_IWLWIFI=m
CONFIG_IWLDVM=m
CONFIG_IWLMVM=m
CONFIG_IWLMLD=m
# CONFIG_IWLWIFI_DEBUG is not set
CONFIG_IWLWIFI_DEBUGFS=y
# CONFIG_IWLWIFI_DEVICE_TRACING is not set
@@ -4765,6 +4789,8 @@ CONFIG_RTW88_8821CS=m
CONFIG_RTW88_8821CU=m
CONFIG_RTW88_8821AU=m
CONFIG_RTW88_8812AU=m
CONFIG_RTW88_8814AE=m
CONFIG_RTW88_8814AU=m
CONFIG_RTW88_DEBUG=y
CONFIG_RTW88_DEBUGFS=y
@@ -5045,6 +5071,7 @@ CONFIG_PCIE_CADENCE_PLAT_HOST=y
##
## file: drivers/pci/controller/dwc/Kconfig
##
# CONFIG_PCIE_DW_DEBUGFS is not set
CONFIG_PCI_MESON=y
CONFIG_PCIE_INTEL_GW=y
CONFIG_PCIE_DW_PLAT_HOST=y
@@ -5073,6 +5100,11 @@ CONFIG_PCIE_ECRC=y
CONFIG_PCIEASPM_POWERSAVE=y
## end choice
##
## file: drivers/pci/pwrctrl/Kconfig
##
CONFIG_PCI_PWRCTL_SLOT=m
##
## file: drivers/pci/switch/Kconfig
##
@@ -5163,6 +5195,7 @@ CONFIG_PINMUX=y
CONFIG_PINCONF=y
# CONFIG_DEBUG_PINCTRL is not set
CONFIG_PINCTRL_AMD=y
CONFIG_PINCTRL_AMDISP=m
CONFIG_PINCTRL_AS3722=m
CONFIG_PINCTRL_AXP209=m
CONFIG_PINCTRL_AW9523=m
@@ -5249,6 +5282,7 @@ CONFIG_GPD_POCKET_FAN=m
CONFIG_WIRELESS_HOTKEY=m
CONFIG_IBM_RTL=m
CONFIG_IDEAPAD_LAPTOP=m
CONFIG_LENOVO_WMI_HOTKEY_UTILITIES=m
CONFIG_LENOVO_YMC=m
CONFIG_SENSORS_HDAPS=m
CONFIG_THINKPAD_ACPI=m
@@ -5267,6 +5301,7 @@ CONFIG_MSI_WMI=m
CONFIG_MSI_WMI_PLATFORM=m
CONFIG_PCENGINES_APU2=m
CONFIG_BARCO_P50_GPIO=m
CONFIG_SAMSUNG_GALAXYBOOK=m
CONFIG_SAMSUNG_LAPTOP=m
CONFIG_SAMSUNG_Q10=m
CONFIG_TOSHIBA_BT_RFKILL=m
@@ -5281,7 +5316,6 @@ CONFIG_SONYPI_COMPAT=y
CONFIG_SYSTEM76_ACPI=m
CONFIG_TOPSTAR_LAPTOP=m
CONFIG_SERIAL_MULTI_INSTANTIATE=m
CONFIG_MLX_PLATFORM=m
CONFIG_INSPUR_PLATFORM_PROFILE=m
CONFIG_LENOVO_WMI_CAMERA=m
CONFIG_INTEL_IPS=m
@@ -5321,6 +5355,8 @@ CONFIG_AMD_PMF=m
##
CONFIG_X86_PLATFORM_DRIVERS_DELL=y
CONFIG_ALIENWARE_WMI=m
CONFIG_ALIENWARE_WMI_LEGACY=y
CONFIG_ALIENWARE_WMI_WMAX=y
CONFIG_DCDBAS=m
CONFIG_DELL_LAPTOP=m
CONFIG_DELL_RBU=m
@@ -5498,7 +5534,6 @@ CONFIG_BATTERY_MAX17042=m
CONFIG_BATTERY_MAX1720X=m
CONFIG_BATTERY_MAX1721X=m
CONFIG_CHARGER_88PM860X=m
CONFIG_CHARGER_PCF50633=m
CONFIG_CHARGER_ISP1704=m
CONFIG_CHARGER_MAX8903=m
CONFIG_CHARGER_LP8727=m
@@ -5510,6 +5545,7 @@ CONFIG_CHARGER_MAX14577=m
CONFIG_CHARGER_DETECTOR_MAX14656=m
CONFIG_CHARGER_MAX77650=m
CONFIG_CHARGER_MAX77693=m
CONFIG_CHARGER_MAX77705=m
CONFIG_CHARGER_MAX77976=m
CONFIG_CHARGER_MAX8997=m
CONFIG_CHARGER_MAX8998=m
@@ -5556,6 +5592,11 @@ CONFIG_PPS=y
##
CONFIG_PPS_CLIENT_PARPORT=m
##
## file: drivers/pps/generators/Kconfig
##
CONFIG_PPS_GENERATOR_TIO=m
##
## file: drivers/ptp/Kconfig
##
@@ -5727,8 +5768,8 @@ CONFIG_REGULATOR_MT6370=m
CONFIG_REGULATOR_MT6397=m
CONFIG_REGULATOR_PALMAS=m
CONFIG_REGULATOR_PCA9450=m
CONFIG_REGULATOR_PF9453=m
CONFIG_REGULATOR_PCAP=m
CONFIG_REGULATOR_PCF50633=m
CONFIG_REGULATOR_PF8X00=m
CONFIG_REGULATOR_PFUZE100=m
CONFIG_REGULATOR_PV88060=m
@@ -5916,7 +5957,6 @@ CONFIG_RTC_DRV_MSM6242=m
CONFIG_RTC_DRV_RP5C01=m
CONFIG_RTC_DRV_WM831X=m
CONFIG_RTC_DRV_WM8350=m
CONFIG_RTC_DRV_PCF50633=m
CONFIG_RTC_DRV_ZYNQMP=m
CONFIG_RTC_DRV_NTXEC=m
CONFIG_RTC_DRV_CADENCE=m
@@ -6041,6 +6081,7 @@ CONFIG_SPI_TLE62X0=m
CONFIG_SPI_SLAVE=y
CONFIG_SPI_SLAVE_TIME=m
CONFIG_SPI_SLAVE_SYSTEM_CONTROL=m
CONFIG_SPI_OFFLOAD_TRIGGER_PWM=m
##
## file: drivers/spmi/Kconfig
@@ -6258,6 +6299,7 @@ CONFIG_SERIAL_8250_PCI1XXXX=m
CONFIG_SERIAL_8250_RSA=y
CONFIG_SERIAL_8250_RT288X=y
CONFIG_SERIAL_8250_MID=y
CONFIG_SERIAL_8250_NI=m
CONFIG_SERIAL_OF_PLATFORM=m
##
@@ -6707,6 +6749,7 @@ CONFIG_TYPEC_MUX_PI3USB30532=m
CONFIG_TYPEC_MUX_INTEL_PMC=m
CONFIG_TYPEC_MUX_IT5205=m
CONFIG_TYPEC_MUX_NB7VPQ904M=m
CONFIG_TYPEC_MUX_PS883X=m
CONFIG_TYPEC_MUX_PTN36502=m
CONFIG_TYPEC_MUX_TUSB1046=m
CONFIG_TYPEC_MUX_WCD939X_USBSS=m
@@ -6819,7 +6862,6 @@ CONFIG_BACKLIGHT_ADP5520=m
CONFIG_BACKLIGHT_ADP8860=m
CONFIG_BACKLIGHT_ADP8870=m
CONFIG_BACKLIGHT_88PM860X=m
CONFIG_BACKLIGHT_PCF50633=m
CONFIG_BACKLIGHT_AAT2870=m
CONFIG_BACKLIGHT_LM3509=m
CONFIG_BACKLIGHT_LM3630A=m
@@ -6913,6 +6955,7 @@ CONFIG_DA9063_WATCHDOG=m
CONFIG_DA9062_WATCHDOG=m
CONFIG_GPIO_WATCHDOG=m
CONFIG_LENOVO_SE10_WDT=m
CONFIG_LENOVO_SE30_WDT=m
CONFIG_MENF21BMC_WATCHDOG=m
CONFIG_WM831X_WATCHDOG=m
CONFIG_WM8350_WATCHDOG=m
@@ -7242,11 +7285,6 @@ CONFIG_CIFS_SWN_UPCALL=y
CONFIG_CIFS_FSCACHE=y
# CONFIG_CIFS_COMPRESSION is not set
##
## file: fs/sysv/Kconfig
##
CONFIG_SYSV_FS=m
##
## file: fs/ubifs/Kconfig
##
@@ -7389,8 +7427,6 @@ CONFIG_TEST_DHRY=m
# CONFIG_ASYNC_RAID6_TEST is not set
# CONFIG_TEST_HEXDUMP is not set
# CONFIG_TEST_KSTRTOX is not set
# CONFIG_TEST_PRINTF is not set
# CONFIG_TEST_SCANF is not set
# CONFIG_TEST_BITMAP is not set
# CONFIG_TEST_UUID is not set
# CONFIG_TEST_XARRAY is not set
@@ -7402,7 +7438,6 @@ CONFIG_TEST_DHRY=m
# CONFIG_TEST_BITOPS is not set
# CONFIG_TEST_VMALLOC is not set
CONFIG_TEST_BPF=m
CONFIG_TEST_BLACKHOLE_DEV=m
# CONFIG_FIND_BIT_BENCHMARK is not set
CONFIG_TEST_FIRMWARE=m
# CONFIG_TEST_SYSCTL is not set
@@ -7446,12 +7481,9 @@ CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
## end choice
## choice: Default allocator
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set
# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y
## end choice
CONFIG_ZBUD=y
CONFIG_Z3FOLD_DEPRECATED=m
CONFIG_ZSMALLOC=y
CONFIG_HWPOISON_INJECT=m
CONFIG_NUMA_EMU=y
@@ -7873,6 +7905,7 @@ CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING=y
## file: security/keys/Kconfig
##
CONFIG_KEYS_REQUEST_CACHE=y
CONFIG_BIG_KEYS=y
CONFIG_TRUSTED_KEYS=m
CONFIG_USER_DECRYPTED_DATA=y
@@ -8146,6 +8179,7 @@ CONFIG_SND_SOC_AK5558=m
CONFIG_SND_SOC_ALC5623=m
CONFIG_SND_SOC_AW8738=m
CONFIG_SND_SOC_AW88395=m
CONFIG_SND_SOC_AW88166=m
CONFIG_SND_SOC_AW88261=m
CONFIG_SND_SOC_AW88081=m
CONFIG_SND_SOC_AW87390=m
@@ -8430,6 +8464,7 @@ CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98927=m
CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98357A=m
CONFIG_SND_SOC_INTEL_AVS_MACH_MAX98373=m
CONFIG_SND_SOC_INTEL_AVS_MACH_NAU8825=m
CONFIG_SND_SOC_INTEL_AVS_MACH_PCM3168A=m
CONFIG_SND_SOC_INTEL_AVS_MACH_PROBE=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT274=m
CONFIG_SND_SOC_INTEL_AVS_MACH_RT286=m
@@ -8648,14 +8683,28 @@ CONFIG_CHECK_SIGNATURE=y
CONFIG_CHELSIO_LIB=m
CONFIG_CLOSURES=y
CONFIG_COMPAT_NETLINK_MESSAGES=y
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_CRC_CCITT=m
CONFIG_CRYPTO_CHACHA20_X86_64=y
CONFIG_CRYPTO_DEV_ATMEL_I2C=m
CONFIG_CRYPTO_DEV_NITROX=m
CONFIG_CRYPTO_DEV_QAT=m
CONFIG_CRYPTO_LIB_AESCFB=y
CONFIG_CRYPTO_LIB_ARC4=m
CONFIG_CRYPTO_LIB_CHACHA=y
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=y
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=y
CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=y
CONFIG_CRYPTO_LIB_POLY1305=y
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=y
CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=y
CONFIG_CRYPTO_POLY1305_X86_64=y
CONFIG_CXL_PORT=m
CONFIG_DCA=m
CONFIG_DELL_WMI_DESCRIPTOR=m
CONFIG_DEV_SYNC_PROBE=m
CONFIG_DMA_DECLARE_COHERENT=y
CONFIG_DMA_ENGINE_RAID=y
CONFIG_DMA_OF=y
@@ -8685,6 +8734,7 @@ CONFIG_DRM_DISPLAY_HELPER=m
CONFIG_DRM_EXEC=m
CONFIG_DRM_GEM_DMA_HELPER=m
CONFIG_DRM_GEM_SHMEM_HELPER=m
CONFIG_DRM_GPUSVM=m
CONFIG_DRM_GPUVM=m
CONFIG_DRM_I915_GVT=y
CONFIG_DRM_KMS_HELPER=m
@@ -8871,6 +8921,7 @@ CONFIG_PCIE_DW_PLAT=y
CONFIG_PCIE_PLDA_HOST=y
CONFIG_PCI_ECAM=y
CONFIG_PCI_HOST_COMMON=y
CONFIG_PCI_PWRCTL=m
CONFIG_PCS_LYNX=m
CONFIG_PHYLIB_LEDS=y
CONFIG_PINCTRL_CS47L15=y
@@ -8894,7 +8945,6 @@ CONFIG_PPPOE_HASH_BITS=4
CONFIG_PREEMPTION=y
CONFIG_PREEMPT_BUILD=y
CONFIG_PREEMPT_COUNT=y
CONFIG_PREEMPT_RCU=y
CONFIG_PWM_DWC_CORE=m
CONFIG_PWM_LPSS=m
CONFIG_QCA7000=m
@@ -8903,10 +8953,8 @@ CONFIG_QCOM_PDR_HELPERS=m
CONFIG_QCOM_PDR_MSG=m
CONFIG_QCOM_QMI_HELPERS=m
CONFIG_QTNFMAC=m
CONFIG_R8169_LEDS=y
CONFIG_RAID6_PQ=m
CONFIG_RATIONAL=y
CONFIG_REALTEK_PHY_HWMON=y
CONFIG_REBOOT_MODE=m
CONFIG_REED_SOLOMON_DEC16=y
CONFIG_REGMAP=y
@@ -8951,6 +8999,7 @@ CONFIG_RTW88_8703B=m
CONFIG_RTW88_8723D=m
CONFIG_RTW88_8723X=m
CONFIG_RTW88_8812A=m
CONFIG_RTW88_8814A=m
CONFIG_RTW88_8821A=m
CONFIG_RTW88_8821C=m
CONFIG_RTW88_8822B=m
@@ -9038,6 +9087,7 @@ CONFIG_SND_SOC_ADAU1761=m
CONFIG_SND_SOC_ADAU17X1=m
CONFIG_SND_SOC_ADAU7118=m
CONFIG_SND_SOC_ADAU_UTILS=m
CONFIG_SND_SOC_AMD_ACPI_MACH=m
CONFIG_SND_SOC_AMD_ACP_I2S=m
CONFIG_SND_SOC_AMD_ACP_LEGACY_COMMON=m
CONFIG_SND_SOC_AMD_ACP_PCM=m
@@ -9161,6 +9211,7 @@ CONFIG_SND_SOC_WM5102=m
CONFIG_SND_SOC_WM8731=m
CONFIG_SND_SOC_WM8804=m
CONFIG_SND_SOC_WM_ADSP=m
CONFIG_SND_SOF_SOF_HDA_SDW_BPT=m
CONFIG_SND_SST_ATOM_HIFI2_PLATFORM=m
CONFIG_SND_SYNTH_EMUX=m
CONFIG_SND_TIMER=m
@@ -9176,6 +9227,7 @@ CONFIG_SOUND_OSS_CORE=y
CONFIG_SPI_DYNAMIC=y
CONFIG_SPI_FSL_LIB=m
CONFIG_SPI_MASTER=y
CONFIG_SPI_OFFLOAD=y
CONFIG_SPI_PXA2XX_PCI=m
CONFIG_SSB_B43_PCI_BRIDGE=y
CONFIG_SSB_BLOCKIO=y
@@ -9258,8 +9310,7 @@ CONFIG_XEN_FRONT_PGDIR_SHBUF=m
CONFIG_XEN_GRANT_DMA_IOMMU=y
CONFIG_XEN_PV_DOM0=y
CONFIG_XILLYBUS_CLASS=m
CONFIG_Z3FOLD=m
CONFIG_ZPOOL=y
CONFIG_ZSTD_COMPRESS=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd"
CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud"
CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc"

View File

@@ -11,8 +11,8 @@
CONFIG_X86_EXTENDED_PLATFORM=y
# CONFIG_X86_NUMACHIP is not set
# CONFIG_X86_VSMP is not set
CONFIG_X86_GOLDFISH=y
# CONFIG_X86_INTEL_MID is not set
CONFIG_X86_GOLDFISH=y
# CONFIG_X86_INTEL_LPSS is not set
# CONFIG_X86_AMD_PLATFORM_DEVICE is not set
CONFIG_IOSF_MBI=m
@@ -97,8 +97,6 @@ CONFIG_IO_DELAY_NONE=y
## file: crypto/Kconfig
##
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
@@ -352,6 +350,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y
##
CONFIG_CXL_BUS=y
# CONFIG_CXL_MEM_RAW_COMMANDS is not set
# CONFIG_CXL_FEATURES is not set
##
## file: drivers/devfreq/Kconfig
@@ -427,6 +426,9 @@ CONFIG_EDAC_LEGACY_SYSFS=y
# CONFIG_EDAC_DEBUG is not set
CONFIG_EDAC_DECODE_MCE=y
CONFIG_EDAC_GHES=y
# CONFIG_EDAC_SCRUB is not set
# CONFIG_EDAC_ECS is not set
# CONFIG_EDAC_MEM_REPAIR is not set
# CONFIG_EDAC_AMD64 is not set
# CONFIG_EDAC_E752X is not set
# CONFIG_EDAC_I82975X is not set
@@ -484,6 +486,11 @@ CONFIG_GOOGLE_COREBOOT_TABLE=m
CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT=m
CONFIG_GOOGLE_VPD=m
##
## file: drivers/fwctl/Kconfig
##
# CONFIG_FWCTL is not set
##
## file: drivers/gnss/Kconfig
##
@@ -562,6 +569,11 @@ CONFIG_DRM_HYPERV=m
##
# CONFIG_DRM_AST is not set
##
## file: drivers/gpu/drm/bridge/Kconfig
##
# CONFIG_DRM_I2C_NXP_TDA998X is not set
##
## file: drivers/gpu/drm/bridge/analogix/Kconfig
##
@@ -598,14 +610,6 @@ CONFIG_DRM_CLIENT_DEFAULT_FBDEV=y
##
# CONFIG_DRM_HISI_HIBMC is not set
##
## file: drivers/gpu/drm/i2c/Kconfig
##
# CONFIG_DRM_I2C_CH7006 is not set
# CONFIG_DRM_I2C_SIL164 is not set
# CONFIG_DRM_I2C_NXP_TDA998X is not set
# CONFIG_DRM_I2C_NXP_TDA9950 is not set
##
## file: drivers/gpu/drm/i915/Kconfig
##
@@ -639,6 +643,7 @@ CONFIG_DRM_QXL=m
##
## file: drivers/gpu/drm/tiny/Kconfig
##
# CONFIG_DRM_APPLETBDRM is not set
CONFIG_DRM_BOCHS=m
CONFIG_DRM_CIRRUS_QEMU=m
# CONFIG_DRM_GM12U320 is not set
@@ -843,6 +848,11 @@ CONFIG_HSI_CHAR=m
##
# CONFIG_HTE is not set
##
## file: drivers/hv/Kconfig
##
CONFIG_MSHV_ROOT=m
##
## file: drivers/hwmon/Kconfig
##
@@ -894,6 +904,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_G762 is not set
# CONFIG_SENSORS_HIH6130 is not set
# CONFIG_SENSORS_HS3001 is not set
# CONFIG_SENSORS_HTU31 is not set
# CONFIG_SENSORS_I5500 is not set
# CONFIG_SENSORS_CORETEMP is not set
# CONFIG_SENSORS_ISL28022 is not set
@@ -1039,6 +1050,7 @@ CONFIG_SENSORS_PMBUS=m
# CONFIG_SENSORS_DELTA_AHE50DC_FAN is not set
# CONFIG_SENSORS_FSP_3Y is not set
# CONFIG_SENSORS_DPS920AB is not set
# CONFIG_SENSORS_INA233 is not set
# CONFIG_SENSORS_INSPUR_IPSPS is not set
# CONFIG_SENSORS_IR35221 is not set
# CONFIG_SENSORS_IR36021 is not set
@@ -1631,6 +1643,7 @@ CONFIG_MFD_INTEL_PMC_BXT=m
# CONFIG_MFD_MAX14577 is not set
# CONFIG_MFD_MAX77541 is not set
# CONFIG_MFD_MAX77693 is not set
# CONFIG_MFD_MAX77705 is not set
# CONFIG_MFD_MAX77843 is not set
# CONFIG_MFD_MAX8907 is not set
# CONFIG_MFD_MAX8925 is not set
@@ -1642,7 +1655,6 @@ CONFIG_MFD_INTEL_PMC_BXT=m
# CONFIG_MFD_MENF21BMC is not set
# CONFIG_MFD_VIPERBOARD is not set
# CONFIG_MFD_RETU is not set
# CONFIG_MFD_PCF50633 is not set
# CONFIG_MFD_SY7636A is not set
# CONFIG_MFD_RDC321X is not set
# CONFIG_MFD_RT4831 is not set
@@ -2169,6 +2181,7 @@ CONFIG_FBNIC=m
##
CONFIG_MCTP_SERIAL=m
CONFIG_MCTP_TRANSPORT_I2C=m
CONFIG_MCTP_TRANSPORT_USB=m
##
## file: drivers/net/mdio/Kconfig
@@ -2376,6 +2389,11 @@ CONFIG_PCIEASPM_DEFAULT=y
# CONFIG_PCIEASPM_POWERSAVE is not set
## end choice
##
## file: drivers/pci/pwrctrl/Kconfig
##
# CONFIG_PCI_PWRCTL_SLOT is not set
##
## file: drivers/pci/switch/Kconfig
##
@@ -2446,6 +2464,7 @@ CONFIG_WMI_BMOF=m
# CONFIG_GPD_POCKET_FAN is not set
# CONFIG_WIRELESS_HOTKEY is not set
# CONFIG_IBM_RTL is not set
# CONFIG_LENOVO_WMI_HOTKEY_UTILITIES is not set
# CONFIG_SENSORS_HDAPS is not set
# CONFIG_THINKPAD_LMI is not set
CONFIG_ACPI_QUICKSTART=m
@@ -2459,7 +2478,6 @@ CONFIG_MSI_WMI_PLATFORM=m
# CONFIG_ACPI_CMPC is not set
# CONFIG_TOPSTAR_LAPTOP is not set
CONFIG_SERIAL_MULTI_INSTANTIATE=m
# CONFIG_MLX_PLATFORM is not set
# CONFIG_INSPUR_PLATFORM_PROFILE is not set
# CONFIG_LENOVO_WMI_CAMERA is not set
# CONFIG_INTEL_IPS is not set
@@ -2594,7 +2612,6 @@ CONFIG_CHARGER_GPIO=m
CONFIG_BATTERY_GOLDFISH=m
# CONFIG_BATTERY_RT5033 is not set
# CONFIG_CHARGER_RT9455 is not set
# CONFIG_FUEL_GAUGE_STC3117 is not set
# CONFIG_CHARGER_BD99954 is not set
# CONFIG_BATTERY_UG3105 is not set
# CONFIG_FUEL_GAUGE_MM8013 is not set
@@ -2609,6 +2626,11 @@ CONFIG_BATTERY_GOLDFISH=m
##
CONFIG_PPS=m
##
## file: drivers/pps/generators/Kconfig
##
CONFIG_PPS_GENERATOR_TIO=m
##
## file: drivers/ptp/Kconfig
##
@@ -2864,6 +2886,7 @@ CONFIG_GOLDFISH_TTY=m
# CONFIG_SERIAL_8250_RSA is not set
# CONFIG_SERIAL_8250_RT288X is not set
# CONFIG_SERIAL_8250_MID is not set
# CONFIG_SERIAL_8250_NI is not set
##
## file: drivers/ufs/Kconfig
@@ -3158,6 +3181,7 @@ CONFIG_WATCHDOG_PRETIMEOUT_GOV_NOOP=m
CONFIG_WATCHDOG_PRETIMEOUT_GOV_PANIC=m
CONFIG_SOFT_WATCHDOG_PRETIMEOUT=y
# CONFIG_LENOVO_SE10_WDT is not set
# CONFIG_LENOVO_SE30_WDT is not set
# CONFIG_XILINX_WATCHDOG is not set
# CONFIG_ZIIRAVE_WATCHDOG is not set
# CONFIG_CADENCE_WATCHDOG is not set
@@ -3422,11 +3446,6 @@ CONFIG_CIFS_SWN_UPCALL=y
CONFIG_CIFS_FSCACHE=y
# CONFIG_CIFS_COMPRESSION is not set
##
## file: fs/sysv/Kconfig
##
# CONFIG_SYSV_FS is not set
##
## file: fs/ufs/Kconfig
##
@@ -3560,12 +3579,9 @@ CONFIG_ZSWAP_SHRINKER_DEFAULT_ON=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
## end choice
## choice: Default allocator
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD_DEPRECATED is not set
# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set
CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC=y
## end choice
CONFIG_ZBUD=y
CONFIG_Z3FOLD_DEPRECATED=m
CONFIG_ZSMALLOC=y
# CONFIG_HWPOISON_INJECT is not set
CONFIG_NUMA_EMU=y
@@ -3880,6 +3896,7 @@ CONFIG_IPE_PROP_DM_VERITY_SIGNATURE=y
## file: security/keys/Kconfig
##
# CONFIG_KEYS_REQUEST_CACHE is not set
# CONFIG_BIG_KEYS is not set
# CONFIG_TRUSTED_KEYS is not set
# CONFIG_USER_DECRYPTED_DATA is not set
@@ -3898,9 +3915,20 @@ CONFIG_ASYNC_PQ=m
CONFIG_ASYNC_RAID6_RECOV=m
CONFIG_BLK_DEV_RNBD=y
CONFIG_CHECK_SIGNATURE=y
CONFIG_CRC_CCITT=m
CONFIG_CRYPTO_CHACHA20_X86_64=m
CONFIG_CRYPTO_LIB_AESCFB=m
CONFIG_CRYPTO_LIB_ARC4=m
CONFIG_CRYPTO_LIB_CHACHA=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m
CONFIG_CRYPTO_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m
CONFIG_CRYPTO_POLY1305_X86_64=m
CONFIG_CXL_PORT=y
CONFIG_DEV_SYNC_PROBE=m
CONFIG_DRM_BRIDGE=y
CONFIG_DRM_CLIENT=y
CONFIG_DRM_CLIENT_DEFAULT="fbdev"
@@ -3953,7 +3981,6 @@ CONFIG_PPPOE_HASH_BITS=4
CONFIG_PREEMPTION=y
CONFIG_PREEMPT_BUILD=y
CONFIG_PREEMPT_COUNT=y
CONFIG_PREEMPT_RCU=y
CONFIG_RAID6_PQ=m
CONFIG_RATIONAL=y
CONFIG_REGMAP=y
@@ -3980,8 +4007,7 @@ CONFIG_USB_XHCI_PCI=m
CONFIG_VIDEOMODE_HELPERS=y
CONFIG_VIRTIO_DMA_SHARED_BUFFER=m
CONFIG_XEN_FRONT_PGDIR_SHBUF=m
CONFIG_Z3FOLD=m
CONFIG_ZPOOL=y
CONFIG_ZSTD_COMPRESS=y
CONFIG_ZSWAP_COMPRESSOR_DEFAULT="zstd"
CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud"
CONFIG_ZSWAP_ZPOOL_DEFAULT="zsmalloc"

122
debian/config/config vendored
View File

@@ -117,7 +117,6 @@ CONFIG_MITIGATION_PAGE_TABLE_ISOLATION=y
# CONFIG_MITIGATION_SRBDS is not set
# CONFIG_MITIGATION_SSB is not set
CONFIG_PCI_MMCONFIG=y
# CONFIG_PCI_CNB20LE_QUIRK is not set
# CONFIG_ISA_BUS is not set
CONFIG_ISA_DMA_API=y
CONFIG_IA32_EMULATION=y
@@ -127,7 +126,11 @@ CONFIG_IA32_EMULATION=y
##
## file: arch/x86/Kconfig.cpu
##
## choice: Processor family
## choice: x86_64 Compiler Build Optimization
# CONFIG_X86_NATIVE_CPU is not set
CONFIG_GENERIC_CPU=y
# CONFIG_MNATIVE_INTEL is not set
# CONFIG_MNATIVE_AMD is not set
# CONFIG_MK8 is not set
# CONFIG_MK8SSE3 is not set
# CONFIG_MK10 is not set
@@ -144,7 +147,6 @@ CONFIG_IA32_EMULATION=y
# CONFIG_MZEN4 is not set
# CONFIG_MZEN5 is not set
# CONFIG_MPSC is not set
# CONFIG_MATOM is not set
# CONFIG_MCORE2 is not set
# CONFIG_MNEHALEM is not set
# CONFIG_MWESTMERE is not set
@@ -160,8 +162,8 @@ CONFIG_IA32_EMULATION=y
# CONFIG_MCANNONLAKE is not set
# CONFIG_MICELAKE_CLIENT is not set
# CONFIG_MICELAKE_SERVER is not set
# CONFIG_MCASCADELAKE is not set
# CONFIG_MCOOPERLAKE is not set
# CONFIG_MCASCADELAKE is not set
# CONFIG_MTIGERLAKE is not set
# CONFIG_MSAPPHIRERAPIDS is not set
# CONFIG_MROCKETLAKE is not set
@@ -169,9 +171,6 @@ CONFIG_IA32_EMULATION=y
# CONFIG_MRAPTORLAKE is not set
# CONFIG_MMETEORLAKE is not set
# CONFIG_MEMERALDRAPIDS is not set
CONFIG_GENERIC_CPU=y
# CONFIG_MNATIVE_INTEL is not set
# CONFIG_MNATIVE_AMD is not set
## end choice
# CONFIG_PROCESSOR_SELECT is not set
CONFIG_CPU_SUP_INTEL=y
@@ -332,6 +331,7 @@ CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_KRB5ENC=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_RSA=y
CONFIG_CRYPTO_DH=y
@@ -355,10 +355,12 @@ CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_CHACHA20=m
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTR=y
CONFIG_CRYPTO_CTS=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=y
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_CCM=m
@@ -386,8 +388,6 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_XXHASH=m
CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_CRCT10DIF=y
CONFIG_CRYPTO_CRC64_ROCKSOFT=y
CONFIG_CRYPTO_LZO=y
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_DRBG_MENU=y
@@ -414,6 +414,12 @@ CONFIG_PKCS7_MESSAGE_PARSER=y
CONFIG_SIGNED_PE_FILE_VERIFICATION=y
# CONFIG_FIPS_SIGNATURE_SELFTEST is not set
##
## file: crypto/krb5/Kconfig
##
CONFIG_CRYPTO_KRB5=m
# CONFIG_CRYPTO_KRB5_SELFTESTS is not set
##
## file: drivers/acpi/Kconfig
##
@@ -663,6 +669,7 @@ CONFIG_X86_POWERNOW_K8=m
CONFIG_X86_AMD_FREQ_SENSITIVITY=m
CONFIG_X86_SPEEDSTEP_CENTRINO=m
CONFIG_X86_P4_CLOCKMOD=m
CONFIG_CPUFREQ_ARCH_CUR_FREQ=y
##
## file: drivers/cpuidle/Kconfig
@@ -1169,6 +1176,7 @@ CONFIG_PCI_REALLOC_ENABLE_AUTO=y
CONFIG_PCI_STUB=m
CONFIG_PCI_PF_STUB=m
CONFIG_XEN_PCIDEV_FRONTEND=m
CONFIG_PCI_DOE=y
CONFIG_PCI_IOV=y
CONFIG_PCI_PRI=y
CONFIG_PCI_PASID=y
@@ -2079,7 +2087,6 @@ CONFIG_UDF_FS=m
## file: fs/unicode/Kconfig
##
CONFIG_UNICODE=y
# CONFIG_UNICODE_NORMALIZATION_SELFTEST is not set
##
## file: fs/verity/Kconfig
@@ -2161,7 +2168,6 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
# CONFIG_CPUSETS_V1 is not set
CONFIG_PROC_PID_CPUSET=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
@@ -2419,17 +2425,6 @@ CONFIG_SYNTH_EVENTS=y
## file: lib/Kconfig
##
CONFIG_PACKING=y
CONFIG_CRC_CCITT=m
CONFIG_CRC16=y
CONFIG_CRC_T10DIF=y
CONFIG_CRC64_ROCKSOFT=y
CONFIG_CRC_ITU_T=m
CONFIG_CRC32=y
CONFIG_CRC64=y
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_LIBCRC32C=y
CONFIG_CRC8=m
CONFIG_CRC_OPTIMIZATIONS=y
# CONFIG_RANDOM32_SELFTEST is not set
# CONFIG_GLOB_SELFTEST is not set
@@ -2476,6 +2471,7 @@ CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set
CONFIG_OBJTOOL_WERROR=y
CONFIG_VMLINUX_MAP=y
CONFIG_BUILTIN_MODULE_RANGES=y
# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
@@ -2493,6 +2489,7 @@ CONFIG_DEBUG_FS_ALLOW_ALL=y
# CONFIG_SHRINKER_DEBUG is not set
# CONFIG_DEBUG_STACK_USAGE is not set
CONFIG_SCHED_STACK_END_CHECK=y
# CONFIG_DEBUG_VFS is not set
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_VM_PGTABLE is not set
# CONFIG_DEBUG_VIRTUAL is not set
@@ -2509,9 +2506,9 @@ CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120
# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
CONFIG_DETECT_HUNG_TASK_BLOCKER=y
# CONFIG_WQ_WATCHDOG is not set
# CONFIG_TEST_LOCKUP is not set
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
# CONFIG_PROVE_LOCKING is not set
# CONFIG_LOCK_STAT is not set
@@ -2565,14 +2562,6 @@ CONFIG_IO_STRICT_DEVMEM=y
##
# CONFIG_UBSAN is not set
##
## file: lib/crypto/Kconfig
##
CONFIG_CRYPTO_LIB_CHACHA=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_POLY1305=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
##
## file: lib/fonts/Kconfig
##
@@ -2620,7 +2609,6 @@ CONFIG_XZ_DEC_MICROLZMA=y
## file: mm/Kconfig
##
CONFIG_SWAP=y
CONFIG_ZSMALLOC=m
# CONFIG_ZSMALLOC_STAT is not set
CONFIG_ZSMALLOC_CHAIN_SIZE=8
# CONFIG_SLUB_TINY is not set
@@ -2657,6 +2645,7 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
# CONFIG_TRANSPARENT_HUGEPAGE_NEVER is not set
## end choice
# CONFIG_READ_ONLY_THP_FOR_FS is not set
# CONFIG_NO_PAGE_MAPCOUNT is not set
# CONFIG_CMA is not set
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
@@ -3315,8 +3304,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_PATH=y
CONFIG_INTEL_TXT=y
CONFIG_LSM_MMAP_MIN_ADDR=65536
CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
# CONFIG_STATIC_USERMODEHELPER is not set
## choice: First legacy 'major LSM' to be initialized
# CONFIG_DEFAULT_SECURITY_SELINUX is not set
@@ -3335,6 +3322,9 @@ CONFIG_INIT_STACK_ALL_ZERO=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set
CONFIG_ZERO_CALL_USED_REGS=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_HARDENED_USERCOPY=y
CONFIG_HARDENED_USERCOPY_DEFAULT_ON=y
## choice: Randomize layout of sensitive kernel structures
CONFIG_RANDSTRUCT_NONE=y
## end choice
@@ -3485,6 +3475,7 @@ CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y
CONFIG_ARCH_HAS_CPU_PASID=y
CONFIG_ARCH_HAS_CPU_RELAX=y
CONFIG_ARCH_HAS_CRC32=y
CONFIG_ARCH_HAS_CRC64=y
CONFIG_ARCH_HAS_CRC_T10DIF=y
CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y
CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y
@@ -3494,6 +3485,7 @@ CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
CONFIG_ARCH_HAS_DMA_OPS=y
CONFIG_ARCH_HAS_ELFCORE_COMPAT=y
CONFIG_ARCH_HAS_ELF_RANDOMIZE=y
CONFIG_ARCH_HAS_EXECMEM_ROX=y
CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y
CONFIG_ARCH_HAS_FORTIFY_SOURCE=y
@@ -3511,6 +3503,7 @@ CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y
CONFIG_ARCH_HAS_PKEYS=y
CONFIG_ARCH_HAS_PMEM_API=y
CONFIG_ARCH_HAS_PREEMPT_LAZY=y
CONFIG_ARCH_HAS_PTDUMP=y
CONFIG_ARCH_HAS_PTE_DEVMAP=y
CONFIG_ARCH_HAS_PTE_SPECIAL=y
CONFIG_ARCH_HAS_SET_DIRECT_MAP=y
@@ -3560,6 +3553,7 @@ CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y
CONFIG_ARCH_SUPPORTS_LTO_CLANG=y
CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y
CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
CONFIG_ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS=y
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y
CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y
@@ -3579,6 +3573,7 @@ CONFIG_ARCH_USE_MEMTEST=y
CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
CONFIG_ARCH_USE_SYM_ANNOTATIONS=y
CONFIG_ARCH_VMLINUX_NEEDS_RELOCS=y
CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y
CONFIG_ARCH_WANTS_NO_INSTR=y
CONFIG_ARCH_WANTS_THP_SWAP=y
@@ -3586,6 +3581,7 @@ CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
CONFIG_ARCH_WANT_HUGETLB_VMEMMAP_PREINIT=y
CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y
@@ -3643,7 +3639,13 @@ CONFIG_CONTIG_ALLOC=y
CONFIG_CPU_FREQ_GOV_ATTR_SET=y
CONFIG_CPU_FREQ_GOV_COMMON=y
CONFIG_CPU_RMAP=y
CONFIG_CRC16=y
CONFIG_CRC32=y
CONFIG_CRC32_ARCH=y
CONFIG_CRC64=y
CONFIG_CRC64_ARCH=y
CONFIG_CRC_ITU_T=m
CONFIG_CRC_T10DIF=y
CONFIG_CRC_T10DIF_ARCH=y
CONFIG_CRYPTO_ACOMP2=y
CONFIG_CRYPTO_AEAD=m
@@ -3658,7 +3660,6 @@ CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=y
CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=y
CONFIG_CRYPTO_BLOWFISH_COMMON=m
CONFIG_CRYPTO_CAST_COMMON=m
CONFIG_CRYPTO_CHACHA20_X86_64=m
CONFIG_CRYPTO_CURVE25519_X86=m
CONFIG_CRYPTO_DRBG=y
CONFIG_CRYPTO_DRBG_HMAC=y
@@ -3668,6 +3669,7 @@ CONFIG_CRYPTO_GENIV=m
CONFIG_CRYPTO_HASH=y
CONFIG_CRYPTO_HASH2=y
CONFIG_CRYPTO_HASH_INFO=y
CONFIG_CRYPTO_HKDF=y
CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKS=64
CONFIG_CRYPTO_JITTERENTROPY_MEMORY_BLOCKSIZE=32
CONFIG_CRYPTO_KDF800108_CTR=y
@@ -3676,14 +3678,11 @@ CONFIG_CRYPTO_KPP2=y
CONFIG_CRYPTO_LIB_AES=y
CONFIG_CRYPTO_LIB_AESGCM=y
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m
CONFIG_CRYPTO_LIB_CHACHA_INTERNAL=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m
CONFIG_CRYPTO_LIB_CURVE25519_INTERNAL=m
CONFIG_CRYPTO_LIB_DES=m
CONFIG_CRYPTO_LIB_GF128MUL=y
CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m
CONFIG_CRYPTO_LIB_POLY1305_INTERNAL=m
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11
CONFIG_CRYPTO_LIB_SHA1=y
CONFIG_CRYPTO_LIB_SHA256=y
@@ -3691,7 +3690,6 @@ CONFIG_CRYPTO_LIB_UTILS=y
CONFIG_CRYPTO_MANAGER2=y
CONFIG_CRYPTO_NHPOLY1305=m
CONFIG_CRYPTO_NULL2=y
CONFIG_CRYPTO_POLY1305_X86_64=m
CONFIG_CRYPTO_POLYVAL=m
CONFIG_CRYPTO_RNG=y
CONFIG_CRYPTO_RNG2=y
@@ -3706,6 +3704,7 @@ CONFIG_CRYPTO_SM4=m
CONFIG_CRYPTO_TWOFISH_COMMON=m
CONFIG_CRYPTO_USER_API=m
CONFIG_CRYPTO_XCTR=m
CONFIG_CXL_MCE=y
CONFIG_CXL_SUSPEND=y
CONFIG_DCACHE_WORD_ACCESS=y
CONFIG_DEBUG_INFO=y
@@ -3792,6 +3791,7 @@ CONFIG_FUNCTION_ALIGNMENT_16B=y
CONFIG_FUNCTION_ALIGNMENT_4B=y
CONFIG_FUNCTION_PADDING_BYTES=16
CONFIG_FUNCTION_PADDING_CFI=11
CONFIG_FUNCTION_TRACE_ARGS=y
CONFIG_FUTEX_PI=y
CONFIG_FWNODE_MDIO=m
CONFIG_FW_LOADER_PAGED_BUF=y
@@ -3824,12 +3824,12 @@ CONFIG_GENERIC_MSI_IRQ=y
CONFIG_GENERIC_NET_UTILS=y
CONFIG_GENERIC_PCI_IOMAP=y
CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_GENERIC_PTDUMP=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_TRACER=y
CONFIG_GENERIC_VDSO_DATA_STORE=y
CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT=y
CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_GET_FREE_REGION=y
@@ -4032,6 +4032,7 @@ CONFIG_IOMMU_IOVA=y
CONFIG_IOMMU_IO_PGTABLE=y
CONFIG_IOMMU_MM_DATA=y
CONFIG_IOMMU_SVA=y
CONFIG_IO_URING_ZCRX=y
CONFIG_IO_WQ=y
CONFIG_IPV6_FOU=m
CONFIG_IPV6_FOU_TUNNEL=m
@@ -4045,14 +4046,13 @@ CONFIG_IRQ_BYPASS_MANAGER=m
CONFIG_IRQ_DOMAIN=y
CONFIG_IRQ_DOMAIN_HIERARCHY=y
CONFIG_IRQ_FORCED_THREADING=y
CONFIG_IRQ_MSI_IOMMU=y
CONFIG_IRQ_WORK=y
CONFIG_JBD2=y
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
CONFIG_KERNFS=y
CONFIG_KPROBES_ON_FTRACE=y
CONFIG_KRETPROBES=y
CONFIG_KRETPROBE_ON_RETHOOK=y
CONFIG_KVFREE_RCU_BATCHED=y
CONFIG_KVM_ASYNC_PF=y
CONFIG_KVM_COMMON=y
CONFIG_KVM_COMPAT=y
@@ -4064,6 +4064,7 @@ CONFIG_KVM_GENERIC_MMU_NOTIFIER=y
CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY=y
CONFIG_KVM_GENERIC_PRIVATE_MEM=y
CONFIG_KVM_MMIO=y
CONFIG_KVM_MMU_LOCKLESS_AGING=y
CONFIG_KVM_PRIVATE_MEM=y
CONFIG_KVM_VFIO=y
CONFIG_KVM_X86=m
@@ -4102,6 +4103,7 @@ CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_MMU_LAZY_TLB_REFCOUNT=y
CONFIG_MMU_NOTIFIER=y
CONFIG_MM_ID=y
CONFIG_MODULES_TREE_LOOKUP=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_MODULE_SIG_FORMAT=y
@@ -4187,6 +4189,7 @@ CONFIG_OUTPUT_FORMAT="elf64-x86-64"
CONFIG_P2SB=y
CONFIG_PADATA=y
CONFIG_PAGE_COUNTER=y
CONFIG_PAGE_MAPCOUNT=y
CONFIG_PAGE_POOL=y
CONFIG_PAGE_SHIFT=12
CONFIG_PAGE_SIZE_LESS_THAN_256KB=y
@@ -4197,7 +4200,6 @@ CONFIG_PARAVIRT_XXL=y
CONFIG_PCIE_PME=y
CONFIG_PCI_ATS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_DOE=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_LABEL=y
CONFIG_PCI_LOCKLESS_CONFIG=y
@@ -4221,7 +4223,7 @@ CONFIG_PROC_CPU_RESCTRL=y
CONFIG_PROC_PID_ARCH_STATUS=y
CONFIG_PROC_THERMAL_MMIO_RAPL=m
CONFIG_PSTORE_ZONE=m
CONFIG_PTDUMP_CORE=y
CONFIG_PTDUMP=y
CONFIG_PTP_1588_CLOCK_OPTIONAL=m
CONFIG_QUEUED_RWLOCKS=y
CONFIG_QUEUED_SPINLOCKS=y
@@ -4232,6 +4234,7 @@ CONFIG_RCU_STALL_COMMON=y
CONFIG_REED_SOLOMON=m
CONFIG_REED_SOLOMON_DEC8=y
CONFIG_REED_SOLOMON_ENC8=y
CONFIG_RESCTRL_FS_PSEUDO_LOCK=y
CONFIG_RETHOOK=y
CONFIG_RING_BUFFER=y
CONFIG_RPMSG=m
@@ -4267,6 +4270,7 @@ CONFIG_SOFTIRQ_ON_OWN_STACK=y
CONFIG_SPARSEMEM=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP_PREINIT=y
CONFIG_SPLIT_PMD_PTLOCKS=y
CONFIG_SPLIT_PTE_PTLOCKS=y
CONFIG_SQUASHFS_DECOMP_MULTI=y
@@ -4327,7 +4331,6 @@ CONFIG_VDSO_GETRANDOM=y
CONFIG_VFIO_IOMMU_TYPE1=m
CONFIG_VFIO_PCI_CORE=m
CONFIG_VFIO_PCI_INTX=y
CONFIG_VFIO_PCI_MMAP=y
CONFIG_VFIO_VIRQFD=y
CONFIG_VGASTATE=m
CONFIG_VHOST=m
@@ -4349,9 +4352,22 @@ CONFIG_X86=y
CONFIG_X86_64=y
CONFIG_X86_64_SMP=y
CONFIG_X86_CMOV=y
CONFIG_X86_CMPXCHG64=y
CONFIG_X86_CX8=y
CONFIG_X86_DEBUGCTLMSR=y
CONFIG_X86_DIRECT_GBPAGES=y
CONFIG_X86_DISABLED_FEATURE_CALL_DEPTH=y
CONFIG_X86_DISABLED_FEATURE_CENTAUR_MCR=y
CONFIG_X86_DISABLED_FEATURE_CYRIX_ARR=y
CONFIG_X86_DISABLED_FEATURE_IBT=y
CONFIG_X86_DISABLED_FEATURE_K6_MTRR=y
CONFIG_X86_DISABLED_FEATURE_LA57=y
CONFIG_X86_DISABLED_FEATURE_LAM=y
CONFIG_X86_DISABLED_FEATURE_RETHUNK=y
CONFIG_X86_DISABLED_FEATURE_RETPOLINE=y
CONFIG_X86_DISABLED_FEATURE_RETPOLINE_LFENCE=y
CONFIG_X86_DISABLED_FEATURE_UNRET=y
CONFIG_X86_DISABLED_FEATURE_USER_SHSTK=y
CONFIG_X86_DISABLED_FEATURE_VME=y
CONFIG_X86_HAVE_PAE=y
CONFIG_X86_HV_CALLBACK_VECTOR=y
CONFIG_X86_INTERNODE_CACHE_SHIFT=6
@@ -4362,6 +4378,18 @@ CONFIG_X86_MCE_THRESHOLD=y
CONFIG_X86_MEM_ENCRYPT=y
CONFIG_X86_MINIMUM_CPU_FAMILY=64
CONFIG_X86_NEED_RELOCS=y
CONFIG_X86_REQUIRED_FEATURE_ALWAYS=y
CONFIG_X86_REQUIRED_FEATURE_CMOV=y
CONFIG_X86_REQUIRED_FEATURE_CPUID=y
CONFIG_X86_REQUIRED_FEATURE_CX8=y
CONFIG_X86_REQUIRED_FEATURE_FPU=y
CONFIG_X86_REQUIRED_FEATURE_FXSR=y
CONFIG_X86_REQUIRED_FEATURE_LM=y
CONFIG_X86_REQUIRED_FEATURE_MSR=y
CONFIG_X86_REQUIRED_FEATURE_NOPL=y
CONFIG_X86_REQUIRED_FEATURE_PAE=y
CONFIG_X86_REQUIRED_FEATURE_XMM=y
CONFIG_X86_REQUIRED_FEATURE_XMM2=y
CONFIG_X86_SPEEDSTEP_LIB=m
CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y
CONFIG_X86_THERMAL_VECTOR=y

View File

@@ -42,7 +42,7 @@ correctness.
static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs)
{
@@ -3848,6 +3852,7 @@ static struct phy_driver marvell_drivers
@@ -3828,6 +3832,7 @@ static struct phy_driver marvell_drivers
.led_hw_control_set = m88e1318_led_hw_control_set,
.led_hw_control_get = m88e1318_led_hw_control_get,
},
@@ -50,7 +50,7 @@ correctness.
{
.phy_id = MARVELL_PHY_ID_88E1145,
.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -3871,6 +3876,8 @@ static struct phy_driver marvell_drivers
@@ -3851,6 +3856,8 @@ static struct phy_driver marvell_drivers
.cable_test_start = m88e1111_vct_cable_test_start,
.cable_test_get_status = m88e1111_vct_cable_test_get_status,
},
@@ -59,7 +59,7 @@ correctness.
{
.phy_id = MARVELL_PHY_ID_88E1149R,
.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -3889,6 +3896,8 @@ static struct phy_driver marvell_drivers
@@ -3869,6 +3876,8 @@ static struct phy_driver marvell_drivers
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
},
@@ -68,7 +68,7 @@ correctness.
{
.phy_id = MARVELL_PHY_ID_88E1240,
.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -3909,6 +3918,7 @@ static struct phy_driver marvell_drivers
@@ -3889,6 +3898,7 @@ static struct phy_driver marvell_drivers
.get_tunable = m88e1011_get_tunable,
.set_tunable = m88e1011_set_tunable,
},
@@ -76,7 +76,7 @@ correctness.
{
.phy_id = MARVELL_PHY_ID_88E1116R,
.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -4197,9 +4207,9 @@ static const struct mdio_device_id __may
@@ -4177,9 +4187,9 @@ static const struct mdio_device_id __may
{ MARVELL_PHY_ID_88E1111_FINISAR, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1118, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1121R, MARVELL_PHY_ID_MASK },

View File

@@ -1,32 +0,0 @@
From: Ben Hutchings <benh@debian.org>
Date: Mon, 16 Sep 2024 00:07:04 +0200
Subject: Documentation: Use relative source filenames in ABI documentation
Currently the ABI documentation files contain absolute source
filenames, which makes them unreproducible if the build directory can
vary.
Remove the source base directory ($srctree) from the source filenames
shown in the documentation.
Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/Documentation/sphinx/kernel_abi.py
+++ b/Documentation/sphinx/kernel_abi.py
@@ -103,6 +103,7 @@ class KernelCmd(Directive):
lines = code_block + "\n\n"
line_regex = re.compile(r"^\.\. LINENO (\S+)\#([0-9]+)$")
+ srctree = os.path.abspath(os.environ["srctree"])
ln = 0
n = 0
f = fname
@@ -127,7 +128,7 @@ class KernelCmd(Directive):
# sphinx counts lines from 0
ln = int(match.group(2)) - 1
else:
- content.append(line, f, ln)
+ content.append(line, os.path.relpath(f, srctree), ln)
kernellog.info(self.state.document.settings.env.app, "%s: parsed %i lines" % (fname, n))

View File

@@ -9,7 +9,7 @@ sources.
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -188,6 +188,11 @@ cmd_record_mcount = $(if $(findstring $(
@@ -184,6 +184,11 @@ cmd_record_mcount = $(if $(findstring $(
$(sub_cmd_record_mcount))
endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT

View File

@@ -16,7 +16,7 @@ Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -945,7 +945,7 @@ $(LIBAPI)-clean:
@@ -963,7 +963,7 @@ $(LIBAPI)-clean:
$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= subdir= \

View File

@@ -21,7 +21,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+ * much older kernel. Do "use" the attr structure here to avoid
+ * a "set but not used" warning.
*/
- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)) == 0;
+ (void)&attr;
+ return 0;
}

View File

@@ -6,7 +6,7 @@ Signed-off-by: Ben Hutchings <benh@debian.org>
---
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -919,7 +919,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in
@@ -937,7 +937,7 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c in
.SECONDARY: $(DLFILTERS:.so=.o)
$(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o

View File

@@ -1,7 +1,7 @@
From: Serge Hallyn <serge.hallyn@canonical.com>
Date: Fri, 31 May 2013 19:12:12 +0000 (+0100)
Subject: add sysctl to disallow unprivileged CLONE_NEWUSER by default
Origin: https://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8
Origin: http://kernel.ubuntu.com/git?p=serge%2Fubuntu-saucy.git;a=commit;h=5c847404dcb2e3195ad0057877e1422ae90892b8
add sysctl to disallow unprivileged CLONE_NEWUSER by default
@@ -34,7 +34,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Minimum number of threads to boot the kernel
*/
@@ -2172,6 +2178,10 @@ __latent_entropy struct task_struct *cop
@@ -2194,6 +2200,10 @@ __latent_entropy struct task_struct *cop
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);
@@ -45,7 +45,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3325,6 +3335,12 @@ int ksys_unshare(unsigned long unshare_f
@@ -3354,6 +3364,12 @@ int ksys_unshare(unsigned long unshare_f
if (unshare_flags & CLONE_NEWNS)
unshare_flags |= CLONE_FS;
@@ -60,18 +60,18 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
goto bad_unshare_out;
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -135,6 +135,10 @@ static enum sysctl_writes_mode sysctl_wr
int sysctl_legacy_va_layout;
#endif
@@ -84,6 +84,10 @@ EXPORT_SYMBOL_GPL(sysctl_long_vals);
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone;
+#endif
+
#endif /* CONFIG_SYSCTL */
#ifdef CONFIG_PROC_SYSCTL
/*
@@ -1617,6 +1621,15 @@ static const struct ctl_table kern_table
/**
@@ -1595,6 +1599,15 @@ static const struct ctl_table kern_table
.mode = 0644,
.proc_handler = proc_dointvec,
},

View File

@@ -80,7 +80,7 @@ Consequently, the ashmem part of this patch has been removed.
{
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6392,6 +6392,7 @@ inval:
@@ -6589,6 +6589,7 @@ inval:
count_vm_vma_lock_event(VMA_LOCK_ABORT);
return NULL;
}

View File

@@ -15,7 +15,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1099,8 +1099,8 @@ module_exit(dccp_v4_exit);
@@ -1094,8 +1094,8 @@ module_exit(dccp_v4_exit);
* values directly, Also cover the case where the protocol is not specified,
* i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
*/
@@ -28,7 +28,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1174,8 +1174,8 @@ module_exit(dccp_v6_exit);
@@ -1167,8 +1167,8 @@ module_exit(dccp_v6_exit);
* values directly, Also cover the case where the protocol is not specified,
* i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
*/

View File

@@ -22,7 +22,7 @@ Export the currently un-exported symbols it depends on.
--- a/fs/file.c
+++ b/fs/file.c
@@ -845,6 +845,7 @@ struct file *file_close_fd(unsigned int
@@ -843,6 +843,7 @@ struct file *file_close_fd(unsigned int
return file;
}
@@ -82,7 +82,7 @@ Export the currently un-exported symbols it depends on.
* task_work_cancel_match - cancel a pending work added by task_work_add()
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2027,6 +2027,7 @@ void zap_page_range_single(struct vm_are
@@ -2020,6 +2020,7 @@ void zap_page_range_single(struct vm_are
tlb_finish_mmu(&tlb);
hugetlb_zap_end(vma, details);
}

View File

@@ -12,7 +12,7 @@ actually used.
---
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1838,6 +1838,14 @@ static int do_fanotify_mark(int fanotify
@@ -1881,6 +1881,14 @@ static int do_fanotify_mark(int fanotify
umask = FANOTIFY_EVENT_FLAGS;
}

View File

@@ -0,0 +1,37 @@
From: Ben Hutchings <benh@debian.org>
Subject: firmware_loader: Log direct loading failures as info for d-i
Date: Thu, 30 May 2024 13:14:32 +0100
Forwarded: not-needed
On an installed Debian system, firmware packages will normally be
installed automatically based on a mapping of device IDs to firmware.
Within the Debian installer this has not yet happened and we need a
way to detect missing firmware.
Although many/most drivers log firmware loading failures, they do so
using many different formats. This adds a single log message to the
firmware loader, which the installer's hw-detect package will look
for. The log level is set to "info" because some failures are
expected and we do not want to confuse users with bogus error messages
(like in bug #966218).
NOTE: The log message format must not be changed without coordinating
this with the check-missing-firmware.sh in hw-detect.
---
drivers/base/firmware_loader/fallback.c | 2 +-
drivers/base/firmware_loader/main.c | 17 ++++++++---------
2 files changed, 9 insertions(+), 10 deletions(-)
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -590,6 +590,10 @@ fw_get_filesystem_firmware(struct device
}
__putname(path);
+ if (rc)
+ dev_info(device, "firmware: failed to load %s (%d)\n",
+ fw_priv->fw_name, rc);
+
return rc;
}

View File

@@ -9,7 +9,7 @@ This reverts commit 561ec64ae67ef25cac8d72bb9c4bfc955edfd415
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1094,8 +1094,8 @@ static inline void put_link(struct namei
@@ -1095,8 +1095,8 @@ static inline void put_link(struct namei
path_put(&last->link);
}

View File

@@ -19,7 +19,7 @@ use of $(ARCH) needs to be moved after this.
---
--- a/Makefile
+++ b/Makefile
@@ -405,36 +405,6 @@ include $(srctree)/scripts/subarch.inclu
@@ -402,36 +402,6 @@ include $(srctree)/scripts/subarch.inclu
# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
ARCH ?= $(SUBARCH)
@@ -56,7 +56,7 @@ use of $(ARCH) needs to be moved after this.
KCONFIG_CONFIG ?= .config
export KCONFIG_CONFIG
@@ -554,6 +524,35 @@ RUSTFLAGS_KERNEL =
@@ -551,6 +521,35 @@ RUSTFLAGS_KERNEL =
AFLAGS_KERNEL =
LDFLAGS_vmlinux =

View File

@@ -15,7 +15,7 @@ to the installed location.
---
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -660,10 +660,12 @@ static int report__browse_hists(struct r
@@ -666,10 +666,12 @@ static int report__browse_hists(struct r
path = system_path(TIPDIR);
if (perf_tip(&help, path) || help == NULL) {

View File

@@ -20,7 +20,7 @@ is non-empty.
---
--- a/Makefile
+++ b/Makefile
@@ -1871,7 +1871,7 @@ PHONY += prepare
@@ -1882,7 +1882,7 @@ PHONY += prepare
# now expand this into a simple variable to reduce the cost of shell evaluations
prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT)
prepare:

View File

@@ -1,7 +1,7 @@
From: Adriaan Schmidt <adriaan.schmidt@siemens.com>
Date: Mon, 4 Apr 2022 13:38:33 +0200
Subject: tools: install perf python bindings
Bug-Debian: https://bugs.debian.org/860957
Bug-Debian: http://bugs.debian.org/860957
Forwarded: not-needed
---
@@ -10,7 +10,7 @@ Forwarded: not-needed
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1139,7 +1139,7 @@ install-bin: install-tools install-tests
@@ -1157,7 +1157,7 @@ install-bin: install-tools install-tests
install: install-bin try-install-man
install-python_ext:

View File

@@ -4,7 +4,7 @@ Subject: linux-tools: Install perf-read-vdso{,x}32 in directory under /usr/lib
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1067,21 +1067,21 @@ install-tools: all install-gtk
@@ -1085,21 +1085,21 @@ install-tools: all install-gtk
$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(includedir_SQ)/perf'; \
$(INSTALL) -m 644 include/perf/perf_dlfilter.h -t '$(DESTDIR_SQ)$(includedir_SQ)/perf'

View File

@@ -13,7 +13,7 @@ $KBUILD_BUILD_TIMESTAMP.
--- a/init/Makefile
+++ b/init/Makefile
@@ -29,7 +29,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) :
@@ -30,7 +30,7 @@ preempt-flag-$(CONFIG_PREEMPT_DYNAMIC) :
preempt-flag-$(CONFIG_PREEMPT_RT) := PREEMPT_RT
build-version = $(or $(KBUILD_BUILD_VERSION), $(build-version-auto))

View File

@@ -19,7 +19,7 @@ Forwarded: not-needed
/* describe a ptrace relationship for potential exception */
struct ptrace_relation {
@@ -474,7 +474,7 @@ static inline void yama_init_sysctl(void
@@ -469,7 +469,7 @@ static inline void yama_init_sysctl(void
static int __init yama_init(void)
{

View File

@@ -31,7 +31,7 @@ cc: linux-efi@vger.kernel.org
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1073,19 +1073,7 @@ void __init setup_arch(char **cmdline_p)
@@ -1127,19 +1127,7 @@ void __init setup_arch(char **cmdline_p)
/* Allocate bigger log buffer */
setup_log_buf(1);

View File

@@ -26,7 +26,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -907,6 +907,8 @@ void __init setup_arch(char **cmdline_p)
@@ -964,6 +964,8 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_init();
@@ -35,7 +35,7 @@ Signed-off-by: Salvatore Bonaccorso <carnil@debian.org>
reserve_ibft_region();
x86_init.resources.dmi_setup();
@@ -1073,8 +1075,6 @@ void __init setup_arch(char **cmdline_p)
@@ -1127,8 +1129,6 @@ void __init setup_arch(char **cmdline_p)
/* Allocate bigger log buffer */
setup_log_buf(1);

View File

@@ -22,9 +22,9 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1701,6 +1701,11 @@ int perf_cpu_time_max_percent_handler(co
int perf_event_max_stack_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -1684,6 +1684,11 @@ extern int sysctl_perf_event_sample_rate
extern void perf_sample_event_took(u64 sample_len_ns);
+static inline bool perf_paranoid_any(void)
+{
@@ -36,7 +36,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -449,8 +449,13 @@ static struct kmem_cache *perf_event_cac
@@ -450,8 +450,13 @@ static struct kmem_cache *perf_event_cac
* 0 - disallow raw tracepoint access for unpriv
* 1 - disallow cpu events for unpriv
* 2 - disallow kernel profiling for unpriv
@@ -48,9 +48,9 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
int sysctl_perf_event_paranoid __read_mostly = 2;
+#endif
/* Minimum for 512 kiB + 1 user control page */
int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
@@ -12813,6 +12818,9 @@ SYSCALL_DEFINE5(perf_event_open,
/* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */
static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
@@ -13084,6 +13089,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (err)
return err;
@@ -58,13 +58,13 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+ return -EACCES;
+
/* Do we allow access to perf_event_open(2) ? */
err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
err = security_perf_event_open(PERF_SECURITY_OPEN);
if (err)
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -51,6 +51,15 @@ config PROC_MEM_NO_FORCE
endchoice
@@ -72,6 +72,15 @@ config MSEAL_SYSTEM_MAPPINGS
For complete descriptions of memory sealing, please see
Documentation/userspace-api/mseal.rst
+config SECURITY_PERF_EVENTS_RESTRICT
+ bool "Restrict unprivileged use of performance events"

View File

@@ -22,7 +22,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2264,6 +2264,8 @@
@@ -2288,6 +2288,8 @@
bypassed by not enabling DMAR with this option. In
this case, gfx device will use physical address for
DMA.
@@ -68,7 +68,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
} else if (!strncmp(str, "forcedac", 8)) {
pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
iommu_dma_forcedac = true;
@@ -1902,6 +1910,9 @@ static int device_def_domain_type(struct
@@ -1935,6 +1943,9 @@ static int device_def_domain_type(struct
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
return IOMMU_DOMAIN_IDENTITY;
@@ -78,7 +78,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
}
return 0;
@@ -2196,6 +2207,9 @@ static int __init init_dmars(void)
@@ -2229,6 +2240,9 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}

View File

@@ -29,7 +29,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7004,6 +7004,10 @@
@@ -7044,6 +7044,10 @@
later by a loaded module cannot be set this way.
Example: sysctl.vm.swappiness=40
@@ -42,7 +42,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Ignore sysrq setting - this boot parameter will
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -3202,6 +3202,14 @@ config COMPAT_32
@@ -3169,6 +3169,14 @@ config COMPAT_32
select HAVE_UID16
select OLD_SIGSUSPEND3
@@ -57,9 +57,70 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
config COMPAT
def_bool y
depends on IA32_EMULATION || X86_X32_ABI
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -64,7 +64,7 @@ static __always_inline bool do_syscall_x
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -12,6 +12,9 @@
#include <asm/user.h>
#include <asm/auxvec.h>
#include <asm/fsgsbase.h>
+#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */
+#include <asm/syscall.h>
+#endif
typedef unsigned long elf_greg_t;
@@ -152,7 +155,8 @@ do { \
#define compat_elf_check_arch(x) \
((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
- (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
+ (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \
+ (x)->e_machine == EM_X86_64))
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,6 +13,7 @@
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/jump_label.h>
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p
extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
+#if defined(CONFIG_X86_X32_ABI)
+#if defined(CONFIG_X86_X32_DISABLED)
+DECLARE_STATIC_KEY_FALSE(x32_enabled_skey);
+#define x32_enabled static_branch_unlikely(&x32_enabled_skey)
+#else
+DECLARE_STATIC_KEY_TRUE(x32_enabled_skey);
+#define x32_enabled static_branch_likely(&x32_enabled_skey)
+#endif
+#else
+#define x32_enabled 0
+#endif
+
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,6 +7,9 @@
#include <linux/syscalls.h>
#include <linux/entry-common.h>
#include <linux/nospec.h>
+#include <linux/moduleparam.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "syscall."
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
@@ -75,7 +78,7 @@ static __always_inline bool do_syscall_x
*/
unsigned int xnr = nr - __X32_SYSCALL_BIT;
@@ -68,23 +129,12 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
xnr = array_index_nospec(xnr, X32_NR_syscalls);
regs->ax = x32_sys_call(regs, xnr);
return true;
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -4,6 +4,9 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <linux/moduleparam.h>
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "syscall."
#include <linux/syscalls.h>
#include <asm/syscall.h>
@@ -23,3 +26,46 @@ long x32_sys_call(const struct pt_regs *
default: return __x64_sys_ni_syscall(regs);
@@ -139,3 +142,48 @@ __visible noinstr bool do_syscall_64(str
/* Use SYSRET to exit to userspace */
return true;
}
};
+
+#ifdef CONFIG_X86_X32_ABI
+/* Maybe enable x32 syscalls */
+
+#if defined(CONFIG_X86_X32_DISABLED)
@@ -127,54 +177,4 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+};
+
+arch_param_cb(x32, &x32_param_ops, NULL, 0444);
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -12,6 +12,9 @@
#include <asm/user.h>
#include <asm/auxvec.h>
#include <asm/fsgsbase.h>
+#ifndef COMPILE_OFFSETS /* avoid a circular dependency on asm-offsets.h */
+#include <asm/syscall.h>
+#endif
typedef unsigned long elf_greg_t;
@@ -151,7 +154,8 @@ do { \
#define compat_elf_check_arch(x) \
((elf_check_arch_ia32(x) && ia32_enabled_verbose()) || \
- (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64))
+ (IS_ENABLED(CONFIG_X86_X32_ABI) && x32_enabled && \
+ (x)->e_machine == EM_X86_64))
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,6 +13,7 @@
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/jump_label.h>
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
@@ -28,6 +29,18 @@ extern long ia32_sys_call(const struct p
extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
+#if defined(CONFIG_X86_X32_ABI)
+#if defined(CONFIG_X86_X32_DISABLED)
+DECLARE_STATIC_KEY_FALSE(x32_enabled_skey);
+#define x32_enabled static_branch_unlikely(&x32_enabled_skey)
+#else
+DECLARE_STATIC_KEY_TRUE(x32_enabled_skey);
+#define x32_enabled static_branch_likely(&x32_enabled_skey)
+#endif
+#else
+#define x32_enabled 0
+#endif
+
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons

View File

@@ -1,52 +0,0 @@
this reverts following commit:
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 14 Jan 2021 16:32:42 -0600
Subject: objtool: Don't fail the kernel build on fatal errors
[ Upstream commit 655cf86548a3938538642a6df27dd359e13c86bd ]
This is basically a revert of commit 644592d32837 ("objtool: Fail the
kernel build on fatal errors").
That change turned out to be more trouble than it's worth. Failing the
build is an extreme measure which sometimes gets too much attention and
blocks CI build testing.
These fatal-type warnings aren't yet as rare as we'd hope, due to the
ever-increasing matrix of supported toolchains/plugins and their
fast-changing nature as of late.
Also, there are more people (and bots) looking for objtool warnings than
ever before, so even non-fatal warnings aren't likely to be ignored for
long.
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4783,10 +4783,14 @@ int check(struct objtool_file *file)
}
out:
- /*
- * For now, don't fail the kernel build on fatal warnings. These
- * errors are still fairly common due to the growing matrix of
- * supported toolchains and their recent pace of change.
- */
+ if (ret < 0) {
+ /*
+ * Fatal error. The binary is corrupt or otherwise broken in
+ * some way, or objtool itself is broken. Fail the kernel
+ * build.
+ */
+ return ret;
+ }
+
return 0;
}

View File

@@ -1,14 +1,6 @@
From 90b69178f6a866c7f3330c2006f6b5396146192c Mon Sep 17 00:00:00 2001
From 906ed24dfc7e1bbceacc087ba38aecfd22a9890b Mon Sep 17 00:00:00 2001
From: graysky <therealgraysky AT proton DOT me>
Date: Mon, 16 Sep 2024 05:55:58 -0400
Subject: ZEN: Add graysky's more-uarches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From https://github.com/graysky2/kernel_compiler_patch
more-ISA-levels-and-uarches-for-kernel-6.1.79+.patch
Date: Mon, 16 Sep 2024 14:47:03 -0400
FEATURES
This patch adds additional tunings via new x86-64 ISA levels and
@@ -121,46 +113,122 @@ REFERENCES
1. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-x86-Options
2. https://bugzilla.kernel.org/show_bug.cgi?id=77461
3. https://github.com/graysky2/kernel_gcc_patch/issues/15
4. https://www.linuxforge.net/docs/linux/linux-gcc.php
4. http://www.linuxforge.net/docs/linux/linux-gcc.php
---
arch/x86/Kconfig.cpu | 367 ++++++++++++++++++++++++++++++--
arch/x86/Makefile | 89 +++++++-
arch/x86/include/asm/vermagic.h | 72 +++++++
3 files changed, 511 insertions(+), 17 deletions(-)
arch/x86/Kconfig.cpu | 462 ++++++++++++++++++++++++++++++++++++++++++-
arch/x86/Makefile | 222 +++++++++++++++++++++
2 files changed, 675 insertions(+), 9 deletions(-)
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -155,9 +155,8 @@ config MPENTIUM4
-Paxville
-Dempsey
@@ -31,6 +31,7 @@ choice
- "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+ - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs.
- "Crusoe" for the Transmeta Crusoe series.
- "Efficeon" for the Transmeta Efficeon series.
- "Winchip-C6" for original IDT Winchip.
@@ -41,7 +42,10 @@ choice
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
+ - "Intel P4" for the Pentium 4/Netburst microarchitecture.
+ - "Core 2/newer Xeon" for all core2 and newer Intel CPUs.
- "Intel Atom" for the Atom-microarchitecture CPUs.
+ - "Generic-x86-64" for a kernel which runs on any x86-64 CPU.
See each option's help text for additional details. If you don't know
what to do, choose "Pentium-Pro".
@@ -135,10 +139,21 @@ config MPENTIUM4
-Mobile Pentium 4
-Mobile Pentium 4 M
-Extreme Edition (Gallatin)
+ -Prescott
+ -Prescott 2M
+ -Cedar Mill
+ -Presler
+ -Smithfiled
Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
-Foster
-Prestonia
-Gallatin
+ -Nocona
+ -Irwindale
+ -Cranford
+ -Potomac
+ -Paxville
+ -Dempsey
-
config MK6
- bool "K6/K6-II/K6-III"
+ bool "AMD K6/K6-II/K6-III"
depends on X86_32
help
Select this for an AMD K6-family processor. Enables use of
@@ -165,7 +164,7 @@ config MK6
flags to GCC.
bool "K6/K6-II/K6-III"
@@ -245,6 +260,435 @@ config MATOM
config MK7
- bool "Athlon/Duron/K7"
+ bool "AMD Athlon/Duron/K7"
depends on X86_32
help
Select this for an AMD Athlon K7-family processor. Enables use of
@@ -173,12 +172,114 @@ config MK7
flags to GCC.
endchoice
config MK8
- bool "Opteron/Athlon64/Hammer/K8"
+config CC_HAS_MARCH_NATIVE
+ # This flag might not be available in cross-compilers:
+ def_bool $(cc-option, -march=native)
+ # LLVM 18 has an easily triggered internal compiler error in core
+ # networking code with '-march=native' on certain systems:
+ # https://github.com/llvm/llvm-project/issues/72026
+ # LLVM 19 introduces an optimization that resolves some high stack
+ # usage warnings that only appear wth '-march=native'.
+ depends on CC_IS_GCC || CLANG_VERSION >= 190100
+
+choice
+ prompt "x86_64 Compiler Build Optimization"
+ default GENERIC_CPU
+
+config X86_NATIVE_CPU
+ bool "Build and optimize for local/native CPU"
+ depends on X86_64
+ depends on CC_HAS_MARCH_NATIVE
+ help
+ Optimize for the current CPU used to compile the kernel.
+ Use this option if you intend to build the kernel for your
+ local machine.
+
+ Note that such a kernel might not work optimally on a
+ different x86 machine.
+
+ If unsure, say N.
+
+config GENERIC_CPU
+ bool "Generic-x86-64"
+ depends on X86_64
+ help
+ Generic x86-64 CPU.
+ Runs equally well on all x86-64 CPUs.
+
+config MNATIVE_INTEL
+ bool "Intel-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for AMD CPUs. Intel Only!
+
+ Enables -march=native
+
+config MNATIVE_AMD
+ bool "AMD-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for Intel CPUs. AMD Only!
+
+ Enables -march=native
+
+config MK8
+ bool "AMD Opteron/Athlon64/Hammer/K8"
help
Select this for an AMD Opteron or Athlon64 Hammer-family processor.
Enables use of some extended instructions, and passes appropriate
optimization flags to GCC.
+ help
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+ Enables use of some extended instructions, and passes appropriate
+ optimization flags to GCC.
+
+config MK8SSE3
+ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3"
+ help
@@ -226,21 +294,21 @@ REFERENCES
+ Enables -march=bdver4
+
+config MZEN
+ bool "AMD Zen"
+ bool "AMD Ryzen"
+ help
+ Select this for AMD Family 17h Zen processors.
+
+ Enables -march=znver1
+
+config MZEN2
+ bool "AMD Zen 2"
+ bool "AMD Ryzen 2"
+ help
+ Select this for AMD Family 17h Zen 2 processors.
+
+ Enables -march=znver2
+
+config MZEN3
+ bool "AMD Zen 3"
+ bool "AMD Ryzen 3"
+ depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+ Select this for AMD Family 19h Zen 3 processors.
@@ -248,7 +316,7 @@ REFERENCES
+ Enables -march=znver3
+
+config MZEN4
+ bool "AMD Zen 4"
+ bool "AMD Ryzen 4"
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000)
+ help
+ Select this for AMD Family 19h Zen 4 processors.
@@ -256,57 +324,48 @@ REFERENCES
+ Enables -march=znver4
+
+config MZEN5
+ bool "AMD Zen 5"
+ bool "AMD Ryzen 5"
+ depends on (CC_IS_GCC && GCC_VERSION > 140000) || (CC_IS_CLANG && CLANG_VERSION >= 190100)
+ help
+ Select this for AMD Family 19h Zen 5 processors.
+
+ Enables -march=znver5
+
config MCRUSOE
bool "Crusoe"
depends on X86_32
@@ -269,8 +370,17 @@ config MPSC
using the cpu family field
in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+config MATOM
+ bool "Intel Atom"
+config MPSC
+ bool "Intel P4 / older Netburst based Xeon"
+ depends on X86_64
+ help
+ Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
+ Xeon CPUs with Intel 64bit which is compatible with x86-64.
+ Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
+ Netburst core and shouldn't use this option. You can distinguish them
+ using the cpu family field
+ in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+
+config MCORE2
+ bool "Intel Core 2"
+ depends on X86_64
+ help
+
+ Select this for the Intel Atom platform. Intel Atom CPUs have an
+ in-order pipelining architecture and thus can benefit from
+ accordingly optimized code. Use a recent GCC with specific Atom
+ support in order to fully benefit from selecting this option.
+ Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+ 53xx) CPUs. You can distinguish newer from older Xeons by the CPU
+ family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+ (not a typo)
+
config MCORE2
- bool "Core 2/newer Xeon"
+ bool "Intel Core 2"
help
Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
@@ -278,14 +388,199 @@ config MCORE2
family in /proc/cpuinfo. Newer ones have 6 and older ones 15
(not a typo)
-config MATOM
- bool "Intel Atom"
+ Enables -march=core2
+
+config MNEHALEM
+ bool "Intel Nehalem"
help
- Select this for the Intel Atom platform. Intel Atom CPUs have an
- in-order pipelining architecture and thus can benefit from
- accordingly optimized code. Use a recent GCC with specific Atom
- support in order to fully benefit from selecting this option.
+ depends on X86_64
+ help
+
+ Select this for 1st Gen Core processors in the Nehalem family.
+
+ Enables -march=nehalem
+
+config MWESTMERE
+ bool "Intel Westmere"
+ depends on X86_64
+ help
+
+ Select this for the Intel Westmere formerly Nehalem-C family.
@@ -315,6 +374,7 @@ REFERENCES
+
+config MSILVERMONT
+ bool "Intel Silvermont"
+ depends on X86_64
+ help
+
+ Select this for the Intel Silvermont platform.
@@ -323,6 +383,7 @@ REFERENCES
+
+config MGOLDMONT
+ bool "Intel Goldmont"
+ depends on X86_64
+ help
+
+ Select this for the Intel Goldmont platform including Apollo Lake and Denverton.
@@ -331,6 +392,7 @@ REFERENCES
+
+config MGOLDMONTPLUS
+ bool "Intel Goldmont Plus"
+ depends on X86_64
+ help
+
+ Select this for the Intel Goldmont Plus platform including Gemini Lake.
@@ -339,6 +401,7 @@ REFERENCES
+
+config MSANDYBRIDGE
+ bool "Intel Sandy Bridge"
+ depends on X86_64
+ help
+
+ Select this for 2nd Gen Core processors in the Sandy Bridge family.
@@ -347,6 +410,7 @@ REFERENCES
+
+config MIVYBRIDGE
+ bool "Intel Ivy Bridge"
+ depends on X86_64
+ help
+
+ Select this for 3rd Gen Core processors in the Ivy Bridge family.
@@ -355,6 +419,7 @@ REFERENCES
+
+config MHASWELL
+ bool "Intel Haswell"
+ depends on X86_64
+ help
+
+ Select this for 4th Gen Core processors in the Haswell family.
@@ -363,6 +428,7 @@ REFERENCES
+
+config MBROADWELL
+ bool "Intel Broadwell"
+ depends on X86_64
+ help
+
+ Select this for 5th Gen Core processors in the Broadwell family.
@@ -371,6 +437,7 @@ REFERENCES
+
+config MSKYLAKE
+ bool "Intel Skylake"
+ depends on X86_64
+ help
+
+ Select this for 6th Gen Core processors in the Skylake family.
@@ -379,6 +446,7 @@ REFERENCES
+
+config MSKYLAKEX
+ bool "Intel Skylake X"
+ depends on X86_64
+ help
+
+ Select this for 6th Gen Core processors in the Skylake X family.
@@ -387,6 +455,7 @@ REFERENCES
+
+config MCANNONLAKE
+ bool "Intel Cannon Lake"
+ depends on X86_64
+ help
+
+ Select this for 8th Gen Core processors
@@ -395,6 +464,7 @@ REFERENCES
+
+config MICELAKE_CLIENT
+ bool "Intel Ice Lake"
+ depends on X86_64
+ help
+
+ Select this for 10th Gen Core client processors in the Ice Lake family.
@@ -403,22 +473,16 @@ REFERENCES
+
+config MICELAKE_SERVER
+ bool "Intel Ice Lake Server"
+ depends on X86_64
+ help
+
+ Select this for 10th Gen Core server processors in the Ice Lake family.
+
+ Enables -march=icelake-server
+
+config MCASCADELAKE
+ bool "Intel Cascade Lake"
+ help
+
+ Select this for Xeon processors in the Cascade Lake family.
+
+ Enables -march=cascadelake
+
+config MCOOPERLAKE
+ bool "Intel Cooper Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
@@ -426,8 +490,19 @@ REFERENCES
+
+ Enables -march=cooperlake
+
+config MCASCADELAKE
+ bool "Intel Cascade Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
+ Select this for Xeon processors in the Cascade Lake family.
+
+ Enables -march=cascadelake
+
+config MTIGERLAKE
+ bool "Intel Tiger Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000)
+ help
+
@@ -437,6 +512,7 @@ REFERENCES
+
+config MSAPPHIRERAPIDS
+ bool "Intel Sapphire Rapids"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -446,6 +522,7 @@ REFERENCES
+
+config MROCKETLAKE
+ bool "Intel Rocket Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -455,6 +532,7 @@ REFERENCES
+
+config MALDERLAKE
+ bool "Intel Alder Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000)
+ help
+
@@ -464,6 +542,7 @@ REFERENCES
+
+config MRAPTORLAKE
+ bool "Intel Raptor Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
@@ -473,6 +552,7 @@ REFERENCES
+
+config MMETEORLAKE
+ bool "Intel Meteor Lake"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
@@ -482,46 +562,16 @@ REFERENCES
+
+config MEMERALDRAPIDS
+ bool "Intel Emerald Rapids"
+ depends on X86_64
+ depends on (CC_IS_GCC && GCC_VERSION > 130000) || (CC_IS_CLANG && CLANG_VERSION >= 150500)
+ help
+
+ Select this for fifth-generation 10 nm process processors in the Emerald Rapids family.
+
+ Enables -march=emeraldrapids
config GENERIC_CPU
bool "Generic-x86-64"
@@ -294,6 +589,26 @@ config GENERIC_CPU
Generic x86-64 CPU.
Run equally well on all x86-64 CPUs.
+config MNATIVE_INTEL
+ bool "Intel-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for AMD CPUs. Intel Only!
+endchoice
+
+ Enables -march=native
+
+config MNATIVE_AMD
+ bool "AMD-Native optimizations autodetected by the compiler"
+ help
+
+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects
+ the optimum settings to use based on your processor. Do NOT use this
+ for Intel CPUs. AMD Only!
+
+ Enables -march=native
+
endchoice
config X86_GENERIC
@@ -308,6 +623,30 @@ config X86_GENERIC
This is really intended for distributors who need more
generic optimizations.
+config X86_64_VERSION
+ int "x86-64 compiler ISA level"
+ range 1 3
@@ -531,7 +581,7 @@ REFERENCES
+ Specify a specific x86-64 compiler ISA level.
+
+ There are three x86-64 ISA levels that work on top of
+ the x86-64 baseline, namely: x86-64-v2, x86-64-v3, and x86-64-v4.
+ the x86-64 baseline, namely: x86-64-v2 and x86-64-v3.
+
+ x86-64-v2 brings support for vector instructions up to Streaming SIMD
+ Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3
@@ -546,221 +596,291 @@ REFERENCES
+ /lib/ld-linux-x86-64.so.2 --help | grep supported
+ /lib64/ld-linux-x86-64.so.2 --help | grep supported
+
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
@@ -318,7 +657,7 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_GENERIC
bool "Generic x86 support"
depends on X86_32
@@ -266,8 +710,8 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
- default "7" if MPENTIUM4
- default "6" if MK7 || MPENTIUMM || MATOM || MVIAC7 || X86_GENERIC || X86_64
+ default "7" if MPENTIUM4 || MPSC
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
default "4" if MELAN || M486SX || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
@@ -336,11 +675,11 @@ config X86_ALIGNMENT_16
@@ -285,19 +729,19 @@ config X86_ALIGNMENT_16
config X86_INTEL_USERCOPY
def_bool y
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK7 || MEFFICEON
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL
config X86_USE_PPRO_CHECKSUM
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MATOM
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MZEN4 || MZEN5 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE_CLIENT || MICELAKE_SERVER || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MRAPTORLAKE || MMETEORLAKE || MEMERALDRAPIDS || MNATIVE_INTEL || MNATIVE_AMD
#
# P6_NOPs are a relatively minor optimization that require a family >=
config X86_TSC
def_bool y
- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MATOM) || X86_64
+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
config X86_HAVE_PAE
def_bool y
- depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC7 || MATOM || X86_64
+ depends on MCRUSOE || MEFFICEON || MCYRIXIII || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC7 || MCORE2 || MATOM || X86_64
config X86_CX8
def_bool y
@@ -307,13 +751,13 @@ config X86_CX8
# generates cmov.
config X86_CMOV
def_bool y
- depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || MATOM || MGEODE_LX || X86_64)
+ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MK7)
- default "5" if X86_32 && X86_CX8
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
+ default "5" if X86_32 && X86_CMPXCHG64
default "4"
config X86_DEBUGCTLMSR
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -182,15 +182,98 @@ else
cflags-$(CONFIG_MK8) += -march=k8
cflags-$(CONFIG_MPSC) += -march=nocona
cflags-$(CONFIG_MCORE2) += -march=core2
- cflags-$(CONFIG_MATOM) += -march=atom
- cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
+ cflags-$(CONFIG_MATOM) += -march=bonnell
+ ifeq ($(CONFIG_X86_64_VERSION),1)
+ cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
+ else
+ cflags-$(CONFIG_GENERIC_CPU) += -march=x86-64-v$(CONFIG_X86_64_VERSION)
+ rustflags-$(CONFIG_GENERIC_CPU) += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
+ endif
+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3
+ cflags-$(CONFIG_MK10) += -march=amdfam10
+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona
+ cflags-$(CONFIG_MBOBCAT) += -march=btver1
+ cflags-$(CONFIG_MJAGUAR) += -march=btver2
+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1
+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm
+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm
+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm
+ cflags-$(CONFIG_MZEN) += -march=znver1
+ cflags-$(CONFIG_MZEN2) += -march=znver2
+ cflags-$(CONFIG_MZEN3) += -march=znver3
+ cflags-$(CONFIG_MZEN4) += -march=znver4
+ cflags-$(CONFIG_MZEN5) += -march=znver5
+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native
+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -mno-tbm
+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem
+ cflags-$(CONFIG_MWESTMERE) += -march=westmere
+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont
+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont
+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus
+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge
+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge
+ cflags-$(CONFIG_MHASWELL) += -march=haswell
+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell
+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake
+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512
+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake
+ cflags-$(CONFIG_MICELAKE_CLIENT) += -march=icelake-client
+ cflags-$(CONFIG_MICELAKE_SERVER) += -march=icelake-server
+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake
+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake
+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake
+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids
+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake
+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake
+ cflags-$(CONFIG_MRAPTORLAKE) += -march=raptorlake
+ cflags-$(CONFIG_MMETEORLAKE) += -march=meteorlake
+ cflags-$(CONFIG_MEMERALDRAPIDS) += -march=emeraldrapids
KBUILD_CFLAGS += $(cflags-y)
@@ -173,8 +173,230 @@ else
# Use -mskip-rax-setup if supported.
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup)
rustflags-$(CONFIG_MK8) += -Ctarget-cpu=k8
rustflags-$(CONFIG_MPSC) += -Ctarget-cpu=nocona
rustflags-$(CONFIG_MCORE2) += -Ctarget-cpu=core2
rustflags-$(CONFIG_MATOM) += -Ctarget-cpu=atom
- rustflags-$(CONFIG_GENERIC_CPU) += -Ztune-cpu=generic
+ rustflags-$(CONFIG_MK8SSE3) += -Ctarget-cpu=k8-sse3
+ rustflags-$(CONFIG_MK10) += -Ctarget-cpu=amdfam10
+ rustflags-$(CONFIG_MBARCELONA) += -Ctarget-cpu=barcelona
+ rustflags-$(CONFIG_MBOBCAT) += -Ctarget-cpu=btver1
+ rustflags-$(CONFIG_MJAGUAR) += -Ctarget-cpu=btver2
+ rustflags-$(CONFIG_MBULLDOZER) += -Ctarget-cpu=bdver1
+ rustflags-$(CONFIG_MPILEDRIVER) += -Ctarget-cpu=bdver2
+ rustflags-$(CONFIG_MSTEAMROLLER) += -Ctarget-cpu=bdver3
+ rustflags-$(CONFIG_MEXCAVATOR) += -Ctarget-cpu=bdver4
+ rustflags-$(CONFIG_MZEN) += -Ctarget-cpu=znver1
+ rustflags-$(CONFIG_MZEN2) += -Ctarget-cpu=znver2
+ rustflags-$(CONFIG_MZEN3) += -Ctarget-cpu=znver3
+ rustflags-$(CONFIG_MZEN4) += -Ctarget-cpu=znver4
+ rustflags-$(CONFIG_MZEN5) += -Ctarget-cpu=znver5
+ rustflags-$(CONFIG_MNATIVE_INTEL) += -Ctarget-cpu=native
+ rustflags-$(CONFIG_MNATIVE_AMD) += -Ctarget-cpu=native
+ rustflags-$(CONFIG_MNEHALEM) += -Ctarget-cpu=nehalem
+ rustflags-$(CONFIG_MWESTMERE) += -Ctarget-cpu=westmere
+ rustflags-$(CONFIG_MSILVERMONT) += -Ctarget-cpu=silvermont
+ rustflags-$(CONFIG_MGOLDMONT) += -Ctarget-cpu=goldmont
+ rustflags-$(CONFIG_MGOLDMONTPLUS) += -Ctarget-cpu=goldmont-plus
+ rustflags-$(CONFIG_MSANDYBRIDGE) += -Ctarget-cpu=sandybridge
+ rustflags-$(CONFIG_MIVYBRIDGE) += -Ctarget-cpu=ivybridge
+ rustflags-$(CONFIG_MHASWELL) += -Ctarget-cpu=haswell
+ rustflags-$(CONFIG_MBROADWELL) += -Ctarget-cpu=broadwell
+ rustflags-$(CONFIG_MSKYLAKE) += -Ctarget-cpu=skylake
+ rustflags-$(CONFIG_MSKYLAKEX) += -Ctarget-cpu=skylake-avx512
+ rustflags-$(CONFIG_MCANNONLAKE) += -Ctarget-cpu=cannonlake
+ rustflags-$(CONFIG_MICELAKE_CLIENT) += -Ctarget-cpu=icelake-client
+ rustflags-$(CONFIG_MICELAKE_SERVER) += -Ctarget-cpu=icelake-server
+ rustflags-$(CONFIG_MCASCADELAKE) += -Ctarget-cpu=cascadelake
+ rustflags-$(CONFIG_MCOOPERLAKE) += -Ctarget-cpu=cooperlake
+ rustflags-$(CONFIG_MTIGERLAKE) += -Ctarget-cpu=tigerlake
+ rustflags-$(CONFIG_MSAPPHIRERAPIDS) += -Ctarget-cpu=sapphirerapids
+ rustflags-$(CONFIG_MROCKETLAKE) += -Ctarget-cpu=rocketlake
+ rustflags-$(CONFIG_MALDERLAKE) += -Ctarget-cpu=alderlake
+ rustflags-$(CONFIG_MRAPTORLAKE) += -Ctarget-cpu=raptorlake
+ rustflags-$(CONFIG_MMETEORLAKE) += -Ctarget-cpu=meteorlake
+ rustflags-$(CONFIG_MEMERALDRAPIDS) += -Ctarget-cpu=emeraldrapids
KBUILD_RUSTFLAGS += $(rustflags-y)
+ifdef CONFIG_X86_NATIVE_CPU
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MNATIVE_INTEL
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MNATIVE_AMD
+ KBUILD_CFLAGS += -march=native
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=native
+endif
+
+ifdef CONFIG_MK8
+ KBUILD_CFLAGS += -march=k8
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8
+endif
+
+ifdef CONFIG_MK8SSE3
+ KBUILD_CFLAGS += -march=k8-sse3
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=k8-sse3
+endif
+
+ifdef CONFIG_MK10
+ KBUILD_CFLAGS += -march=amdfam10
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=amdfam10
+endif
+
+ifdef CONFIG_MBARCELONA
+ KBUILD_CFLAGS += -march=barcelona
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=barcelona
+endif
+
+ifdef CONFIG_MBOBCAT
+ KBUILD_CFLAGS += -march=btver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver1
+endif
+
+ifdef CONFIG_MJAGUAR
+ KBUILD_CFLAGS += -march=btver2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=btver2
+endif
+
+ifdef CONFIG_MBULLDOZER
+ KBUILD_CFLAGS += -march=bdver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver1
+endif
+
+ifdef CONFIG_MPILEDRIVER
+ KBUILD_CFLAGS += -march=bdver2 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver2 -mno-tbm
+endif
+
+ifdef CONFIG_MSTEAMROLLER
+ KBUILD_CFLAGS += -march=bdver3 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver3 -mno-tbm
+endif
+
+ifdef CONFIG_MEXCAVATOR
+ KBUILD_CFLAGS += -march=bdver4 -mno-tbm
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=bdver4 -mno-tbm
+endif
+
+ifdef CONFIG_MZEN
+ KBUILD_CFLAGS += -march=znver1
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver1
+endif
+
+ifdef CONFIG_MZEN2
+ KBUILD_CFLAGS += -march=znver2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver2
+endif
+
+ifdef CONFIG_MZEN3
+ KBUILD_CFLAGS += -march=znver3
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver3
+endif
+
+ifdef CONFIG_MZEN4
+ KBUILD_CFLAGS += -march=znver4
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4
+endif
+
+ifdef CONFIG_MZEN5
+ KBUILD_CFLAGS += -march=znver5
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=znver5
+endif
+
+ifdef CONFIG_MPSC
+ KBUILD_CFLAGS += -march=nocona
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=nocona
+endif
+
+ifdef CONFIG_MCORE2
+ KBUILD_CFLAGS += -march=core2
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=core2
+endif
+
+ifdef CONFIG_MNEHALEM
+ KBUILD_CFLAGS += -march=nehalem
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=nehalem
+endif
+
+ifdef CONFIG_MWESTMERE
+ KBUILD_CFLAGS += -march=westmere
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=westmere
+endif
+
+ifdef CONFIG_MSILVERMONT
+ KBUILD_CFLAGS += -march=silvermont
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=silvermont
+endif
+
+ifdef CONFIG_MGOLDMONT
+ KBUILD_CFLAGS += -march=goldmont
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont
+endif
+
+ifdef CONFIG_MGOLDMONTPLUS
+ KBUILD_CFLAGS += -march=goldmont-plus
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=goldmont-plus
+endif
+
+ifdef CONFIG_MSANDYBRIDGE
+ KBUILD_CFLAGS += -march=sandybridge
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=sandybridge
+endif
+
+ifdef CONFIG_MIVYBRIDGE
+ KBUILD_CFLAGS += -march=ivybridge
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=ivybridge
+endif
+
+ifdef CONFIG_MHASWELL
+ KBUILD_CFLAGS += -march=haswell
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=haswell
+endif
+
+ifdef CONFIG_MBROADWELL
+ KBUILD_CFLAGS += -march=broadwell
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=broadwell
+endif
+
+ifdef CONFIG_MSKYLAKE
+ KBUILD_CFLAGS += -march=skylake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake
+endif
+
+ifdef CONFIG_MSKYLAKEX
+ KBUILD_CFLAGS += -march=skylake-avx512
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=skylake-avx512
+endif
+
+ifdef CONFIG_MCANNONLAKE
+ KBUILD_CFLAGS += -march=cannonlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cannonlake
+endif
+
+ifdef CONFIG_MICELAKE_CLIENT
+ KBUILD_CFLAGS += -march=icelake-client
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-client
+endif
+
+ifdef CONFIG_MICELAKE_SERVER
+ KBUILD_CFLAGS += -march=icelake-server
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=icelake-server
+endif
+
+ifdef CONFIG_MCOOPERLAKE
+ KBUILD_CFLAGS += -march=cooperlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cooperlake
+endif
+
+ifdef CONFIG_MCASCADELAKE
+ KBUILD_CFLAGS += -march=cascadelake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=cascadelake
+endif
+
+ifdef CONFIG_MTIGERLAKE
+ KBUILD_CFLAGS += -march=tigerlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=tigerlake
+endif
+
+ifdef CONFIG_MSAPPHIRERAPIDS
+ KBUILD_CFLAGS += -march=sapphirerapids
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=sapphirerapids
+endif
+
+ifdef CONFIG_MROCKETLAKE
+ KBUILD_CFLAGS += -march=rocketlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=rocketlake
+endif
+
+ifdef CONFIG_MALDERLAKE
+ KBUILD_CFLAGS += -march=alderlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=alderlake
+endif
+
+ifdef CONFIG_MRAPTORLAKE
+ KBUILD_CFLAGS += -march=raptorlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=raptorlake
+endif
+
+ifdef CONFIG_MMETEORLAKE
+ KBUILD_CFLAGS += -march=meteorlake
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=meteorlake
+endif
+
+ifdef CONFIG_MEMERALDRAPIDS
+ KBUILD_CFLAGS += -march=emeraldrapids
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=emeraldrapids
+endif
+
+ifdef CONFIG_GENERIC_CPU
+ifeq ($(CONFIG_X86_64_VERSION),1)
KBUILD_CFLAGS += -march=x86-64 -mtune=generic
KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic
+else
+ KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION)
+ KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION)
+endif # CONFIG_X86_64_VERSION
+endif # CONFIG_GENERIC_CPU
KBUILD_CFLAGS += -mno-red-zone
--- a/arch/x86/include/asm/vermagic.h
+++ b/arch/x86/include/asm/vermagic.h
@@ -17,6 +17,56 @@
#define MODULE_PROC_FAMILY "586MMX "
#elif defined CONFIG_MCORE2
#define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MNATIVE_INTEL
+#define MODULE_PROC_FAMILY "NATIVE_INTEL "
+#elif defined CONFIG_MNATIVE_AMD
+#define MODULE_PROC_FAMILY "NATIVE_AMD "
+#elif defined CONFIG_MNEHALEM
+#define MODULE_PROC_FAMILY "NEHALEM "
+#elif defined CONFIG_MWESTMERE
+#define MODULE_PROC_FAMILY "WESTMERE "
+#elif defined CONFIG_MSILVERMONT
+#define MODULE_PROC_FAMILY "SILVERMONT "
+#elif defined CONFIG_MGOLDMONT
+#define MODULE_PROC_FAMILY "GOLDMONT "
+#elif defined CONFIG_MGOLDMONTPLUS
+#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
+#elif defined CONFIG_MSANDYBRIDGE
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
+#elif defined CONFIG_MIVYBRIDGE
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
+#elif defined CONFIG_MHASWELL
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
+#elif defined CONFIG_MSKYLAKE
+#define MODULE_PROC_FAMILY "SKYLAKE "
+#elif defined CONFIG_MSKYLAKEX
+#define MODULE_PROC_FAMILY "SKYLAKEX "
+#elif defined CONFIG_MCANNONLAKE
+#define MODULE_PROC_FAMILY "CANNONLAKE "
+#elif defined CONFIG_MICELAKE_CLIENT
+#define MODULE_PROC_FAMILY "ICELAKE_CLIENT "
+#elif defined CONFIG_MICELAKE_SERVER
+#define MODULE_PROC_FAMILY "ICELAKE_SERVER "
+#elif defined CONFIG_MCASCADELAKE
+#define MODULE_PROC_FAMILY "CASCADELAKE "
+#elif defined CONFIG_MCOOPERLAKE
+#define MODULE_PROC_FAMILY "COOPERLAKE "
+#elif defined CONFIG_MTIGERLAKE
+#define MODULE_PROC_FAMILY "TIGERLAKE "
+#elif defined CONFIG_MSAPPHIRERAPIDS
+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS "
+#elif defined CONFIG_ROCKETLAKE
+#define MODULE_PROC_FAMILY "ROCKETLAKE "
+#elif defined CONFIG_MALDERLAKE
+#define MODULE_PROC_FAMILY "ALDERLAKE "
+#elif defined CONFIG_MRAPTORLAKE
+#define MODULE_PROC_FAMILY "RAPTORLAKE "
+#elif defined CONFIG_MMETEORLAKE
+#define MODULE_PROC_FAMILY "METEORLAKE "
+#elif defined CONFIG_MEMERALDRAPIDS
+#define MODULE_PROC_FAMILY "EMERALDRAPIDS "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
@@ -35,6 +85,28 @@
#define MODULE_PROC_FAMILY "K7 "
#elif defined CONFIG_MK8
#define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MK8SSE3
+#define MODULE_PROC_FAMILY "K8SSE3 "
+#elif defined CONFIG_MK10
+#define MODULE_PROC_FAMILY "K10 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
+#elif defined CONFIG_MEXCAVATOR
+#define MODULE_PROC_FAMILY "EXCAVATOR "
+#elif defined CONFIG_MZEN
+#define MODULE_PROC_FAMILY "ZEN "
+#elif defined CONFIG_MZEN2
+#define MODULE_PROC_FAMILY "ZEN2 "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
KBUILD_CFLAGS += -mcmodel=kernel

View File

@@ -1,4 +1,4 @@
From f4f448a305e9d705b9a0da102ddfd58bfaac5cc0 Mon Sep 17 00:00:00 2001
From 15db9c3419fd147812151d95fb34bbd70f2f9715 Mon Sep 17 00:00:00 2001
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
Date: Sun, 11 Dec 2022 23:51:16 +0100
Subject: ZEN: Restore CONFIG_OPTIMIZE_FOR_PERFORMANCE_O3
@@ -13,7 +13,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS.
--- a/Makefile
+++ b/Makefile
@@ -871,6 +871,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe
@@ -868,6 +868,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointe
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += -O2
KBUILD_RUSTFLAGS += -Copt-level=2
@@ -25,7 +25,7 @@ dependency on CONFIG_ARC and adds RUSTFLAGS.
KBUILD_RUSTFLAGS += -Copt-level=s
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1473,6 +1473,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
@@ -1479,6 +1479,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
with the "-O2" compiler flag for best performance and most
helpful compile-time warnings.

View File

@@ -1,6 +1,6 @@
--- a/Makefile
+++ b/Makefile
@@ -879,6 +879,10 @@ KBUILD_CFLAGS += -Os
@@ -876,6 +876,10 @@ KBUILD_CFLAGS += -Os
KBUILD_RUSTFLAGS += -Copt-level=s
endif

View File

@@ -1,24 +1,22 @@
From 3ebc1fdf3e0ee9bff1efe20eb5791eba5c84a810 Mon Sep 17 00:00:00 2001
From 40f9fa82bb21a5e3f17f539897128a69824ad8ef Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Thu, 3 Aug 2023 13:53:49 +0000
Subject: XANMOD: x86/build: Prevent generating avx2 and avx512 floating-point code
Date: Mon, 18 Nov 2024 20:17:44 +0000
Subject: [PATCH 1/4] XANMOD: x86/build: Prevent generating avx2 floating-point
code
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
arch/x86/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
arch/x86/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -74,9 +74,9 @@ export BITS
@@ -74,7 +74,7 @@ export BITS
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
-KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
#
# CFLAGS for compiling floating point code inside the kernel.

View File

@@ -4,8 +4,8 @@
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
#
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -mno-avx512f -fno-tree-vectorize
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -fno-tree-vectorize
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-avx512f
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2

View File

@@ -1,7 +1,8 @@
From b1a99a2a9675f80b7c04a239a6b047373ccf3a17 Mon Sep 17 00:00:00 2001
From 7e45fca50a3151248266bca7058e1efa9b5233ca Mon Sep 17 00:00:00 2001
From: Alexandre Frade <kernel@xanmod.org>
Date: Mon, 16 Sep 2024 00:55:35 +0000
Subject: XANMOD: kbuild: Add GCC SMS-based modulo scheduling flags
Subject: [PATCH 02/19] XANMOD: kbuild: Add GCC SMS-based modulo scheduling
flags
Signed-off-by: Alexandre Frade <kernel@xanmod.org>
---
@@ -10,7 +11,7 @@ Signed-off-by: Alexandre Frade <kernel@xanmod.org>
--- a/Makefile
+++ b/Makefile
@@ -883,6 +883,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
@@ -880,6 +880,13 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += $(call cc-option,-fivopts)
endif

View File

@@ -1,27 +0,0 @@
From cb40e98d75a75567cbd10f9fc69c2ec12c87a445 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:15 +0000
Subject: cpufreq/amd-pstate: Remove the redundant des_perf clamping in
adjust_perf
des_perf is later on clamped between min_perf and max_perf in
amd_pstate_update. So, remove the redundant clamping from
amd_pstate_adjust_perf.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -705,8 +705,6 @@ static void amd_pstate_adjust_perf(unsig
if (max_perf < min_perf)
max_perf = min_perf;
- des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
-
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);

View File

@@ -1,133 +0,0 @@
From f58e440e56a6c8a2c04894e5d169d1a98a8ce74f Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:18 +0000
Subject: cpufreq/amd-pstate: Modularize perf<->freq conversion
Delegate the perf<->frequency conversion to helper functions to reduce
code duplication, and improve readability.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 57 +++++++++++++++++++-----------------
1 file changed, 30 insertions(+), 27 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,6 +142,20 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};
+static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+{
+ u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ cpudata->nominal_freq);
+
+ return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+}
+
+static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+{
+ return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
+ cpudata->nominal_perf);
+}
+
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
{
/**
@@ -534,7 +548,6 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- unsigned long max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
@@ -543,8 +556,7 @@ static void amd_pstate_update(struct amd
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
- max_freq = READ_ONCE(cpudata->max_limit_freq);
- policy->cur = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = perf_to_freq(cpudata, des_perf);
if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -594,14 +606,11 @@ static int amd_pstate_verify(struct cpuf
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf, max_perf;
- u32 max_freq;
+ u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;
- max_perf = READ_ONCE(cpudata->highest_perf);
- max_freq = READ_ONCE(cpudata->max_freq);
- max_limit_perf = div_u64(policy->max * max_perf, max_freq);
- min_limit_perf = div_u64(policy->min * max_perf, max_freq);
+ max_limit_perf = freq_to_perf(cpudata, policy->max);
+ min_limit_perf = freq_to_perf(cpudata, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
@@ -619,21 +628,15 @@ static int amd_pstate_update_freq(struct
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
- u8 des_perf, cap_perf;
-
- if (!cpudata->max_freq)
- return -ENODEV;
+ u8 des_perf;
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
- cap_perf = READ_ONCE(cpudata->highest_perf);
-
freqs.old = policy->cur;
freqs.new = target_freq;
- des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
- cpudata->max_freq);
+ des_perf = freq_to_perf(cpudata, target_freq);
WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -907,7 +910,6 @@ static int amd_pstate_init_freq(struct a
{
int ret;
u32 min_freq, max_freq;
- u8 highest_perf, nominal_perf, lowest_nonlinear_perf;
u32 nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
@@ -925,16 +927,17 @@ static int amd_pstate_init_freq(struct a
else
nominal_freq = cppc_perf.nominal_freq;
- highest_perf = READ_ONCE(cpudata->highest_perf);
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
- max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);
-
- lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
- lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
- WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
- WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
- WRITE_ONCE(cpudata->max_freq, max_freq * 1000);
+ min_freq *= 1000;
+ nominal_freq *= 1000;
+
+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
+ WRITE_ONCE(cpudata->min_freq, min_freq);
+
+ max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+
+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
+ WRITE_ONCE(cpudata->max_freq, max_freq);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load

View File

@@ -1,37 +0,0 @@
From 0a12d4a3ca1a996c1073d60c6775424972e8b7b9 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:19 +0000
Subject: cpufreq/amd-pstate: Remove the unnecessary cpufreq_update_policy call
The update_limits callback is only called in two conditions.
* When the preferred core rankings change. In which case, we just need to
change the prefcore ranking in the cpudata struct. As there are no changes
to any of the perf values, there is no need to call cpufreq_update_policy()
* When the _PPC ACPI object changes, i.e. the highest allowed Pstate
changes. The _PPC object is only used for a table based cpufreq driver
like acpi-cpufreq, hence is irrelevant for CPPC based amd-pstate.
Hence, the cpufreq_update_policy() call becomes unnecessary and can be
removed.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ----
1 file changed, 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -855,10 +855,6 @@ static void amd_pstate_update_limits(uns
sched_set_itmt_core_prio((int)cur_high, cpu);
}
cpufreq_cpu_put(policy);
-
- if (!highest_perf_changed)
- cpufreq_update_policy(cpu);
-
}
/*

View File

@@ -1,124 +0,0 @@
From ab0520499c83ff44d468f1b2b604c85e2f78d694 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:22 +0000
Subject: cpufreq/amd-pstate: Use scope based cleanup for cpufreq_policy refs
There have been instances in past where refcount decrementing is missed
while exiting a function. Use automatic scope based cleanup to avoid
such errors.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 25 ++++++++-----------------
include/linux/cpufreq.h | 3 +++
2 files changed, 11 insertions(+), 17 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -548,7 +548,7 @@ static inline bool amd_pstate_sample(str
static void amd_pstate_update(struct amd_cpudata *cpudata, u8 min_perf,
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
if (!policy)
@@ -574,8 +574,6 @@ static void amd_pstate_update(struct amd
}
amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
-
- cpufreq_cpu_put(policy);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -587,7 +585,8 @@ static int amd_pstate_verify(struct cpuf
* amd-pstate qos_requests.
*/
if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
- struct cpufreq_policy *policy = cpufreq_cpu_get(policy_data->cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) =
+ cpufreq_cpu_get(policy_data->cpu);
struct amd_cpudata *cpudata;
if (!policy)
@@ -595,7 +594,6 @@ static int amd_pstate_verify(struct cpuf
cpudata = policy->driver_data;
policy_data->min = cpudata->lowest_nonlinear_freq;
- cpufreq_cpu_put(policy);
}
cpufreq_verify_within_cpu_limits(policy_data);
@@ -678,7 +676,7 @@ static void amd_pstate_adjust_perf(unsig
unsigned long capacity)
{
u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
if (!policy)
@@ -710,7 +708,6 @@ static void amd_pstate_adjust_perf(unsig
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
- cpufreq_cpu_put(policy);
}
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
@@ -823,28 +820,23 @@ static void amd_pstate_init_prefcore(str
static void amd_pstate_update_limits(unsigned int cpu)
{
- struct cpufreq_policy *policy = NULL;
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
u32 prev_high = 0, cur_high = 0;
- int ret;
bool highest_perf_changed = false;
if (!amd_pstate_prefcore)
return;
- policy = cpufreq_cpu_get(cpu);
if (!policy)
return;
- cpudata = policy->driver_data;
-
guard(mutex)(&amd_pstate_driver_lock);
- ret = amd_get_highest_perf(cpu, &cur_high);
- if (ret) {
- cpufreq_cpu_put(policy);
+ if (amd_get_highest_perf(cpu, &cur_high))
return;
- }
+
+ cpudata = policy->driver_data;
prev_high = READ_ONCE(cpudata->prefcore_ranking);
highest_perf_changed = (prev_high != cur_high);
@@ -854,7 +846,6 @@ static void amd_pstate_update_limits(uns
if (cur_high < CPPC_MAX_PERF)
sched_set_itmt_core_prio((int)cur_high, cpu);
}
- cpufreq_cpu_put(policy);
}
/*
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -213,6 +213,9 @@ static inline struct cpufreq_policy *cpu
static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { }
#endif
+/* Scope based cleanup macro for cpufreq_policy kobject reference counting */
+DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T))
+
static inline bool policy_is_inactive(struct cpufreq_policy *policy)
{
return cpumask_empty(policy->cpus);

View File

@@ -1,26 +0,0 @@
From 658a4b7a41583e3b73477c0fbbee07aa6d6f7e0e Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Wed, 5 Feb 2025 11:25:23 +0000
Subject: cpufreq/amd-pstate: Remove the unncecessary driver_lock in
amd_pstate_update_limits
There is no need to take a driver wide lock while updating the
highest_perf value in the percpu cpudata struct. Hence remove it.
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 --
1 file changed, 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -831,8 +831,6 @@ static void amd_pstate_update_limits(uns
if (!policy)
return;
- guard(mutex)(&amd_pstate_driver_lock);
-
if (amd_get_highest_perf(cpu, &cur_high))
return;

View File

@@ -1,35 +0,0 @@
From 20f8507de83bc844c6ff2329e61ffc37734364e9 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Sat, 22 Feb 2025 03:32:22 +0000
Subject: cpufreq/amd-pstate: Fix the clamping of perf values
The clamping in freq_to_perf() is broken right now, as we first typecast
(read wraparound) the overflowing value into a u8 and then clamp it down.
So, use a u32 to store the >255 value in certain edge cases and then clamp
it down into a u8.
Also, use a "explicit typecast + clamp" instead of just a "clamp_t" as the
latter typecasts first and then clamps between the limits, which defeats
our purpose.
Fixes: 305621eb6a8b ("cpufreq/amd-pstate: Modularize perf<->freq conversion")
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -144,10 +144,10 @@ static struct quirk_entry quirk_amd_7k62
static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
{
- u8 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
cpudata->nominal_freq);
- return clamp_t(u8, perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
}
static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)

View File

@@ -1,35 +0,0 @@
From 240a074b7f92278755df715be1ea5ea5d3d2f5ac Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:17 -0600
Subject: cpufreq/amd-pstate: Show a warning when a CPU fails to setup
I came across a system that MSR_AMD_CPPC_CAP1 for some CPUs isn't
populated. This is an unexpected behavior that is most likely a
BIOS bug. In the event it happens I'd like users to report bugs
to properly root cause and get this fixed.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1027,6 +1027,7 @@ static int amd_pstate_cpu_init(struct cp
free_cpudata2:
freq_qos_remove_request(&cpudata->req[0]);
free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}
@@ -1520,6 +1521,7 @@ static int amd_pstate_epp_cpu_init(struc
return 0;
free_cpudata1:
+ pr_warn("Failed to initialize CPU %d: %d\n", policy->cpu, ret);
kfree(cpudata);
return ret;
}

View File

@@ -1,209 +0,0 @@
From 82520910e91d62f19c944ff17ba8f966553e79d6 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:18 -0600
Subject: cpufreq/amd-pstate: Drop min and max cached frequencies
Use the perf_to_freq helpers to calculate this on the fly.
As the members are no longer cached add an extra check into
amd_pstate_epp_update_limit() to avoid unnecessary calls in
amd_pstate_update_min_max_limit().
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 14 +++++------
drivers/cpufreq/amd-pstate.c | 43 +++++++++------------------------
drivers/cpufreq/amd-pstate.h | 9 ++-----
3 files changed, 20 insertions(+), 46 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -214,14 +214,14 @@ static void amd_pstate_ut_check_freq(u32
break;
cpudata = policy->driver_data;
- if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
+ if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
- (cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
- (cpudata->min_freq > 0))) {
+ (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+ (policy->cpuinfo.min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
- __func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
- cpudata->lowest_nonlinear_freq, cpudata->min_freq);
+ __func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
+ cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
goto skip_test;
}
@@ -233,13 +233,13 @@ static void amd_pstate_ut_check_freq(u32
}
if (cpudata->boost_supported) {
- if ((policy->max == cpudata->max_freq) ||
+ if ((policy->max == policy->cpuinfo.max_freq) ||
(policy->max == cpudata->nominal_freq))
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
- __func__, cpu, policy->max, cpudata->max_freq,
+ __func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
goto skip_test;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -717,7 +717,7 @@ static int amd_pstate_cpu_boost_update(s
int ret = 0;
nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
+ max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));
if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -916,13 +916,10 @@ static int amd_pstate_init_freq(struct a
nominal_freq *= 1000;
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- WRITE_ONCE(cpudata->min_freq, min_freq);
max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
-
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
- WRITE_ONCE(cpudata->max_freq, max_freq);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load
@@ -947,9 +944,9 @@ static int amd_pstate_init_freq(struct a
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
- struct device *dev;
struct amd_cpudata *cpudata;
+ struct device *dev;
+ int ret;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -980,17 +977,11 @@ static int amd_pstate_cpu_init(struct cp
if (ret)
goto free_cpudata1;
- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
- policy->min = min_freq;
- policy->max = max_freq;
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1014,9 +1005,6 @@ static int amd_pstate_cpu_init(struct cp
goto free_cpudata2;
}
- cpudata->max_limit_freq = max_freq;
- cpudata->min_limit_freq = min_freq;
-
policy->driver_data = cpudata;
if (!current_pstate_driver->adjust_perf)
@@ -1074,14 +1062,10 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- int max_freq;
struct amd_cpudata *cpudata = policy->driver_data;
- max_freq = READ_ONCE(cpudata->max_freq);
- if (max_freq < 0)
- return max_freq;
- return sysfs_emit(buf, "%u\n", max_freq);
+ return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -1439,10 +1423,10 @@ static bool amd_pstate_acpi_pm_profile_u
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
- int min_freq, max_freq, ret;
struct amd_cpudata *cpudata;
struct device *dev;
u64 value;
+ int ret;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
@@ -1473,19 +1457,13 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;
- min_freq = READ_ONCE(cpudata->min_freq);
- max_freq = READ_ONCE(cpudata->max_freq);
-
- policy->cpuinfo.min_freq = min_freq;
- policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
policy->driver_data = cpudata;
- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
-
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
/*
@@ -1543,7 +1521,8 @@ static int amd_pstate_epp_update_limit(s
struct amd_cpudata *cpudata = policy->driver_data;
u8 epp;
- amd_pstate_update_min_max_limit(policy);
+ if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+ amd_pstate_update_min_max_limit(policy);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -46,8 +46,6 @@ struct amd_aperf_mperf {
* @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
- * @max_freq: the frequency (in khz) that mapped to highest_perf
- * @min_freq: the frequency (in khz) that mapped to lowest_perf
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
* @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
* @cur: Difference of Aperf/Mperf/tsc count between last and current sample
@@ -77,11 +75,8 @@ struct amd_cpudata {
u8 prefcore_ranking;
u8 min_limit_perf;
u8 max_limit_perf;
- u32 min_limit_freq;
- u32 max_limit_freq;
-
- u32 max_freq;
- u32 min_freq;
+ u32 min_limit_freq;
+ u32 max_limit_freq;
u32 nominal_freq;
u32 lowest_nonlinear_freq;

View File

@@ -1,611 +0,0 @@
From 21109b42429e0d9f0ee1bfadddae38fb5b0b23c3 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:19 -0600
Subject: cpufreq/amd-pstate: Move perf values into a union
By storing perf values in a union all the writes and reads can
be done atomically, removing the need for some concurrency protections.
While making this change, also drop the cached frequency values,
using inline helpers to calculate them on demand from perf value.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 18 +--
drivers/cpufreq/amd-pstate.c | 205 ++++++++++++++++++--------------
drivers/cpufreq/amd-pstate.h | 51 +++++---
3 files changed, 158 insertions(+), 116 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -129,6 +129,7 @@ static void amd_pstate_ut_check_perf(u32
struct cppc_perf_caps cppc_perf;
struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
+ union perf_cached cur_perf;
for_each_possible_cpu(cpu) {
policy = cpufreq_cpu_get(cpu);
@@ -162,19 +163,20 @@ static void amd_pstate_ut_check_perf(u32
lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
}
- if (highest_perf != READ_ONCE(cpudata->highest_perf) && !cpudata->hw_prefcore) {
+ cur_perf = READ_ONCE(cpudata->perf);
+ if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
- __func__, cpu, highest_perf, cpudata->highest_perf);
+ __func__, cpu, highest_perf, cur_perf.highest_perf);
goto skip_test;
}
- if ((nominal_perf != READ_ONCE(cpudata->nominal_perf)) ||
- (lowest_nonlinear_perf != READ_ONCE(cpudata->lowest_nonlinear_perf)) ||
- (lowest_perf != READ_ONCE(cpudata->lowest_perf))) {
+ if (nominal_perf != cur_perf.nominal_perf ||
+ (lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
+ (lowest_perf != cur_perf.lowest_perf)) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
- __func__, cpu, nominal_perf, cpudata->nominal_perf,
- lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
- lowest_perf, cpudata->lowest_perf);
+ __func__, cpu, nominal_perf, cur_perf.nominal_perf,
+ lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
+ lowest_perf, cur_perf.lowest_perf);
goto skip_test;
}
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -142,18 +142,17 @@ static struct quirk_entry quirk_amd_7k62
.lowest_freq = 550,
};
-static inline u8 freq_to_perf(struct amd_cpudata *cpudata, unsigned int freq_val)
+static inline u8 freq_to_perf(union perf_cached perf, u32 nominal_freq, unsigned int freq_val)
{
- u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * cpudata->nominal_perf,
- cpudata->nominal_freq);
+ u32 perf_val = DIV_ROUND_UP_ULL((u64)freq_val * perf.nominal_perf, nominal_freq);
- return (u8)clamp(perf_val, cpudata->lowest_perf, cpudata->highest_perf);
+ return (u8)clamp(perf_val, perf.lowest_perf, perf.highest_perf);
}
-static inline u32 perf_to_freq(struct amd_cpudata *cpudata, u8 perf_val)
+static inline u32 perf_to_freq(union perf_cached perf, u32 nominal_freq, u8 perf_val)
{
- return DIV_ROUND_UP_ULL((u64)cpudata->nominal_freq * perf_val,
- cpudata->nominal_perf);
+ return DIV_ROUND_UP_ULL((u64)nominal_freq * perf_val,
+ perf.nominal_perf);
}
static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
@@ -347,7 +346,9 @@ static int amd_pstate_set_energy_pref_in
}
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
epp,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
@@ -425,6 +426,7 @@ static inline int amd_pstate_cppc_enable
static int msr_init_perf(struct amd_cpudata *cpudata)
{
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 cap1, numerator;
int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
@@ -436,19 +438,21 @@ static int msr_init_perf(struct amd_cpud
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
- WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
+ perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
+ perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
- WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
return 0;
}
static int shmem_init_perf(struct amd_cpudata *cpudata)
{
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u64 numerator;
int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -459,14 +463,14 @@ static int shmem_init_perf(struct amd_cp
if (ret)
return ret;
- WRITE_ONCE(cpudata->highest_perf, numerator);
- WRITE_ONCE(cpudata->max_limit_perf, numerator);
- WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
- WRITE_ONCE(cpudata->lowest_nonlinear_perf,
- cppc_perf.lowest_nonlinear_perf);
- WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ perf.highest_perf = numerator;
+ perf.max_limit_perf = numerator;
+ perf.min_limit_perf = cppc_perf.lowest_perf;
+ perf.nominal_perf = cppc_perf.nominal_perf;
+ perf.lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
+ perf.lowest_perf = cppc_perf.lowest_perf;
+ WRITE_ONCE(cpudata->perf, perf);
WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
- WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
if (cppc_state == AMD_PSTATE_ACTIVE)
return 0;
@@ -549,14 +553,14 @@ static void amd_pstate_update(struct amd
u8 des_perf, u8 max_perf, bool fast_switch, int gov_flags)
{
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpudata->cpu);
- u8 nominal_perf = READ_ONCE(cpudata->nominal_perf);
+ union perf_cached perf = READ_ONCE(cpudata->perf);
if (!policy)
return;
des_perf = clamp_t(u8, des_perf, min_perf, max_perf);
- policy->cur = perf_to_freq(cpudata, des_perf);
+ policy->cur = perf_to_freq(perf, cpudata->nominal_freq, des_perf);
if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
min_perf = des_perf;
@@ -565,7 +569,7 @@ static void amd_pstate_update(struct amd
/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
- max_perf = min_t(u8, nominal_perf, max_perf);
+ max_perf = min_t(u8, perf.nominal_perf, max_perf);
if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
@@ -602,39 +606,41 @@ static int amd_pstate_verify(struct cpuf
return 0;
}
-static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
+static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u8 max_limit_perf, min_limit_perf;
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
- max_limit_perf = freq_to_perf(cpudata, policy->max);
- min_limit_perf = freq_to_perf(cpudata, policy->min);
+ perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
+ perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
- WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
- WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
WRITE_ONCE(cpudata->min_limit_freq, policy->min);
-
- return 0;
+ WRITE_ONCE(cpudata->perf, perf);
}
static int amd_pstate_update_freq(struct cpufreq_policy *policy,
unsigned int target_freq, bool fast_switch)
{
struct cpufreq_freqs freqs;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
u8 des_perf;
+ cpudata = policy->driver_data;
+
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
+ perf = READ_ONCE(cpudata->perf);
+
freqs.old = policy->cur;
freqs.new = target_freq;
- des_perf = freq_to_perf(cpudata, target_freq);
+ des_perf = freq_to_perf(perf, cpudata->nominal_freq, target_freq);
WARN_ON(fast_switch && !policy->fast_switch_enabled);
/*
@@ -645,8 +651,8 @@ static int amd_pstate_update_freq(struct
if (!fast_switch)
cpufreq_freq_transition_begin(policy, &freqs);
- amd_pstate_update(cpudata, cpudata->min_limit_perf, des_perf,
- cpudata->max_limit_perf, fast_switch,
+ amd_pstate_update(cpudata, perf.min_limit_perf, des_perf,
+ perf.max_limit_perf, fast_switch,
policy->governor->flags);
if (!fast_switch)
@@ -675,9 +681,10 @@ static void amd_pstate_adjust_perf(unsig
unsigned long target_perf,
unsigned long capacity)
{
- u8 max_perf, min_perf, des_perf, cap_perf, min_limit_perf;
+ u8 max_perf, min_perf, des_perf, cap_perf;
struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata;
+ union perf_cached perf;
if (!policy)
return;
@@ -687,8 +694,8 @@ static void amd_pstate_adjust_perf(unsig
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
- cap_perf = READ_ONCE(cpudata->highest_perf);
- min_limit_perf = READ_ONCE(cpudata->min_limit_perf);
+ perf = READ_ONCE(cpudata->perf);
+ cap_perf = perf.highest_perf;
des_perf = cap_perf;
if (target_perf < capacity)
@@ -699,10 +706,10 @@ static void amd_pstate_adjust_perf(unsig
else
min_perf = cap_perf;
- if (min_perf < min_limit_perf)
- min_perf = min_limit_perf;
+ if (min_perf < perf.min_limit_perf)
+ min_perf = perf.min_limit_perf;
- max_perf = cpudata->max_limit_perf;
+ max_perf = perf.max_limit_perf;
if (max_perf < min_perf)
max_perf = min_perf;
@@ -713,11 +720,12 @@ static void amd_pstate_adjust_perf(unsig
static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
u32 nominal_freq, max_freq;
int ret = 0;
nominal_freq = READ_ONCE(cpudata->nominal_freq);
- max_freq = perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf));
+ max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf);
if (on)
policy->cpuinfo.max_freq = max_freq;
@@ -881,30 +889,30 @@ static u32 amd_pstate_get_transition_lat
}
/*
- * amd_pstate_init_freq: Initialize the max_freq, min_freq,
- * nominal_freq and lowest_nonlinear_freq for
- * the @cpudata object.
+ * amd_pstate_init_freq: Initialize the nominal_freq and lowest_nonlinear_freq
+ * for the @cpudata object.
*
- * Requires: highest_perf, lowest_perf, nominal_perf and
- * lowest_nonlinear_perf members of @cpudata to be
- * initialized.
+ * Requires: all perf members of @cpudata to be initialized.
*
- * Returns 0 on success, non-zero value on failure.
+ * Returns 0 on success, non-zero value on failure.
*/
static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
- int ret;
- u32 min_freq, max_freq;
- u32 nominal_freq, lowest_nonlinear_freq;
+ u32 min_freq, max_freq, nominal_freq, lowest_nonlinear_freq;
struct cppc_perf_caps cppc_perf;
+ union perf_cached perf;
+ int ret;
ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
if (ret)
return ret;
+ perf = READ_ONCE(cpudata->perf);
- if (quirks && quirks->lowest_freq)
+ if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
- else
+ perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
+ WRITE_ONCE(cpudata->perf, perf);
+ } else
min_freq = cppc_perf.lowest_freq;
if (quirks && quirks->nominal_freq)
@@ -917,8 +925,8 @@ static int amd_pstate_init_freq(struct a
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- max_freq = perf_to_freq(cpudata, cpudata->highest_perf);
- lowest_nonlinear_freq = perf_to_freq(cpudata, cpudata->lowest_nonlinear_perf);
+ max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf);
+ lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
/**
@@ -945,6 +953,7 @@ static int amd_pstate_init_freq(struct a
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
int ret;
@@ -980,8 +989,14 @@ static int amd_pstate_cpu_init(struct cp
policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu);
policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu);
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1062,23 +1077,27 @@ static int amd_pstate_cpu_suspend(struct
static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
char *buf)
{
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);
- return sysfs_emit(buf, "%u\n", perf_to_freq(cpudata, READ_ONCE(cpudata->highest_perf)));
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf));
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
char *buf)
{
- int freq;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
+ union perf_cached perf;
- freq = READ_ONCE(cpudata->lowest_nonlinear_freq);
- if (freq < 0)
- return freq;
+ cpudata = policy->driver_data;
+ perf = READ_ONCE(cpudata->perf);
- return sysfs_emit(buf, "%u\n", freq);
+ return sysfs_emit(buf, "%u\n",
+ perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_nonlinear_perf));
}
/*
@@ -1088,12 +1107,11 @@ static ssize_t show_amd_pstate_lowest_no
static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
char *buf)
{
- u8 perf;
- struct amd_cpudata *cpudata = policy->driver_data;
+ struct amd_cpudata *cpudata;
- perf = READ_ONCE(cpudata->highest_perf);
+ cpudata = policy->driver_data;
- return sysfs_emit(buf, "%u\n", perf);
+ return sysfs_emit(buf, "%u\n", cpudata->perf.highest_perf);
}
static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
@@ -1424,6 +1442,7 @@ static bool amd_pstate_acpi_pm_profile_u
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata;
+ union perf_cached perf;
struct device *dev;
u64 value;
int ret;
@@ -1457,8 +1476,15 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
goto free_cpudata1;
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(cpudata, cpudata->lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(cpudata, cpudata->highest_perf);
+ perf = READ_ONCE(cpudata->perf);
+
+ policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
+
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
@@ -1519,6 +1545,7 @@ static void amd_pstate_epp_cpu_exit(stru
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ union perf_cached perf;
u8 epp;
if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
@@ -1529,15 +1556,16 @@ static int amd_pstate_epp_update_limit(s
else
epp = READ_ONCE(cpudata->epp_cached);
+ perf = READ_ONCE(cpudata->perf);
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
- cpudata->min_limit_perf,
- cpudata->max_limit_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
+ perf.min_limit_perf,
+ perf.max_limit_perf,
policy->boost_enabled);
}
- return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
- cpudata->max_limit_perf, epp, false);
+ return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+ perf.max_limit_perf, epp, false);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1569,20 +1597,18 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 max_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
int ret;
ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);
- max_perf = READ_ONCE(cpudata->highest_perf);
-
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
cpudata->epp_cached,
FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- max_perf, policy->boost_enabled);
+ perf.highest_perf, policy->boost_enabled);
}
return amd_pstate_epp_update_limit(policy);
@@ -1606,22 +1632,21 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 min_perf;
+ union perf_cached perf = READ_ONCE(cpudata->perf);
if (cpudata->suspended)
return 0;
- min_perf = READ_ONCE(cpudata->lowest_perf);
-
guard(mutex)(&amd_pstate_limits_lock);
if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
- min_perf, min_perf, policy->boost_enabled);
+ perf.lowest_perf, perf.lowest_perf,
+ policy->boost_enabled);
}
- return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
+ return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -13,6 +13,36 @@
/*********************************************************************
* AMD P-state INTERFACE *
*********************************************************************/
+
+/**
+ * union perf_cached - A union to cache performance-related data.
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * For platforms that support the preferred core feature, the highest_perf value maybe
+ * configured to any value in the range 166-255 by the firmware (because the preferred
+ * core ranking is encoded in the highest_perf value). To maintain consistency across
+ * all platforms, we split the highest_perf and preferred core ranking values into
+ * cpudata->perf.highest_perf and cpudata->prefcore_ranking.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min
+ * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ */
+union perf_cached {
+ struct {
+ u8 highest_perf;
+ u8 nominal_perf;
+ u8 lowest_nonlinear_perf;
+ u8 lowest_perf;
+ u8 min_limit_perf;
+ u8 max_limit_perf;
+ };
+ u64 val;
+};
+
/**
* struct amd_aperf_mperf
* @aperf: actual performance frequency clock count
@@ -30,20 +60,9 @@ struct amd_aperf_mperf {
* @cpu: CPU number
* @req: constraint request to apply
* @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- * assuming ideal conditions
- * For platforms that do not support the preferred core feature, the
- * highest_pef may be configured with 166 or 255, to avoid max frequency
- * calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- * assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- * savings are achieved
- * @lowest_perf: the absolute lowest performance level of the processor
+ * @perf: cached performance-related data
* @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
* priority.
- * @min_limit_perf: Cached value of the performance corresponding to policy->min
- * @max_limit_perf: Cached value of the performance corresponding to policy->max
* @min_limit_freq: Cached value of policy->min (in khz)
* @max_limit_freq: Cached value of policy->max (in khz)
* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
@@ -68,13 +87,9 @@ struct amd_cpudata {
struct freq_qos_request req[2];
u64 cppc_req_cached;
- u8 highest_perf;
- u8 nominal_perf;
- u8 lowest_nonlinear_perf;
- u8 lowest_perf;
+ union perf_cached perf;
+
u8 prefcore_ranking;
- u8 min_limit_perf;
- u8 max_limit_perf;
u32 min_limit_freq;
u32 max_limit_freq;
u32 nominal_freq;

View File

@@ -1,81 +0,0 @@
From 0daee82069cfe4a322bed954a4a5f19226e49e95 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:20 -0600
Subject: cpufreq/amd-pstate: Overhaul locking
amd_pstate_cpu_boost_update() and refresh_frequency_limits() both
update the policy state and have nothing to do with the amd-pstate
driver itself.
A global "limits" lock doesn't make sense because each CPU can have
policies changed independently. Each time a CPU changes values they
will atomically be written to the per-CPU perf member. Drop per CPU
locking cases.
The remaining "global" driver lock is used to ensure that only one
entity can change driver modes at a given time.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -196,7 +196,6 @@ static inline int get_mode_idx_from_str(
return -EINVAL;
}
-static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
static u8 msr_get_epp(struct amd_cpudata *cpudata)
@@ -1169,8 +1168,6 @@ static ssize_t store_energy_performance_
if (ret < 0)
return -EINVAL;
- guard(mutex)(&amd_pstate_limits_lock);
-
ret = amd_pstate_set_energy_pref_index(policy, ret);
return ret ? ret : count;
@@ -1343,8 +1340,10 @@ int amd_pstate_update_status(const char
if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
return -EINVAL;
- if (mode_state_machine[cppc_state][mode_idx])
+ if (mode_state_machine[cppc_state][mode_idx]) {
+ guard(mutex)(&amd_pstate_driver_lock);
return mode_state_machine[cppc_state][mode_idx](mode_idx);
+ }
return 0;
}
@@ -1365,7 +1364,6 @@ static ssize_t status_store(struct devic
char *p = memchr(buf, '\n', count);
int ret;
- guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_update_status(buf, p ? p - buf : count);
return ret < 0 ? ret : count;
@@ -1637,8 +1635,6 @@ static int amd_pstate_epp_cpu_offline(st
if (cpudata->suspended)
return 0;
- guard(mutex)(&amd_pstate_limits_lock);
-
if (trace_amd_pstate_epp_perf_enabled()) {
trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE,
@@ -1678,8 +1674,6 @@ static int amd_pstate_epp_resume(struct
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->suspended) {
- guard(mutex)(&amd_pstate_limits_lock);
-
/* enable amd pstate from suspend state*/
amd_pstate_epp_reenable(policy);

View File

@@ -1,48 +0,0 @@
From 7c820a91ffd02aa7e426e8801893575f218a7a80 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:21 -0600
Subject: cpufreq/amd-pstate: Drop `cppc_cap1_cached`
The `cppc_cap1_cached` variable isn't used at all, there is no
need to read it at initialization for each CPU.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 5 -----
drivers/cpufreq/amd-pstate.h | 2 --
2 files changed, 7 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1508,11 +1508,6 @@ static int amd_pstate_epp_cpu_init(struc
if (ret)
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
- if (ret)
- return ret;
- WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
if (ret)
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -76,7 +76,6 @@ struct amd_aperf_mperf {
* AMD P-State driver supports preferred core featue.
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
- * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -105,7 +104,6 @@ struct amd_cpudata {
/* EPP feature related attributes*/
u8 epp_cached;
u32 policy;
- u64 cppc_cap1_cached;
bool suspended;
u8 epp_default;
};

View File

@@ -1,144 +0,0 @@
From 5d0c340db98de378a11abfbaf587b6e601e7291c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:22 -0600
Subject: cpufreq/amd-pstate-ut: Use _free macro to free put policy
Using a scoped cleanup macro simplifies cleanup code.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -26,6 +26,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
+#include <linux/cleanup.h>
#include <acpi/cppc_acpi.h>
@@ -127,11 +128,12 @@ static void amd_pstate_ut_check_perf(u32
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
u64 cap1 = 0;
struct cppc_perf_caps cppc_perf;
- struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
policy = cpufreq_cpu_get(cpu);
if (!policy)
break;
@@ -142,7 +144,7 @@ static void amd_pstate_ut_check_perf(u32
if (ret) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
- goto skip_test;
+ return;
}
highest_perf = cppc_perf.highest_perf;
@@ -154,7 +156,7 @@ static void amd_pstate_ut_check_perf(u32
if (ret) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
- goto skip_test;
+ return;
}
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -167,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32
if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
__func__, cpu, highest_perf, cur_perf.highest_perf);
- goto skip_test;
+ return;
}
if (nominal_perf != cur_perf.nominal_perf ||
(lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
@@ -177,7 +179,7 @@ static void amd_pstate_ut_check_perf(u32
__func__, cpu, nominal_perf, cur_perf.nominal_perf,
lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
lowest_perf, cur_perf.lowest_perf);
- goto skip_test;
+ return;
}
if (!((highest_perf >= nominal_perf) &&
@@ -188,15 +190,11 @@ static void amd_pstate_ut_check_perf(u32
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
__func__, cpu, highest_perf, nominal_perf,
lowest_nonlinear_perf, lowest_perf);
- goto skip_test;
+ return;
}
- cpufreq_cpu_put(policy);
}
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- return;
-skip_test:
- cpufreq_cpu_put(policy);
}
/*
@@ -207,10 +205,11 @@ skip_test:
static void amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
- struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+
policy = cpufreq_cpu_get(cpu);
if (!policy)
break;
@@ -224,14 +223,14 @@ static void amd_pstate_ut_check_freq(u32
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
- goto skip_test;
+ return;
}
if (cpudata->lowest_nonlinear_freq != policy->min) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
- goto skip_test;
+ return;
}
if (cpudata->boost_supported) {
@@ -243,20 +242,16 @@ static void amd_pstate_ut_check_freq(u32
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
__func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
- goto skip_test;
+ return;
}
} else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d must support boost!\n", __func__, cpu);
- goto skip_test;
+ return;
}
- cpufreq_cpu_put(policy);
}
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- return;
-skip_test:
- cpufreq_cpu_put(policy);
}
static int amd_pstate_set_mode(enum amd_pstate_mode mode)

View File

@@ -1,37 +0,0 @@
From 8937b7068ca30072c4c4cf4c22000112afbd6839 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:23 -0600
Subject: cpufreq/amd-pstate-ut: Allow lowest nonlinear and lowest to be the
same
Several Ryzen AI processors support the exact same value for lowest
nonlinear perf and lowest perf. Loosen up the unit tests to allow this
scenario.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -184,7 +184,7 @@ static void amd_pstate_ut_check_perf(u32
if (!((highest_perf >= nominal_perf) &&
(nominal_perf > lowest_nonlinear_perf) &&
- (lowest_nonlinear_perf > lowest_perf) &&
+ (lowest_nonlinear_perf >= lowest_perf) &&
(lowest_perf > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
@@ -217,7 +217,7 @@ static void amd_pstate_ut_check_freq(u32
if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
- (cpudata->lowest_nonlinear_freq > policy->cpuinfo.min_freq) &&
+ (cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
(policy->cpuinfo.min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",

View File

@@ -1,309 +0,0 @@
From 8cb701e059fa08dcb9ab74e3c84abc224ff72714 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:24 -0600
Subject: cpufreq/amd-pstate-ut: Drop SUCCESS and FAIL enums
Enums are effectively used as a boolean and don't show
the return value of the failing call.
Instead of using enums switch to returning the actual return
code from the unit test.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 143 ++++++++++++--------------------
1 file changed, 55 insertions(+), 88 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -32,30 +32,20 @@
#include "amd-pstate.h"
-/*
- * Abbreviations:
- * amd_pstate_ut: used as a shortform for AMD P-State unit test.
- * It helps to keep variable names smaller, simpler
- */
-enum amd_pstate_ut_result {
- AMD_PSTATE_UT_RESULT_PASS,
- AMD_PSTATE_UT_RESULT_FAIL,
-};
struct amd_pstate_ut_struct {
const char *name;
- void (*func)(u32 index);
- enum amd_pstate_ut_result result;
+ int (*func)(u32 index);
};
/*
* Kernel module for testing the AMD P-State unit test
*/
-static void amd_pstate_ut_acpi_cpc_valid(u32 index);
-static void amd_pstate_ut_check_enabled(u32 index);
-static void amd_pstate_ut_check_perf(u32 index);
-static void amd_pstate_ut_check_freq(u32 index);
-static void amd_pstate_ut_check_driver(u32 index);
+static int amd_pstate_ut_acpi_cpc_valid(u32 index);
+static int amd_pstate_ut_check_enabled(u32 index);
+static int amd_pstate_ut_check_perf(u32 index);
+static int amd_pstate_ut_check_freq(u32 index);
+static int amd_pstate_ut_check_driver(u32 index);
static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
{"amd_pstate_ut_acpi_cpc_valid", amd_pstate_ut_acpi_cpc_valid },
@@ -78,51 +68,46 @@ static bool get_shared_mem(void)
/*
* check the _CPC object is present in SBIOS.
*/
-static void amd_pstate_ut_acpi_cpc_valid(u32 index)
+static int amd_pstate_ut_acpi_cpc_valid(u32 index)
{
- if (acpi_cpc_valid())
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+ if (!acpi_cpc_valid()) {
pr_err("%s the _CPC object is not present in SBIOS!\n", __func__);
+ return -EINVAL;
}
+
+ return 0;
}
-static void amd_pstate_ut_pstate_enable(u32 index)
+/*
+ * check if amd pstate is enabled
+ */
+static int amd_pstate_ut_check_enabled(u32 index)
{
- int ret = 0;
u64 cppc_enable = 0;
+ int ret;
+
+ if (get_shared_mem())
+ return 0;
ret = rdmsrl_safe(MSR_AMD_CPPC_ENABLE, &cppc_enable);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s rdmsrl_safe MSR_AMD_CPPC_ENABLE ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
- if (cppc_enable)
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+
+ if (!cppc_enable) {
pr_err("%s amd pstate must be enabled!\n", __func__);
+ return -EINVAL;
}
-}
-/*
- * check if amd pstate is enabled
- */
-static void amd_pstate_ut_check_enabled(u32 index)
-{
- if (get_shared_mem())
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else
- amd_pstate_ut_pstate_enable(index);
+ return 0;
}
/*
* check if performance values are reasonable.
* highest_perf >= nominal_perf > lowest_nonlinear_perf > lowest_perf > 0
*/
-static void amd_pstate_ut_check_perf(u32 index)
+static int amd_pstate_ut_check_perf(u32 index)
{
int cpu = 0, ret = 0;
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
@@ -142,9 +127,8 @@ static void amd_pstate_ut_check_perf(u32
if (get_shared_mem()) {
ret = cppc_get_perf_caps(cpu, &cppc_perf);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
highest_perf = cppc_perf.highest_perf;
@@ -154,9 +138,8 @@ static void amd_pstate_ut_check_perf(u32
} else {
ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
if (ret) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
- return;
+ return ret;
}
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
@@ -169,32 +152,30 @@ static void amd_pstate_ut_check_perf(u32
if (highest_perf != cur_perf.highest_perf && !cpudata->hw_prefcore) {
pr_err("%s cpu%d highest=%d %d highest perf doesn't match\n",
__func__, cpu, highest_perf, cur_perf.highest_perf);
- return;
+ return -EINVAL;
}
if (nominal_perf != cur_perf.nominal_perf ||
(lowest_nonlinear_perf != cur_perf.lowest_nonlinear_perf) ||
(lowest_perf != cur_perf.lowest_perf)) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d nominal=%d %d lowest_nonlinear=%d %d lowest=%d %d, they should be equal!\n",
__func__, cpu, nominal_perf, cur_perf.nominal_perf,
lowest_nonlinear_perf, cur_perf.lowest_nonlinear_perf,
lowest_perf, cur_perf.lowest_perf);
- return;
+ return -EINVAL;
}
if (!((highest_perf >= nominal_perf) &&
(nominal_perf > lowest_nonlinear_perf) &&
(lowest_nonlinear_perf >= lowest_perf) &&
(lowest_perf > 0))) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
__func__, cpu, highest_perf, nominal_perf,
lowest_nonlinear_perf, lowest_perf);
- return;
+ return -EINVAL;
}
}
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+ return 0;
}
/*
@@ -202,7 +183,7 @@ static void amd_pstate_ut_check_perf(u32
* max_freq >= nominal_freq > lowest_nonlinear_freq > min_freq > 0
* check max freq when set support boost mode.
*/
-static void amd_pstate_ut_check_freq(u32 index)
+static int amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
struct amd_cpudata *cpudata = NULL;
@@ -219,39 +200,33 @@ static void amd_pstate_ut_check_freq(u32
(cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
(cpudata->lowest_nonlinear_freq >= policy->cpuinfo.min_freq) &&
(policy->cpuinfo.min_freq > 0))) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
__func__, cpu, policy->cpuinfo.max_freq, cpudata->nominal_freq,
cpudata->lowest_nonlinear_freq, policy->cpuinfo.min_freq);
- return;
+ return -EINVAL;
}
if (cpudata->lowest_nonlinear_freq != policy->min) {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d cpudata_lowest_nonlinear_freq=%d policy_min=%d, they should be equal!\n",
__func__, cpu, cpudata->lowest_nonlinear_freq, policy->min);
- return;
+ return -EINVAL;
}
if (cpudata->boost_supported) {
- if ((policy->max == policy->cpuinfo.max_freq) ||
- (policy->max == cpudata->nominal_freq))
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
- else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
+ if ((policy->max != policy->cpuinfo.max_freq) &&
+ (policy->max != cpudata->nominal_freq)) {
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
__func__, cpu, policy->max, policy->cpuinfo.max_freq,
cpudata->nominal_freq);
- return;
+ return -EINVAL;
}
} else {
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d must support boost!\n", __func__, cpu);
- return;
+ return -EINVAL;
}
}
- amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+ return 0;
}
static int amd_pstate_set_mode(enum amd_pstate_mode mode)
@@ -263,32 +238,28 @@ static int amd_pstate_set_mode(enum amd_
return amd_pstate_update_status(mode_str, strlen(mode_str));
}
-static void amd_pstate_ut_check_driver(u32 index)
+static int amd_pstate_ut_check_driver(u32 index)
{
enum amd_pstate_mode mode1, mode2 = AMD_PSTATE_DISABLE;
- int ret;
for (mode1 = AMD_PSTATE_DISABLE; mode1 < AMD_PSTATE_MAX; mode1++) {
- ret = amd_pstate_set_mode(mode1);
+ int ret = amd_pstate_set_mode(mode1);
if (ret)
- goto out;
+ return ret;
for (mode2 = AMD_PSTATE_DISABLE; mode2 < AMD_PSTATE_MAX; mode2++) {
if (mode1 == mode2)
continue;
ret = amd_pstate_set_mode(mode2);
- if (ret)
- goto out;
+ if (ret) {
+ pr_err("%s: failed to update status for %s->%s\n", __func__,
+ amd_pstate_get_mode_string(mode1),
+ amd_pstate_get_mode_string(mode2));
+ return ret;
+ }
}
}
-out:
- if (ret)
- pr_warn("%s: failed to update status for %s->%s: %d\n", __func__,
- amd_pstate_get_mode_string(mode1),
- amd_pstate_get_mode_string(mode2), ret);
-
- amd_pstate_ut_cases[index].result = ret ?
- AMD_PSTATE_UT_RESULT_FAIL :
- AMD_PSTATE_UT_RESULT_PASS;
+
+ return 0;
}
static int __init amd_pstate_ut_init(void)
@@ -296,16 +267,12 @@ static int __init amd_pstate_ut_init(voi
u32 i = 0, arr_size = ARRAY_SIZE(amd_pstate_ut_cases);
for (i = 0; i < arr_size; i++) {
- amd_pstate_ut_cases[i].func(i);
- switch (amd_pstate_ut_cases[i].result) {
- case AMD_PSTATE_UT_RESULT_PASS:
+ int ret = amd_pstate_ut_cases[i].func(i);
+
+ if (ret)
+ pr_err("%-4d %-20s\t fail: %d!\n", i+1, amd_pstate_ut_cases[i].name, ret);
+ else
pr_info("%-4d %-20s\t success!\n", i+1, amd_pstate_ut_cases[i].name);
- break;
- case AMD_PSTATE_UT_RESULT_FAIL:
- default:
- pr_info("%-4d %-20s\t fail!\n", i+1, amd_pstate_ut_cases[i].name);
- break;
- }
}
return 0;

View File

@@ -1,50 +0,0 @@
From c553e0165997349a3f831fa04bdd7f61913a3442 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:25 -0600
Subject: cpufreq/amd-pstate-ut: Run on all of the correct CPUs
If a CPU is missing a policy or one has been offlined then the unit test
is skipped for the rest of the CPUs on the system.
Instead; iterate online CPUs and skip any missing policies to allow
continuing to test the rest of them.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -116,12 +116,12 @@ static int amd_pstate_ut_check_perf(u32
struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
policy = cpufreq_cpu_get(cpu);
if (!policy)
- break;
+ continue;
cpudata = policy->driver_data;
if (get_shared_mem()) {
@@ -188,12 +188,12 @@ static int amd_pstate_ut_check_freq(u32
int cpu = 0;
struct amd_cpudata *cpudata = NULL;
- for_each_possible_cpu(cpu) {
+ for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
policy = cpufreq_cpu_get(cpu);
if (!policy)
- break;
+ continue;
cpudata = policy->driver_data;
if (!((policy->cpuinfo.max_freq >= cpudata->nominal_freq) &&

View File

@@ -1,42 +0,0 @@
From c4197fd693cb98a8a71557187a7cf592d6b68b3c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:26 -0600
Subject: cpufreq/amd-pstate-ut: Adjust variable scope
In amd_pstate_ut_check_freq() and amd_pstate_ut_check_perf() the cpudata
variable is only needed in the scope of the for loop. Move it there.
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-ut.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -113,11 +113,11 @@ static int amd_pstate_ut_check_perf(u32
u32 highest_perf = 0, nominal_perf = 0, lowest_nonlinear_perf = 0, lowest_perf = 0;
u64 cap1 = 0;
struct cppc_perf_caps cppc_perf;
- struct amd_cpudata *cpudata = NULL;
union perf_cached cur_perf;
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ struct amd_cpudata *cpudata;
policy = cpufreq_cpu_get(cpu);
if (!policy)
@@ -186,10 +186,10 @@ static int amd_pstate_ut_check_perf(u32
static int amd_pstate_ut_check_freq(u32 index)
{
int cpu = 0;
- struct amd_cpudata *cpudata = NULL;
for_each_online_cpu(cpu) {
struct cpufreq_policy *policy __free(put_cpufreq_policy) = NULL;
+ struct amd_cpudata *cpudata;
policy = cpufreq_cpu_get(cpu);
if (!policy)

View File

@@ -1,123 +0,0 @@
From 19c375251767f49b62894d3b4782f0b8b01313b8 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:27 -0600
Subject: cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks
Bitfield masks are easier to follow and less error prone.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
arch/x86/include/asm/msr-index.h | 20 +++++++++++---------
arch/x86/kernel/acpi/cppc.c | 4 +++-
drivers/cpufreq/amd-pstate-ut.c | 9 +++++----
drivers/cpufreq/amd-pstate.c | 16 ++++++----------
4 files changed, 25 insertions(+), 24 deletions(-)
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -709,15 +709,17 @@
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
-#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
-#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -4,6 +4,8 @@
* Copyright (c) 2016, Intel Corporation.
*/
+#include <linux/bitfield.h>
+
#include <acpi/cppc_acpi.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -149,7 +151,7 @@ int amd_get_highest_perf(unsigned int cp
if (ret)
goto out;
- val = AMD_CPPC_HIGHEST_PERF(val);
+ val = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, val);
} else {
ret = cppc_get_highest_perf(cpu, &val);
if (ret)
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -142,10 +143,10 @@ static int amd_pstate_ut_check_perf(u32
return ret;
}
- highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
- nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
- lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
- lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ highest_perf = FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1);
+ nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+ lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+ lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
}
cur_perf = READ_ONCE(cpudata->perf);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -89,11 +89,6 @@ static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
-#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
-#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
-#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
-#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
-
/*
* AMD Energy Preference Performance (EPP)
* The EPP is used in the CCLK DPM controller to drive
@@ -439,12 +434,13 @@ static int msr_init_perf(struct amd_cpud
perf.highest_perf = numerator;
perf.max_limit_perf = numerator;
- perf.min_limit_perf = AMD_CPPC_LOWEST_PERF(cap1);
- perf.nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
- perf.lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
- perf.lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
+ perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
+ perf.nominal_perf = FIELD_GET(AMD_CPPC_NOMINAL_PERF_MASK, cap1);
+ perf.lowest_nonlinear_perf = FIELD_GET(AMD_CPPC_LOWNONLIN_PERF_MASK, cap1);
+ perf.lowest_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
WRITE_ONCE(cpudata->perf, perf);
- WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_ranking, FIELD_GET(AMD_CPPC_HIGHEST_PERF_MASK, cap1));
+
return 0;
}

View File

@@ -1,60 +0,0 @@
From bb7fadf4a86e19b52cbe850c9274bfa643d3ce52 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:28 -0600
Subject: cpufreq/amd-pstate: Cache CPPC request in shared mem case too
In order to prevent a potential write for shmem_update_perf()
cache the request into the cppc_req_cached variable normally only
used for the MSR case.
This adds symmetry into the code and potentially avoids extra writes.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -496,6 +496,8 @@ static int shmem_update_perf(struct amd_
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
struct cppc_perf_ctrls perf_ctrls;
+ u64 value, prev;
+ int ret;
if (cppc_state == AMD_PSTATE_ACTIVE) {
int ret = shmem_set_epp(cpudata, epp);
@@ -504,11 +506,29 @@ static int shmem_update_perf(struct amd_
return ret;
}
+ value = prev = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
+ AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
+ value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+ value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
+ value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+
+ if (value == prev)
+ return 0;
+
perf_ctrls.max_perf = max_perf;
perf_ctrls.min_perf = min_perf;
perf_ctrls.desired_perf = des_perf;
- return cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ ret = cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ if (ret)
+ return ret;
+
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ return 0;
}
static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)

View File

@@ -1,318 +0,0 @@
From e02f8a14d44223160d348d5841cc3dd916a14401 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:29 -0600
Subject: cpufreq/amd-pstate: Move all EPP tracing into *_update_perf and
*_set_epp functions
The EPP tracing is done by the caller today, but this precludes the
information about whether the CPPC request has changed.
Move it into the update_perf and set_epp functions and include information
about whether the request has changed from the last one.
amd_pstate_update_perf() and amd_pstate_set_epp() now require the policy
as an argument instead of the cpudata.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate-trace.h | 13 +++-
drivers/cpufreq/amd-pstate.c | 118 +++++++++++++++++------------
2 files changed, 80 insertions(+), 51 deletions(-)
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -90,7 +90,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
u8 epp,
u8 min_perf,
u8 max_perf,
- bool boost
+ bool boost,
+ bool changed
),
TP_ARGS(cpu_id,
@@ -98,7 +99,8 @@ TRACE_EVENT(amd_pstate_epp_perf,
epp,
min_perf,
max_perf,
- boost),
+ boost,
+ changed),
TP_STRUCT__entry(
__field(unsigned int, cpu_id)
@@ -107,6 +109,7 @@ TRACE_EVENT(amd_pstate_epp_perf,
__field(u8, min_perf)
__field(u8, max_perf)
__field(bool, boost)
+ __field(bool, changed)
),
TP_fast_assign(
@@ -116,15 +119,17 @@ TRACE_EVENT(amd_pstate_epp_perf,
__entry->min_perf = min_perf;
__entry->max_perf = max_perf;
__entry->boost = boost;
+ __entry->changed = changed;
),
- TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u",
+ TP_printk("cpu%u: [%hhu<->%hhu]/%hhu, epp=%hhu, boost=%u, changed=%u",
(unsigned int)__entry->cpu_id,
(u8)__entry->min_perf,
(u8)__entry->max_perf,
(u8)__entry->highest_perf,
(u8)__entry->epp,
- (bool)__entry->boost
+ (bool)__entry->boost,
+ (bool)__entry->changed
)
);
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -228,9 +228,10 @@ static u8 shmem_get_epp(struct amd_cpuda
return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, epp);
}
-static int msr_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int msr_update_perf(struct cpufreq_policy *policy, u8 min_perf,
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
u64 value, prev;
value = prev = READ_ONCE(cpudata->cppc_req_cached);
@@ -242,6 +243,18 @@ static int msr_update_perf(struct amd_cp
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu,
+ perf.highest_perf,
+ epp,
+ min_perf,
+ max_perf,
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -256,24 +269,26 @@ static int msr_update_perf(struct amd_cp
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
- WRITE_ONCE(cpudata->epp_cached, epp);
+ if (epp != cpudata->epp_cached)
+ WRITE_ONCE(cpudata->epp_cached, epp);
return 0;
}
DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
-static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
+static inline int amd_pstate_update_perf(struct cpufreq_policy *policy,
u8 min_perf, u8 des_perf,
u8 max_perf, u8 epp,
bool fast_switch)
{
- return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
+ return static_call(amd_pstate_update_perf)(policy, min_perf, des_perf,
max_perf, epp, fast_switch);
}
-static int msr_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int msr_set_epp(struct cpufreq_policy *policy, u8 epp)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
u64 value, prev;
int ret;
@@ -281,6 +296,19 @@ static int msr_set_epp(struct amd_cpudat
value &= ~AMD_CPPC_EPP_PERF_MASK;
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = cpudata->perf;
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+ epp,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+ cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+ cpudata->cppc_req_cached),
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -299,15 +327,29 @@ static int msr_set_epp(struct amd_cpudat
DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
-static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static inline int amd_pstate_set_epp(struct cpufreq_policy *policy, u8 epp)
{
- return static_call(amd_pstate_set_epp)(cpudata, epp);
+ return static_call(amd_pstate_set_epp)(policy, epp);
}
-static int shmem_set_epp(struct amd_cpudata *cpudata, u8 epp)
+static int shmem_set_epp(struct cpufreq_policy *policy, u8 epp)
{
- int ret;
+ struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ int ret;
+
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = cpudata->perf;
+
+ trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
+ epp,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK,
+ cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
+ cpudata->cppc_req_cached),
+ policy->boost_enabled,
+ epp != cpudata->epp_cached);
+ }
if (epp == cpudata->epp_cached)
return 0;
@@ -339,17 +381,7 @@ static int amd_pstate_set_energy_pref_in
return -EBUSY;
}
- if (trace_amd_pstate_epp_perf_enabled()) {
- union perf_cached perf = READ_ONCE(cpudata->perf);
-
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- epp,
- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
- policy->boost_enabled);
- }
-
- return amd_pstate_set_epp(cpudata, epp);
+ return amd_pstate_set_epp(policy, epp);
}
static inline int msr_cppc_enable(bool enable)
@@ -492,15 +524,16 @@ static inline int amd_pstate_init_perf(s
return static_call(amd_pstate_init_perf)(cpudata);
}
-static int shmem_update_perf(struct amd_cpudata *cpudata, u8 min_perf,
+static int shmem_update_perf(struct cpufreq_policy *policy, u8 min_perf,
u8 des_perf, u8 max_perf, u8 epp, bool fast_switch)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
u64 value, prev;
int ret;
if (cppc_state == AMD_PSTATE_ACTIVE) {
- int ret = shmem_set_epp(cpudata, epp);
+ int ret = shmem_set_epp(policy, epp);
if (ret)
return ret;
@@ -515,6 +548,18 @@ static int shmem_update_perf(struct amd_
value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ union perf_cached perf = READ_ONCE(cpudata->perf);
+
+ trace_amd_pstate_epp_perf(cpudata->cpu,
+ perf.highest_perf,
+ epp,
+ min_perf,
+ max_perf,
+ policy->boost_enabled,
+ value != prev);
+ }
+
if (value == prev)
return 0;
@@ -592,7 +637,7 @@ static void amd_pstate_update(struct amd
cpudata->cpu, fast_switch);
}
- amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
+ amd_pstate_update_perf(policy, min_perf, des_perf, max_perf, 0, fast_switch);
}
static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
@@ -1525,7 +1570,7 @@ static int amd_pstate_epp_cpu_init(struc
return ret;
WRITE_ONCE(cpudata->cppc_req_cached, value);
}
- ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+ ret = amd_pstate_set_epp(policy, cpudata->epp_default);
if (ret)
return ret;
@@ -1566,14 +1611,8 @@ static int amd_pstate_epp_update_limit(s
epp = READ_ONCE(cpudata->epp_cached);
perf = READ_ONCE(cpudata->perf);
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf, epp,
- perf.min_limit_perf,
- perf.max_limit_perf,
- policy->boost_enabled);
- }
- return amd_pstate_update_perf(cpudata, perf.min_limit_perf, 0U,
+ return amd_pstate_update_perf(policy, perf.min_limit_perf, 0U,
perf.max_limit_perf, epp, false);
}
@@ -1605,20 +1644,12 @@ static int amd_pstate_epp_set_policy(str
static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- union perf_cached perf = READ_ONCE(cpudata->perf);
int ret;
ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- cpudata->epp_cached,
- FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
- perf.highest_perf, policy->boost_enabled);
- }
return amd_pstate_epp_update_limit(policy);
}
@@ -1646,14 +1677,7 @@ static int amd_pstate_epp_cpu_offline(st
if (cpudata->suspended)
return 0;
- if (trace_amd_pstate_epp_perf_enabled()) {
- trace_amd_pstate_epp_perf(cpudata->cpu, perf.highest_perf,
- AMD_CPPC_EPP_BALANCE_POWERSAVE,
- perf.lowest_perf, perf.lowest_perf,
- policy->boost_enabled);
- }
-
- return amd_pstate_update_perf(cpudata, perf.lowest_perf, 0, perf.lowest_perf,
+ return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}

View File

@@ -1,37 +0,0 @@
From 5f0b3bf5497422293576a0783e47d203c52ed863 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:30 -0600
Subject: cpufreq/amd-pstate: Update cppc_req_cached for shared mem EPP writes
On EPP only writes update the cached variable so that the min/max
performance controls don't need to be updated again.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 6 ++++++
1 file changed, 6 insertions(+)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -336,6 +336,7 @@ static int shmem_set_epp(struct cpufreq_
{
struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ u64 value;
int ret;
if (trace_amd_pstate_epp_perf_enabled()) {
@@ -362,6 +363,11 @@ static int shmem_set_epp(struct cpufreq_
}
WRITE_ONCE(cpudata->epp_cached, epp);
+ value = READ_ONCE(cpudata->cppc_req_cached);
+ value &= ~AMD_CPPC_EPP_PERF_MASK;
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
return ret;
}

View File

@@ -1,38 +0,0 @@
From 6c2201fe880d7d35fbde67d74ec1989f053cc0bd Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:31 -0600
Subject: cpufreq/amd-pstate: Drop debug statements for policy setting
There are trace events that exist now for all amd-pstate modes that
will output information right before programming to the hardware.
This makes the existing debug statements unnecessary remaining
overhead. Drop them.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 4 ----
1 file changed, 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -667,7 +667,6 @@ static int amd_pstate_verify(struct cpuf
}
cpufreq_verify_within_cpu_limits(policy_data);
- pr_debug("policy_max =%d, policy_min=%d\n", policy_data->max, policy_data->min);
return 0;
}
@@ -1630,9 +1629,6 @@ static int amd_pstate_epp_set_policy(str
if (!policy->cpuinfo.max_freq)
return -ENODEV;
- pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
- policy->cpuinfo.max_freq, policy->max);
-
cpudata->policy = policy->policy;
ret = amd_pstate_epp_update_limit(policy);

View File

@@ -1,327 +0,0 @@
From 3c5030a27361deff20bec5d43339109901f3198c Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:32 -0600
Subject: cpufreq/amd-pstate: Rework CPPC enabling
The CPPC enable register is configured as "write once". That is
any future writes don't actually do anything.
Because of this, all the cleanup paths that currently exist for
CPPC disable are non-effective.
Rework CPPC enable to only enable after all the CAP registers have
been read to avoid enabling CPPC on CPUs with invalid _CPC or
unpopulated MSRs.
As the register is write once, remove all cleanup paths as well.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 179 +++++++----------------------------
1 file changed, 35 insertions(+), 144 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -85,7 +85,6 @@ static struct cpufreq_driver *current_ps
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
-static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
@@ -371,89 +370,21 @@ static int shmem_set_epp(struct cpufreq_
return ret;
}
-static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
- int pref_index)
+static inline int msr_cppc_enable(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- u8 epp;
-
- if (!pref_index)
- epp = cpudata->epp_default;
- else
- epp = epp_values[pref_index];
-
- if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
- pr_debug("EPP cannot be set under performance policy\n");
- return -EBUSY;
- }
-
- return amd_pstate_set_epp(policy, epp);
-}
-
-static inline int msr_cppc_enable(bool enable)
-{
- int ret, cpu;
- unsigned long logical_proc_id_mask = 0;
-
- /*
- * MSR_AMD_CPPC_ENABLE is write-once, once set it cannot be cleared.
- */
- if (!enable)
- return 0;
-
- if (enable == cppc_enabled)
- return 0;
-
- for_each_present_cpu(cpu) {
- unsigned long logical_id = topology_logical_package_id(cpu);
-
- if (test_bit(logical_id, &logical_proc_id_mask))
- continue;
-
- set_bit(logical_id, &logical_proc_id_mask);
-
- ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
- enable);
- if (ret)
- return ret;
- }
-
- cppc_enabled = enable;
- return 0;
+ return wrmsrl_safe_on_cpu(policy->cpu, MSR_AMD_CPPC_ENABLE, 1);
}
-static int shmem_cppc_enable(bool enable)
+static int shmem_cppc_enable(struct cpufreq_policy *policy)
{
- int cpu, ret = 0;
- struct cppc_perf_ctrls perf_ctrls;
-
- if (enable == cppc_enabled)
- return 0;
-
- for_each_present_cpu(cpu) {
- ret = cppc_set_enable(cpu, enable);
- if (ret)
- return ret;
-
- /* Enable autonomous mode for EPP */
- if (cppc_state == AMD_PSTATE_ACTIVE) {
- /* Set desired perf as zero to allow EPP firmware control */
- perf_ctrls.desired_perf = 0;
- ret = cppc_set_perf(cpu, &perf_ctrls);
- if (ret)
- return ret;
- }
- }
-
- cppc_enabled = enable;
- return ret;
+ return cppc_set_enable(policy->cpu, 1);
}
DEFINE_STATIC_CALL(amd_pstate_cppc_enable, msr_cppc_enable);
-static inline int amd_pstate_cppc_enable(bool enable)
+static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
{
- return static_call(amd_pstate_cppc_enable)(enable);
+ return static_call(amd_pstate_cppc_enable)(policy);
}
static int msr_init_perf(struct amd_cpudata *cpudata)
@@ -1063,6 +994,10 @@ static int amd_pstate_cpu_init(struct cp
cpudata->nominal_freq,
perf.highest_perf);
+ ret = amd_pstate_cppc_enable(policy);
+ if (ret)
+ goto free_cpudata1;
+
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
/* It will be updated by governor */
@@ -1110,28 +1045,6 @@ static void amd_pstate_cpu_exit(struct c
kfree(cpudata);
}
-static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(true);
- if (ret)
- pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
-
- return ret;
-}
-
-static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(false);
- if (ret)
- pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
-
- return ret;
-}
-
/* Sysfs attributes */
/*
@@ -1223,8 +1136,10 @@ static ssize_t show_energy_performance_a
static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
char str_preference[21];
ssize_t ret;
+ u8 epp;
ret = sscanf(buf, "%20s", str_preference);
if (ret != 1)
@@ -1234,7 +1149,17 @@ static ssize_t store_energy_performance_
if (ret < 0)
return -EINVAL;
- ret = amd_pstate_set_energy_pref_index(policy, ret);
+ if (!ret)
+ epp = cpudata->epp_default;
+ else
+ epp = epp_values[ret];
+
+ if (epp > 0 && policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ pr_debug("EPP cannot be set under performance policy\n");
+ return -EBUSY;
+ }
+
+ ret = amd_pstate_set_epp(policy, epp);
return ret ? ret : count;
}
@@ -1267,7 +1192,6 @@ static ssize_t show_energy_performance_p
static void amd_pstate_driver_cleanup(void)
{
- amd_pstate_cppc_enable(false);
cppc_state = AMD_PSTATE_DISABLE;
current_pstate_driver = NULL;
}
@@ -1301,14 +1225,6 @@ static int amd_pstate_register_driver(in
cppc_state = mode;
- ret = amd_pstate_cppc_enable(true);
- if (ret) {
- pr_err("failed to enable cppc during amd-pstate driver registration, return %d\n",
- ret);
- amd_pstate_driver_cleanup();
- return ret;
- }
-
/* at least one CPU supports CPB */
current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
@@ -1548,11 +1464,15 @@ static int amd_pstate_epp_cpu_init(struc
policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
cpudata->nominal_freq,
perf.highest_perf);
+ policy->driver_data = cpudata;
+
+ ret = amd_pstate_cppc_enable(policy);
+ if (ret)
+ goto free_cpudata1;
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
- policy->driver_data = cpudata;
policy->boost_enabled = READ_ONCE(cpudata->boost_supported);
@@ -1644,31 +1564,11 @@ static int amd_pstate_epp_set_policy(str
return 0;
}
-static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
-{
- int ret;
-
- ret = amd_pstate_cppc_enable(true);
- if (ret)
- pr_err("failed to enable amd pstate during resume, return %d\n", ret);
-
-
- return amd_pstate_epp_update_limit(policy);
-}
-
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- int ret;
-
- pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
+ pr_debug("AMD CPU Core %d going online\n", policy->cpu);
- ret = amd_pstate_epp_reenable(policy);
- if (ret)
- return ret;
- cpudata->suspended = false;
-
- return 0;
+ return amd_pstate_cppc_enable(policy);
}
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
@@ -1686,11 +1586,6 @@ static int amd_pstate_epp_cpu_offline(st
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- int ret;
-
- /* avoid suspending when EPP is not enabled */
- if (cppc_state != AMD_PSTATE_ACTIVE)
- return 0;
/* invalidate to ensure it's rewritten during resume */
cpudata->cppc_req_cached = 0;
@@ -1698,11 +1593,6 @@ static int amd_pstate_epp_suspend(struct
/* set this flag to avoid setting core offline*/
cpudata->suspended = true;
- /* disable CPPC in lowlevel firmware */
- ret = amd_pstate_cppc_enable(false);
- if (ret)
- pr_err("failed to suspend, return %d\n", ret);
-
return 0;
}
@@ -1711,8 +1601,12 @@ static int amd_pstate_epp_resume(struct
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->suspended) {
+ int ret;
+
/* enable amd pstate from suspend state*/
- amd_pstate_epp_reenable(policy);
+ ret = amd_pstate_epp_update_limit(policy);
+ if (ret)
+ return ret;
cpudata->suspended = false;
}
@@ -1727,8 +1621,6 @@ static struct cpufreq_driver amd_pstate_
.fast_switch = amd_pstate_fast_switch,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
- .suspend = amd_pstate_cpu_suspend,
- .resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
.update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
@@ -1895,7 +1787,6 @@ static int __init amd_pstate_init(void)
global_attr_free:
cpufreq_unregister_driver(current_pstate_driver);
- amd_pstate_cppc_enable(false);
return ret;
}
device_initcall(amd_pstate_init);

View File

@@ -1,105 +0,0 @@
From c06cca99a6d74e7a6d6f020dbf982b0b9bf704e6 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:33 -0600
Subject: cpufreq/amd-pstate: Stop caching EPP
EPP values are cached in the cpudata structure per CPU. This is needless
though because they are also cached in the CPPC request variable.
Drop the separate cache for EPP values and always reference the CPPC
request variable when needed.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 19 ++++++++++---------
drivers/cpufreq/amd-pstate.h | 1 -
2 files changed, 10 insertions(+), 10 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -268,8 +268,6 @@ static int msr_update_perf(struct cpufre
}
WRITE_ONCE(cpudata->cppc_req_cached, value);
- if (epp != cpudata->epp_cached)
- WRITE_ONCE(cpudata->epp_cached, epp);
return 0;
}
@@ -318,7 +316,6 @@ static int msr_set_epp(struct cpufreq_po
}
/* update both so that msr_update_perf() can effectively check */
- WRITE_ONCE(cpudata->epp_cached, epp);
WRITE_ONCE(cpudata->cppc_req_cached, value);
return ret;
@@ -335,9 +332,12 @@ static int shmem_set_epp(struct cpufreq_
{
struct amd_cpudata *cpudata = policy->driver_data;
struct cppc_perf_ctrls perf_ctrls;
+ u8 epp_cached;
u64 value;
int ret;
+
+ epp_cached = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
if (trace_amd_pstate_epp_perf_enabled()) {
union perf_cached perf = cpudata->perf;
@@ -348,10 +348,10 @@ static int shmem_set_epp(struct cpufreq_
FIELD_GET(AMD_CPPC_MAX_PERF_MASK,
cpudata->cppc_req_cached),
policy->boost_enabled,
- epp != cpudata->epp_cached);
+ epp != epp_cached);
}
- if (epp == cpudata->epp_cached)
+ if (epp == epp_cached)
return 0;
perf_ctrls.energy_perf = epp;
@@ -360,7 +360,6 @@ static int shmem_set_epp(struct cpufreq_
pr_debug("failed to set energy perf value (%d)\n", ret);
return ret;
}
- WRITE_ONCE(cpudata->epp_cached, epp);
value = READ_ONCE(cpudata->cppc_req_cached);
value &= ~AMD_CPPC_EPP_PERF_MASK;
@@ -1168,9 +1167,11 @@ static ssize_t show_energy_performance_p
struct cpufreq_policy *policy, char *buf)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u8 preference;
+ u8 preference, epp;
+
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
- switch (cpudata->epp_cached) {
+ switch (epp) {
case AMD_CPPC_EPP_PERFORMANCE:
preference = EPP_INDEX_PERFORMANCE;
break;
@@ -1533,7 +1534,7 @@ static int amd_pstate_epp_update_limit(s
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
epp = 0;
else
- epp = READ_ONCE(cpudata->epp_cached);
+ epp = FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached);
perf = READ_ONCE(cpudata->perf);
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -102,7 +102,6 @@ struct amd_cpudata {
bool hw_prefcore;
/* EPP feature related attributes*/
- u8 epp_cached;
u32 policy;
bool suspended;
u8 epp_default;

View File

@@ -1,39 +0,0 @@
From a82e4f4eb6e5e9806c66285cb3cefde644b8ea6b Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 26 Feb 2025 01:49:34 -0600
Subject: cpufreq/amd-pstate: Drop actions in amd_pstate_epp_cpu_offline()
When the CPU goes offline there is no need to change the CPPC request
because the CPU will go into the deepest C-state it supports already.
Actually changing the CPPC request when it goes offline messes up the
cached values and can lead to the wrong values being restored when
it comes back.
Instead drop the actions and if the CPU comes back online let
amd_pstate_epp_set_policy() restore it to expected values.
Reviewed-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1574,14 +1574,7 @@ static int amd_pstate_epp_cpu_online(str
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
- struct amd_cpudata *cpudata = policy->driver_data;
- union perf_cached perf = READ_ONCE(cpudata->perf);
-
- if (cpudata->suspended)
- return 0;
-
- return amd_pstate_update_perf(policy, perf.lowest_perf, 0, perf.lowest_perf,
- AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
+ return 0;
}
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)

View File

@@ -1,41 +0,0 @@
From de3dd387423b30565e846e0ff4424e2c99164030 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <superm1@kernel.org>
Date: Thu, 27 Feb 2025 14:09:08 -0600
Subject: cpufreq/amd-pstate: fix warning noticed by kernel test robot
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202502272001.nafS0qXq-lkp@intel.com/
Signed-off-by: Oleksandr Natalenko <oleksandr@natalenko.name>
---
drivers/cpufreq/amd-pstate.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -903,20 +903,19 @@ static int amd_pstate_init_freq(struct a
return ret;
perf = READ_ONCE(cpudata->perf);
+ if (quirks && quirks->nominal_freq)
+ nominal_freq = quirks->nominal_freq;
+ else
+ nominal_freq = cppc_perf.nominal_freq;
+ nominal_freq *= 1000;
+
if (quirks && quirks->lowest_freq) {
min_freq = quirks->lowest_freq;
perf.lowest_perf = freq_to_perf(perf, nominal_freq, min_freq);
WRITE_ONCE(cpudata->perf, perf);
} else
min_freq = cppc_perf.lowest_freq;
-
- if (quirks && quirks->nominal_freq)
- nominal_freq = quirks->nominal_freq;
- else
- nominal_freq = cppc_perf.nominal_freq;
-
min_freq *= 1000;
- nominal_freq *= 1000;
WRITE_ONCE(cpudata->nominal_freq, nominal_freq);

View File

@@ -1,42 +0,0 @@
From 7e68278a4a90d52966b923404a2d280e3a83b66f Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Date: Mon, 7 Apr 2025 08:19:26 +0000
Subject: cpufreq/amd-pstate: Fix min_limit perf and freq updation for
performance governor
The min_limit perf and freq values can get disconnected with performance
governor, as we only modify the perf value in the special case. Fix that
by modifying the perf and freq values together
Fixes: 009d1c29a451 ("cpufreq/amd-pstate: Move perf values into a union")
Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20250407081925.850473-1-dhananjay.ugwekar@amd.com
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
drivers/cpufreq/amd-pstate.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_li
union perf_cached perf = READ_ONCE(cpudata->perf);
perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
- perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
+ WRITE_ONCE(cpudata->max_limit_freq, policy->max);
- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
+ WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq));
+ } else {
+ perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
+ WRITE_ONCE(cpudata->min_limit_freq, policy->min);
+ }
- WRITE_ONCE(cpudata->max_limit_freq, policy->max);
- WRITE_ONCE(cpudata->min_limit_freq, policy->min);
WRITE_ONCE(cpudata->perf, perf);
}

View File

@@ -1,4 +1,4 @@
From 247749c27f92a789d4f1727aa870167c25ca3c5e Mon Sep 17 00:00:00 2001
From 1cb9f09cead0ba384729bfdc74d6fa21d586530c Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Thu, 5 Sep 2024 10:26:39 +0100
Subject: cpuidle: Prefer teo over menu governor
@@ -36,7 +36,7 @@ Signed-off-by: Christian Loehle <christian.loehle@arm.com>
depends on KVM_GUEST
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -519,7 +519,7 @@ static int menu_enable_device(struct cpu
@@ -513,7 +513,7 @@ static int menu_enable_device(struct cpu
static struct cpuidle_governor menu_governor = {
.name = "menu",

View File

@@ -1,65 +0,0 @@
From 5e5a835c50afc3b9bb2b8b9175d0924abb5a7f3c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 27 Jan 2025 13:16:09 -0800
Subject: crypto: x86/aes-xts - make the fast path 64-bit specific
Remove 32-bit support from the fast path in xts_crypt(). Then optimize
it for 64-bit, and simplify the code, by switching to sg_virt() and
removing the now-unnecessary checks for crossing a page boundary.
The result is simpler code that is slightly smaller and faster in the
case that actually matters (64-bit).
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/aesni-intel_glue.c | 30 ++++++++++--------------------
1 file changed, 10 insertions(+), 20 deletions(-)
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -581,11 +581,8 @@ xts_crypt(struct skcipher_request *req,
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
- const unsigned int cryptlen = req->cryptlen;
- struct scatterlist *src = req->src;
- struct scatterlist *dst = req->dst;
- if (unlikely(cryptlen < AES_BLOCK_SIZE))
+ if (unlikely(req->cryptlen < AES_BLOCK_SIZE))
return -EINVAL;
kernel_fpu_begin();
@@ -593,23 +590,16 @@ xts_crypt(struct skcipher_request *req,
/*
* In practice, virtually all XTS plaintexts and ciphertexts are either
- * 512 or 4096 bytes, aligned such that they don't span page boundaries.
- * To optimize the performance of these cases, and also any other case
- * where no page boundary is spanned, the below fast-path handles
- * single-page sources and destinations as efficiently as possible.
+ * 512 or 4096 bytes and do not use multiple scatterlist elements. To
+ * optimize the performance of these cases, the below fast-path handles
+ * single-scatterlist-element messages as efficiently as possible. The
+ * code is 64-bit specific, as it assumes no page mapping is needed.
*/
- if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
- src->offset + cryptlen <= PAGE_SIZE &&
- dst->offset + cryptlen <= PAGE_SIZE)) {
- struct page *src_page = sg_page(src);
- struct page *dst_page = sg_page(dst);
- void *src_virt = kmap_local_page(src_page) + src->offset;
- void *dst_virt = kmap_local_page(dst_page) + dst->offset;
-
- (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
- req->iv);
- kunmap_local(dst_virt);
- kunmap_local(src_virt);
+ if (IS_ENABLED(CONFIG_X86_64) &&
+ likely(req->src->length >= req->cryptlen &&
+ req->dst->length >= req->cryptlen)) {
+ (*crypt_func)(&ctx->crypt_ctx, sg_virt(req->src),
+ sg_virt(req->dst), req->cryptlen, req->iv);
kernel_fpu_end();
return 0;
}

View File

@@ -1,176 +0,0 @@
From 4506de20739ac4726a258faa98609a552184d2d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20Gonz=C3=A1lez=20Collado?=
<sergio.collado@gmail.com>
Date: Sun, 2 Mar 2025 23:15:18 +0100
Subject: Kunit to check the longest symbol length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The longest length of a symbol (KSYM_NAME_LEN) was increased to 512
in the reference [1]. This patch adds kunit test suite to check the longest
symbol length. These tests verify that the longest symbol length defined
is supported.
This test can also help other efforts for longer symbol length,
like [2].
The test suite defines one symbol with the longest possible length.
The first test verify that functions with names of the created
symbol, can be called or not.
The second test, verify that the symbols are created (or
not) in the kernel symbol table.
[1] https://lore.kernel.org/lkml/20220802015052.10452-6-ojeda@kernel.org/
[2] https://lore.kernel.org/lkml/20240605032120.3179157-1-song@kernel.org/
Tested-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Rae Moar <rmoar@google.com>
Signed-off-by: Sergio González Collado <sergio.collado@gmail.com>
Link: https://github.com/Rust-for-Linux/linux/issues/504
Source: https://lore.kernel.org/rust-for-linux/20250302221518.76874-1-sergio.collado@gmail.com/
Cherry-picked-for: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/63
---
arch/x86/tools/insn_decoder_test.c | 3 +-
lib/Kconfig.debug | 9 ++++
lib/Makefile | 2 +
lib/longest_symbol_kunit.c | 82 ++++++++++++++++++++++++++++++
4 files changed, 95 insertions(+), 1 deletion(-)
create mode 100644 lib/longest_symbol_kunit.c
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -10,6 +10,7 @@
#include <assert.h>
#include <unistd.h>
#include <stdarg.h>
+#include <linux/kallsyms.h>
#define unlikely(cond) (cond)
@@ -106,7 +107,7 @@ static void parse_args(int argc, char **
}
}
-#define BUFSIZE 256
+#define BUFSIZE (256 + KSYM_NAME_LEN)
int main(int argc, char **argv)
{
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2838,6 +2838,15 @@ config FORTIFY_KUNIT_TEST
by the str*() and mem*() family of functions. For testing runtime
traps of FORTIFY_SOURCE, see LKDTM's "FORTIFY_*" tests.
+config LONGEST_SYM_KUNIT_TEST
+ tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS
+ depends on KUNIT && KPROBES
+ default KUNIT_ALL_TESTS
+ help
+ Tests the longest symbol possible
+
+ If unsure, say N.
+
config HW_BREAKPOINT_KUNIT_TEST
bool "Test hw_breakpoint constraints accounting" if !KUNIT_ALL_TESTS
depends on HAVE_HW_BREAKPOINT
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -398,6 +398,8 @@ obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fort
obj-$(CONFIG_CRC_KUNIT_TEST) += crc_kunit.o
obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o
+obj-$(CONFIG_LONGEST_SYM_KUNIT_TEST) += longest_symbol_kunit.o
+CFLAGS_longest_symbol_kunit.o += $(call cc-disable-warning, missing-prototypes)
obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
--- /dev/null
+++ b/lib/longest_symbol_kunit.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the longest symbol length. Execute with:
+ * ./tools/testing/kunit/kunit.py run longest-symbol
+ * --arch=x86_64 --kconfig_add CONFIG_KPROBES=y --kconfig_add CONFIG_MODULES=y
+ * --kconfig_add CONFIG_RETPOLINE=n --kconfig_add CONFIG_CFI_CLANG=n
+ * --kconfig_add CONFIG_MITIGATION_RETPOLINE=n
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <kunit/test.h>
+#include <linux/stringify.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+
+#define DI(name) s##name##name
+#define DDI(name) DI(n##name##name)
+#define DDDI(name) DDI(n##name##name)
+#define DDDDI(name) DDDI(n##name##name)
+#define DDDDDI(name) DDDDI(n##name##name)
+
+/*Generate a symbol whose name length is 511 */
+#define LONGEST_SYM_NAME DDDDDI(g1h2i3j4k5l6m7n)
+
+#define RETURN_LONGEST_SYM 0xAAAAA
+
+noinline int LONGEST_SYM_NAME(void);
+noinline int LONGEST_SYM_NAME(void)
+{
+ return RETURN_LONGEST_SYM;
+}
+
+_Static_assert(sizeof(__stringify(LONGEST_SYM_NAME)) == KSYM_NAME_LEN,
+"Incorrect symbol length found. Expected KSYM_NAME_LEN: "
+__stringify(KSYM_NAME_LEN) ", but found: "
+__stringify(sizeof(LONGEST_SYM_NAME)));
+
+static void test_longest_symbol(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, LONGEST_SYM_NAME());
+};
+
+static void test_longest_symbol_kallsyms(struct kunit *test)
+{
+ unsigned long (*kallsyms_lookup_name)(const char *name);
+ static int (*longest_sym)(void);
+
+ struct kprobe kp = {
+ .symbol_name = "kallsyms_lookup_name",
+ };
+
+ if (register_kprobe(&kp) < 0) {
+ pr_info("%s: kprobe not registered", __func__);
+ KUNIT_FAIL(test, "test_longest_symbol kallsyms: kprobe not registered\n");
+ return;
+ }
+
+ kunit_warn(test, "test_longest_symbol kallsyms: kprobe registered\n");
+ kallsyms_lookup_name = (unsigned long (*)(const char *name))kp.addr;
+ unregister_kprobe(&kp);
+
+ longest_sym =
+ (void *) kallsyms_lookup_name(__stringify(LONGEST_SYM_NAME));
+ KUNIT_EXPECT_EQ(test, RETURN_LONGEST_SYM, longest_sym());
+};
+
+static struct kunit_case longest_symbol_test_cases[] = {
+ KUNIT_CASE(test_longest_symbol),
+ KUNIT_CASE(test_longest_symbol_kallsyms),
+ {}
+};
+
+static struct kunit_suite longest_symbol_test_suite = {
+ .name = "longest-symbol",
+ .test_cases = longest_symbol_test_cases,
+};
+kunit_test_suite(longest_symbol_test_suite);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Test the longest symbol length");
+MODULE_AUTHOR("Sergio González Collado");

View File

@@ -0,0 +1,70 @@
From cda8b1022f32bb7a917148f75f4641e7a5b3e729 Mon Sep 17 00:00:00 2001
From: Jinliang Zheng <alexjlzheng@tencent.com>
Date: Tue, 15 Apr 2025 17:02:32 +0800
Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler()
In dirty_ratio_handler(), vm_dirty_bytes must be set to zero before
calling writeback_set_ratelimit(), as global_dirty_limits() always
prioritizes the value of vm_dirty_bytes.
It's domain_dirty_limits() that's relevant here, not node_dirty_ok:
dirty_ratio_handler
writeback_set_ratelimit
global_dirty_limits(&dirty_thresh) <- ratelimit_pages based on dirty_thresh
domain_dirty_limits
if (bytes) <- bytes = vm_dirty_bytes <--------+
thresh = f1(bytes) <- prioritizes vm_dirty_bytes |
else |
thresh = f2(ratio) |
ratelimit_pages = f3(dirty_thresh) |
vm_dirty_bytes = 0 <- it's late! ---------------------+
This causes ratelimit_pages to still use the value calculated based on
vm_dirty_bytes, which is wrong now.
The impact visible to userspace is difficult to capture directly because
there is no procfs/sysfs interface exported to user space. However, it
will have a real impact on the balance of dirty pages.
For example:
1. On default, we have vm_dirty_ratio=40, vm_dirty_bytes=0
2. echo 8192 > dirty_bytes, then vm_dirty_bytes=8192,
vm_dirty_ratio=0, and ratelimit_pages is calculated based on
vm_dirty_bytes now.
3. echo 20 > dirty_ratio, then since vm_dirty_bytes is not reset to
zero when writeback_set_ratelimit() -> global_dirty_limits() ->
domain_dirty_limits() is called, reallimit_pages is still calculated
based on vm_dirty_bytes instead of vm_dirty_ratio. This does not
conform to the actual intent of the user.
Link: https://lkml.kernel.org/r/20250415090232.7544-1-alexjlzheng@tencent.com
Fixes: 9d823e8f6b1b ("writeback: per task dirty rate limit")
Signed-off-by: Jinliang Zheng <alexjlzheng@tencent.com>
Reviewed-by: MengEn Sun <mengensun@tencent.com>
Cc: Andrea Righi <andrea@betterlinux.com>
Cc: Fenggaung Wu <fengguang.wu@intel.com>
Cc: Jinliang Zheng <alexjlzheng@tencent.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/page-writeback.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -520,8 +520,8 @@ static int dirty_ratio_handler(const str
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
- writeback_set_ratelimit();
vm_dirty_bytes = 0;
+ writeback_set_ratelimit();
}
return ret;
}

View File

@@ -0,0 +1,179 @@
From 30a724581b5037176f6492359c189ebb180ccf1f Mon Sep 17 00:00:00 2001
From: GONG Ruiqi <gongruiqi1@huawei.com>
Date: Sun, 27 Apr 2025 10:53:03 +0800
Subject: vgacon: Add check for vc_origin address range in vgacon_scroll()
Our in-house Syzkaller reported the following BUG (twice), which we
believed was the same issue with [1]:
==================================================================
BUG: KASAN: slab-out-of-bounds in vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740
Read of size 2 at addr ffff88800f5bef60 by task syz.7.2620/12393
...
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106
print_address_description.constprop.0+0x6b/0x3d0 mm/kasan/report.c:364
print_report+0xba/0x280 mm/kasan/report.c:475
kasan_report+0xa9/0xe0 mm/kasan/report.c:588
vcs_scr_readw+0xc2/0xd0 drivers/tty/vt/vt.c:4740
vcs_write_buf_noattr drivers/tty/vt/vc_screen.c:493 [inline]
vcs_write+0x586/0x840 drivers/tty/vt/vc_screen.c:690
vfs_write+0x219/0x960 fs/read_write.c:584
ksys_write+0x12e/0x260 fs/read_write.c:639
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x78/0xe2
...
</TASK>
Allocated by task 5614:
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
kasan_set_track+0x25/0x30 mm/kasan/common.c:52
____kasan_kmalloc mm/kasan/common.c:374 [inline]
__kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:383
kasan_kmalloc include/linux/kasan.h:201 [inline]
__do_kmalloc_node mm/slab_common.c:1007 [inline]
__kmalloc+0x62/0x140 mm/slab_common.c:1020
kmalloc include/linux/slab.h:604 [inline]
kzalloc include/linux/slab.h:721 [inline]
vc_do_resize+0x235/0xf40 drivers/tty/vt/vt.c:1193
vgacon_adjust_height+0x2d4/0x350 drivers/video/console/vgacon.c:1007
vgacon_font_set+0x1f7/0x240 drivers/video/console/vgacon.c:1031
con_font_set drivers/tty/vt/vt.c:4628 [inline]
con_font_op+0x4da/0xa20 drivers/tty/vt/vt.c:4675
vt_k_ioctl+0xa10/0xb30 drivers/tty/vt/vt_ioctl.c:474
vt_ioctl+0x14c/0x1870 drivers/tty/vt/vt_ioctl.c:752
tty_ioctl+0x655/0x1510 drivers/tty/tty_io.c:2779
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:871 [inline]
__se_sys_ioctl+0x12d/0x190 fs/ioctl.c:857
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x78/0xe2
Last potentially related work creation:
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
__kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492
__call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713
netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802
__sock_release+0xb5/0x270 net/socket.c:663
sock_close+0x1e/0x30 net/socket.c:1425
__fput+0x408/0xab0 fs/file_table.c:384
__fput_sync+0x4c/0x60 fs/file_table.c:465
__do_sys_close fs/open.c:1580 [inline]
__se_sys_close+0x68/0xd0 fs/open.c:1565
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x59/0x110 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x78/0xe2
Second to last potentially related work creation:
kasan_save_stack+0x20/0x40 mm/kasan/common.c:45
__kasan_record_aux_stack+0x94/0xa0 mm/kasan/generic.c:492
__call_rcu_common.constprop.0+0xc3/0xa10 kernel/rcu/tree.c:2713
netlink_release+0x620/0xc20 net/netlink/af_netlink.c:802
__sock_release+0xb5/0x270 net/socket.c:663
sock_close+0x1e/0x30 net/socket.c:1425
__fput+0x408/0xab0 fs/file_table.c:384
task_work_run+0x154/0x240 kernel/task_work.c:239
exit_task_work include/linux/task_work.h:45 [inline]
do_exit+0x8e5/0x1320 kernel/exit.c:874
do_group_exit+0xcd/0x280 kernel/exit.c:1023
get_signal+0x1675/0x1850 kernel/signal.c:2905
arch_do_signal_or_restart+0x80/0x3b0 arch/x86/kernel/signal.c:310
exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
exit_to_user_mode_prepare include/linux/entry-common.h:328 [inline]
__syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
syscall_exit_to_user_mode+0x1b3/0x1e0 kernel/entry/common.c:218
do_syscall_64+0x66/0x110 arch/x86/entry/common.c:87
entry_SYSCALL_64_after_hwframe+0x78/0xe2
The buggy address belongs to the object at ffff88800f5be000
which belongs to the cache kmalloc-2k of size 2048
The buggy address is located 2656 bytes to the right of
allocated 1280-byte region [ffff88800f5be000, ffff88800f5be500)
...
Memory state around the buggy address:
ffff88800f5bee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff88800f5bee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88800f5bef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
^
ffff88800f5bef80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff88800f5bf000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================
By analyzing the vmcore, we found that vc->vc_origin was somehow placed
one line prior to vc->vc_screenbuf when vc was in KD_TEXT mode, and
further writings to /dev/vcs caused out-of-bounds reads (and writes
right after) in vcs_write_buf_noattr().
Our further experiments show that in most cases, vc->vc_origin equals to
vga_vram_base when the console is in KD_TEXT mode, and it's around
vc->vc_screenbuf for the KD_GRAPHICS mode. But via triggerring a
TIOCL_SETVESABLANK ioctl beforehand, we can make vc->vc_origin be around
vc->vc_screenbuf while the console is in KD_TEXT mode, and then by
writing the special 'ESC M' control sequence to the tty certain times
(depends on the value of `vc->state.y - vc->vc_top`), we can eventually
move vc->vc_origin prior to vc->vc_screenbuf. Here's the PoC, tested on
QEMU:
```
int main() {
const int RI_NUM = 10; // should be greater than `vc->state.y - vc->vc_top`
int tty_fd, vcs_fd;
const char *tty_path = "/dev/tty0";
const char *vcs_path = "/dev/vcs";
const char escape_seq[] = "\x1bM"; // ESC + M
const char trigger_seq[] = "Let's trigger an OOB write.";
struct vt_sizes vt_size = { 70, 2 };
int blank = TIOCL_BLANKSCREEN;
tty_fd = open(tty_path, O_RDWR);
char vesa_mode[] = { TIOCL_SETVESABLANK, 1 };
ioctl(tty_fd, TIOCLINUX, vesa_mode);
ioctl(tty_fd, TIOCLINUX, &blank);
ioctl(tty_fd, VT_RESIZE, &vt_size);
for (int i = 0; i < RI_NUM; ++i)
write(tty_fd, escape_seq, sizeof(escape_seq) - 1);
vcs_fd = open(vcs_path, O_RDWR);
write(vcs_fd, trigger_seq, sizeof(trigger_seq));
close(vcs_fd);
close(tty_fd);
return 0;
}
```
To solve this problem, add an address range validation check in
vgacon_scroll(), ensuring vc->vc_origin never precedes vc_screenbuf.
Reported-by: syzbot+9c09fda97a1a65ea859b@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=9c09fda97a1a65ea859b [1]
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Co-developed-by: Yi Yang <yiyang13@huawei.com>
Signed-off-by: Yi Yang <yiyang13@huawei.com>
Signed-off-by: GONG Ruiqi <gongruiqi1@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
drivers/video/console/vgacon.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1168,7 +1168,7 @@ static bool vgacon_scroll(struct vc_data
c->vc_screenbuf_size - delta);
c->vc_origin = vga_vram_end - c->vc_screenbuf_size;
vga_rolled_over = 0;
- } else
+ } else if (oldo - delta >= (unsigned long)c->vc_screenbuf)
c->vc_origin -= delta;
c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size;
scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char,

View File

@@ -1,36 +0,0 @@
From b5a4b82efd19d0687a5582a58f6830bf714e34fc Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 18 Mar 2025 15:32:30 -0700
Subject: x86/tools: Drop duplicate unlikely() definition in
insn_decoder_test.c
After commit c104c16073b7 ("Kunit to check the longest symbol length"),
there is a warning when building with clang because there is now a
definition of unlikely from compiler.h in tools/include/linux, which
conflicts with the one in the instruction decoder selftest:
arch/x86/tools/insn_decoder_test.c:15:9: warning: 'unlikely' macro redefined [-Wmacro-redefined]
Remove the second unlikely() definition, as it is no longer necessary,
clearing up the warning.
Fixes: c104c16073b7 ("Kunit to check the longest symbol length")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20250318-x86-decoder-test-fix-unlikely-redef-v1-1-74c84a7bf05b@kernel.org
---
arch/x86/tools/insn_decoder_test.c | 2 --
1 file changed, 2 deletions(-)
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -12,8 +12,6 @@
#include <stdarg.h>
#include <linux/kallsyms.h>
-#define unlikely(cond) (cond)
-
#include <asm/insn.h>
#include <inat.c>
#include <insn.c>

View File

@@ -0,0 +1,102 @@
From 5cf26cf9fd9c11cb1543aac026f8928829895663 Mon Sep 17 00:00:00 2001
From: Murad Masimov <m.masimov@mt-integration.ru>
Date: Mon, 28 Apr 2025 18:34:06 +0300
Subject: fbdev: Fix do_register_framebuffer to prevent null-ptr-deref in
fb_videomode_to_var
If fb_add_videomode() in do_register_framebuffer() fails to allocate
memory for fb_videomode, it will later lead to a null-ptr dereference in
fb_videomode_to_var(), as the fb_info is registered while not having the
mode in modelist that is expected to be there, i.e. the one that is
described in fb_info->var.
================================================================
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901
Call Trace:
display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929
fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071
resize_screen drivers/tty/vt/vt.c:1176 [inline]
vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263
fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720
fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776
do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128
fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203
vfs_ioctl fs/ioctl.c:48 [inline]
__do_sys_ioctl fs/ioctl.c:753 [inline]
__se_sys_ioctl fs/ioctl.c:739 [inline]
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739
do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x67/0xd1
================================================================
Even though fbcon_init() checks beforehand if fb_match_mode() in
var_to_display() fails, it can not prevent the panic because fbcon_init()
does not return error code. Considering this and the comment in the code
about fb_match_mode() returning NULL - "This should not happen" - it is
better to prevent registering the fb_info if its mode was not set
successfully. Also move fb_add_videomode() closer to the beginning of
do_register_framebuffer() to avoid having to do the cleanup on fail.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Murad Masimov <m.masimov@mt-integration.ru>
Signed-off-by: Helge Deller <deller@gmx.de>
---
drivers/video/fbdev/core/fbmem.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -388,7 +388,7 @@ static int fb_check_foreignness(struct f
static int do_register_framebuffer(struct fb_info *fb_info)
{
- int i;
+ int i, err = 0;
struct fb_videomode mode;
if (fb_check_foreignness(fb_info))
@@ -397,10 +397,18 @@ static int do_register_framebuffer(struc
if (num_registered_fb == FB_MAX)
return -ENXIO;
- num_registered_fb++;
for (i = 0 ; i < FB_MAX; i++)
if (!registered_fb[i])
break;
+
+ if (!fb_info->modelist.prev || !fb_info->modelist.next)
+ INIT_LIST_HEAD(&fb_info->modelist);
+
+ fb_var_to_videomode(&mode, &fb_info->var);
+ err = fb_add_videomode(&mode, &fb_info->modelist);
+ if (err < 0)
+ return err;
+
fb_info->node = i;
refcount_set(&fb_info->count, 1);
mutex_init(&fb_info->lock);
@@ -426,16 +434,12 @@ static int do_register_framebuffer(struc
if (bitmap_empty(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT))
bitmap_fill(fb_info->pixmap.blit_y, FB_MAX_BLIT_HEIGHT);
- if (!fb_info->modelist.prev || !fb_info->modelist.next)
- INIT_LIST_HEAD(&fb_info->modelist);
-
if (fb_info->skip_vt_switch)
pm_vt_switch_required(fb_info->device, false);
else
pm_vt_switch_required(fb_info->device, true);
- fb_var_to_videomode(&mode, &fb_info->var);
- fb_add_videomode(&mode, &fb_info->modelist);
+ num_registered_fb++;
registered_fb[i] = fb_info;
#ifdef CONFIG_GUMSTIX_AM200EPD

View File

@@ -0,0 +1,65 @@
From 54c7f478f1a9d58f5609a48d461c7d495bb8301a Mon Sep 17 00:00:00 2001
From: Murad Masimov <m.masimov@mt-integration.ru>
Date: Mon, 28 Apr 2025 18:34:07 +0300
Subject: fbdev: Fix fb_set_var to prevent null-ptr-deref in
fb_videomode_to_var
If fb_add_videomode() in fb_set_var() fails to allocate memory for
fb_videomode, later it may lead to a null-ptr dereference in
fb_videomode_to_var(), as the fb_info is registered while not having the
mode in modelist that is expected to be there, i.e. the one that is
described in fb_info->var.
================================================================
general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
CPU: 1 PID: 30371 Comm: syz-executor.1 Not tainted 5.10.226-syzkaller #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014
RIP: 0010:fb_videomode_to_var+0x24/0x610 drivers/video/fbdev/core/modedb.c:901
Call Trace:
display_to_var+0x3a/0x7c0 drivers/video/fbdev/core/fbcon.c:929
fbcon_resize+0x3e2/0x8f0 drivers/video/fbdev/core/fbcon.c:2071
resize_screen drivers/tty/vt/vt.c:1176 [inline]
vc_do_resize+0x53a/0x1170 drivers/tty/vt/vt.c:1263
fbcon_modechanged+0x3ac/0x6e0 drivers/video/fbdev/core/fbcon.c:2720
fbcon_update_vcs+0x43/0x60 drivers/video/fbdev/core/fbcon.c:2776
do_fb_ioctl+0x6d2/0x740 drivers/video/fbdev/core/fbmem.c:1128
fb_ioctl+0xe7/0x150 drivers/video/fbdev/core/fbmem.c:1203
vfs_ioctl fs/ioctl.c:48 [inline]
__do_sys_ioctl fs/ioctl.c:753 [inline]
__se_sys_ioctl fs/ioctl.c:739 [inline]
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:739
do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46
entry_SYSCALL_64_after_hwframe+0x67/0xd1
================================================================
The reason is that fb_info->var is being modified in fb_set_var(), and
then fb_videomode_to_var() is called. If it fails to add the mode to
fb_info->modelist, fb_set_var() returns error, but does not restore the
old value of fb_info->var. Restore fb_info->var on failure the same way
it is done earlier in the function.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Murad Masimov <m.masimov@mt-integration.ru>
Signed-off-by: Helge Deller <deller@gmx.de>
---
drivers/video/fbdev/core/fbmem.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -328,8 +328,10 @@ fb_set_var(struct fb_info *info, struct
!list_empty(&info->modelist))
ret = fb_add_videomode(&mode, &info->modelist);
- if (ret)
+ if (ret) {
+ info->var = old_var;
return ret;
+ }
event.info = info;
event.data = &mode;

View File

@@ -1,40 +0,0 @@
From e56acee381a8e07edf1920fb58f3166f911b6e5c Mon Sep 17 00:00:00 2001
From: Lingbo Kong <quic_lingbok@quicinc.com>
Date: Wed, 26 Feb 2025 19:31:18 +0800
Subject: wifi: ath12k: Abort scan before removing link interface to prevent
duplicate deletion
Currently, when ath12k performs the remove link interface operation, if
there is an ongoing scan operation on the arvif, ath12k may execute the
remove link interface operation multiple times on the same arvif. This
occurs because, during the remove link operation, if a scan operation is
present on the arvif, ath12k may receive a WMI_SCAN_EVENT_COMPLETED event
from the firmware. Upon receiving this event, ath12k will continue to
execute the ath12k_scan_vdev_clean_work() function, performing the remove
link interface operation on the same arvif again.
To address this issue, before executing the remove link interface
operation, ath12k needs to check if there is an ongoing scan operation on
the current arvif. If such an operation exists, it should be aborted.
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
---
drivers/net/wireless/ath/ath12k/mac.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -9395,6 +9395,11 @@ ath12k_mac_op_unassign_vif_chanctx(struc
ar->num_started_vdevs == 1 && ar->monitor_vdev_created)
ath12k_mac_monitor_stop(ar);
+ if (ar->scan.arvif == arvif && ar->scan.state == ATH12K_SCAN_RUNNING) {
+ ath12k_scan_abort(ar);
+ ar->scan.arvif = NULL;
+ }
+
ath12k_mac_remove_link_interface(hw, arvif);
ath12k_mac_unassign_link_vif(arvif);
}

View File

@@ -1,49 +0,0 @@
From 8d0e02f81d08c7b1e082028af0f55a22e7e1dfb2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 15 Apr 2025 10:22:04 +0200
Subject: Kconfig: switch CONFIG_SYSFS_SYCALL default to n
This odd system call will be removed in the future. Let's decouple it
from CONFIG_EXPERT and switch the default to n as a first step.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
init/Kconfig | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1603,6 +1603,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW
the unaligned access emulation.
see arch/parisc/kernel/unaligned.c for reference
+config SYSFS_SYSCALL
+ bool "Sysfs syscall support"
+ default n
+ help
+ sys_sysfs is an obsolete system call no longer supported in libc.
+ Note that disabling this option is more secure but might break
+ compatibility with some systems.
+
+ If unsure say N here.
+
config HAVE_PCSPKR_PLATFORM
bool
@@ -1647,16 +1657,6 @@ config SGETMASK_SYSCALL
If unsure, leave the default option here.
-config SYSFS_SYSCALL
- bool "Sysfs syscall support" if EXPERT
- default y
- help
- sys_sysfs is an obsolete system call no longer supported in libc.
- Note that disabling this option is more secure but might break
- compatibility with some systems.
-
- If unsure say Y here.
-
config FHANDLE
bool "open by fhandle syscalls" if EXPERT
select EXPORTFS

View File

@@ -0,0 +1,113 @@
From 9cb2f9d210f915aabe54c5061d84f3fbe93c71ea Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 7 Apr 2025 11:54:15 +0200
Subject: anon_inode: use a proper mode internally
This allows the VFS to not trip over anonymous inodes and we can add
asserts based on the mode into the vfs. When we report it to userspace
we can simply hide the mode to avoid regressions. I've audited all
direct callers of alloc_anon_inode() and only secretmen overrides i_mode
and i_op inode operations but it already uses a regular file.
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org
Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@vger.kernel.org # all LTS kernels
Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
fs/internal.h | 3 +++
fs/libfs.c | 8 +++++++-
3 files changed, 46 insertions(+), 1 deletion(-)
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,10 +24,44 @@
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *anon_inode_mnt __ro_after_init;
static struct inode *anon_inode_inode __ro_after_init;
/*
+ * User space expects anonymous inodes to have no file type in st_mode.
+ *
+ * In particular, 'lsof' has this legacy logic:
+ *
+ * type = s->st_mode & S_IFMT;
+ * switch (type) {
+ * ...
+ * case 0:
+ * if (!strcmp(p, "anon_inode"))
+ * Lf->ntype = Ntype = N_ANON_INODE;
+ *
+ * to detect our old anon_inode logic.
+ *
+ * Rather than mess with our internal sane inode data, just fix it
+ * up here in getattr() by masking off the format bits.
+ */
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ struct inode *inode = d_inode(path->dentry);
+
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+ stat->mode &= ~S_IFMT;
+ return 0;
+}
+
+static const struct inode_operations anon_inode_operations = {
+ .getattr = anon_inode_getattr,
+};
+
+/*
* anon_inodefs_dname() is called from d_path().
*/
static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -66,6 +100,7 @@ static struct inode *anon_inode_make_sec
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
+ inode->i_op = &anon_inode_operations;
error = security_inode_init_security_anon(inode, &QSTR(name),
context_inode);
if (error) {
@@ -313,6 +348,7 @@ static int __init anon_inode_init(void)
anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
if (IS_ERR(anon_inode_inode))
panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+ anon_inode_inode->i_op = &anon_inode_operations;
return 0;
}
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -343,3 +343,6 @@ static inline bool path_mounted(const st
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags);
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1647,7 +1647,13 @@ struct inode *alloc_anon_inode(struct su
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
+ /*
+ * Historically anonymous inodes didn't have a type at all and
+ * userspace has come to rely on this. Internally they're just
+ * regular files but S_IFREG is masked off when reporting
+ * information to userspace.
+ */
+ inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;

View File

@@ -0,0 +1,80 @@
From ea4199112ae6d8da866417f50e035be01488c502 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 7 Apr 2025 11:54:17 +0200
Subject: anon_inode: explicitly block ->setattr()
It is currently possible to change the mode and owner of the single
anonymous inode in the kernel:
int main(int argc, char *argv[])
{
int ret, sfd;
sigset_t mask;
struct signalfd_siginfo fdsi;
sigemptyset(&mask);
sigaddset(&mask, SIGINT);
sigaddset(&mask, SIGQUIT);
ret = sigprocmask(SIG_BLOCK, &mask, NULL);
if (ret < 0)
_exit(1);
sfd = signalfd(-1, &mask, 0);
if (sfd < 0)
_exit(2);
ret = fchown(sfd, 5555, 5555);
if (ret < 0)
_exit(3);
ret = fchmod(sfd, 0777);
if (ret < 0)
_exit(3);
_exit(4);
}
This is a bug. It's not really a meaningful one because anonymous inodes
don't really figure into path lookup and they cannot be reopened via
/proc/<pid>/fd/<nr> and can't be used for lookup itself. So they can
only ever serve as direct references.
But it is still completely bogus to allow the mode and ownership or any
of the properties of the anonymous inode to be changed. Block this!
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-3-53a44c20d44e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@vger.kernel.org # all LTS kernels
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/anon_inodes.c | 7 +++++++
fs/internal.h | 2 ++
2 files changed, 9 insertions(+)
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,8 +57,15 @@ int anon_inode_getattr(struct mnt_idmap
return 0;
}
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
static const struct inode_operations anon_inode_operations = {
.getattr = anon_inode_getattr,
+ .setattr = anon_inode_setattr,
};
/*
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -346,3 +346,5 @@ int statmount_mnt_idmap(struct mnt_idmap
int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
+int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+ struct iattr *attr);

View File

@@ -0,0 +1,39 @@
From 79f54c5bc7c6097a379c83e9ed56bee27cf1218a Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 7 Apr 2025 11:54:19 +0200
Subject: anon_inode: raise SB_I_NODEV and SB_I_NOEXEC
It isn't possible to execute anonymous inodes because they cannot be
opened in any way after they have been created. This includes execution:
execveat(fd_anon_inode, "", NULL, NULL, AT_EMPTY_PATH)
Anonymous inodes have inode->f_op set to no_open_fops which sets
no_open() which returns ENXIO. That means any call to do_dentry_open()
which is the endpoint of the do_open_execat() will fail. There's no
chance to execute an anonymous inode. Unless a given subsystem overrides
it ofc.
However, we should still harden this and raise SB_I_NODEV and
SB_I_NOEXEC on the superblock itself so that no one gets any creative
ideas.
Link: https://lore.kernel.org/20250407-work-anon_inode-v1-5-53a44c20d44e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@vger.kernel.org # all LTS kernels
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/anon_inodes.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -86,6 +86,8 @@ static int anon_inodefs_init_fs_context(
struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
if (!ctx)
return -ENOMEM;
+ fc->s_iflags |= SB_I_NOEXEC;
+ fc->s_iflags |= SB_I_NODEV;
ctx->dops = &anon_inodefs_dentry_operations;
return 0;
}

View File

@@ -0,0 +1,136 @@
From edaacbee0f33b7371ec460723d1042a6c5a4bb9d Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 21 Apr 2025 10:27:40 +0200
Subject: fs: add S_ANON_INODE
This makes it easy to detect proper anonymous inodes and to ensure that
we can detect them in codepaths such as readahead().
Readahead on anonymous inodes didn't work because they didn't have a
proper mode. Now that they have we need to retain EINVAL being returned
otherwise LTP will fail.
We also need to ensure that ioctls aren't simply fired like they are for
regular files so things like inotify inodes continue to correctly call
their own ioctl handlers as in [1].
Reported-by: Xilin Wu <sophon@radxa.com>
Link: https://lore.kernel.org/3A9139D5CD543962+89831381-31b9-4392-87ec-a84a5b3507d8@radxa.com [1]
Link: https://lore.kernel.org/7a1a7076-ff6b-4cb0-94e7-7218a0a44028@sirena.org.uk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
fs/ioctl.c | 7 ++++---
fs/libfs.c | 2 +-
fs/pidfs.c | 2 +-
include/linux/fs.h | 2 ++
mm/readahead.c | 20 ++++++++++++++++----
5 files changed, 24 insertions(+), 9 deletions(-)
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -821,7 +821,8 @@ static int do_vfs_ioctl(struct file *fil
return ioctl_fioasync(fd, filp, argp);
case FIOQSIZE:
- if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
+ if (S_ISDIR(inode->i_mode) ||
+ (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode)) ||
S_ISLNK(inode->i_mode)) {
loff_t res = inode_get_bytes(inode);
return copy_to_user(argp, &res, sizeof(res)) ?
@@ -856,7 +857,7 @@ static int do_vfs_ioctl(struct file *fil
return ioctl_file_dedupe_range(filp, argp);
case FIONREAD:
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) || IS_ANON_FILE(inode))
return vfs_ioctl(filp, cmd, arg);
return put_user(i_size_read(inode) - filp->f_pos,
@@ -881,7 +882,7 @@ static int do_vfs_ioctl(struct file *fil
return ioctl_get_fs_sysfs_path(filp, argp);
default:
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode) && !IS_ANON_FILE(inode))
return file_ioctl(filp, cmd, argp);
break;
}
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1656,7 +1656,7 @@ struct inode *alloc_anon_inode(struct su
inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
simple_inode_init_ts(inode);
return inode;
}
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -826,7 +826,7 @@ static int pidfs_init_inode(struct inode
const struct pid *pid = data;
inode->i_private = data;
- inode->i_flags |= S_PRIVATE;
+ inode->i_flags |= S_PRIVATE | S_ANON_INODE;
inode->i_mode |= S_IRWXU;
inode->i_op = &pidfs_inode_operations;
inode->i_fop = &pidfs_file_operations;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2344,6 +2344,7 @@ struct super_operations {
#define S_CASEFOLD (1 << 15) /* Casefolded file */
#define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
+#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2400,6 +2401,7 @@ static inline bool sb_rdonly(const struc
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
+#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE)
static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
struct inode *inode)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -690,9 +690,15 @@ EXPORT_SYMBOL_GPL(page_cache_async_ra);
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
+ struct file *file;
+ const struct inode *inode;
+
CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EBADF;
- if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ))
+ file = fd_file(f);
+ if (!(file->f_mode & FMODE_READ))
return -EBADF;
/*
@@ -700,9 +706,15 @@ ssize_t ksys_readahead(int fd, loff_t of
* that can execute readahead. If readahead is not possible
* on this file, then we must return -EINVAL.
*/
- if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops ||
- (!S_ISREG(file_inode(fd_file(f))->i_mode) &&
- !S_ISBLK(file_inode(fd_file(f))->i_mode)))
+ if (!file->f_mapping)
+ return -EINVAL;
+ if (!file->f_mapping->a_ops)
+ return -EINVAL;
+
+ inode = file_inode(file);
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+ return -EINVAL;
+ if (IS_ANON_FILE(inode))
return -EINVAL;
return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED);

View File

@@ -0,0 +1,35 @@
From ab287d709809b6dfe4d3c42016a543d976533d51 Mon Sep 17 00:00:00 2001
From: Zijun Hu <quic_zijuhu@quicinc.com>
Date: Wed, 7 May 2025 19:50:26 +0800
Subject: configfs: Do not override creating attribute file failure in
populate_attrs()
populate_attrs() may override failure for creating attribute files
by success for creating subsequent bin attribute files, and have
wrong return value.
Fix by creating bin attribute files under successfully creating
attribute files.
Fixes: 03607ace807b ("configfs: implement binary attributes")
Cc: stable@vger.kernel.org
Reviewed-by: Joel Becker <jlbec@evilplan.org>
Reviewed-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
Link: https://lore.kernel.org/r/20250507-fix_configfs-v3-2-fe2d96de8dc4@quicinc.com
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
fs/configfs/dir.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -619,7 +619,7 @@ static int populate_attrs(struct config_
break;
}
}
- if (t->ct_bin_attrs) {
+ if (!error && t->ct_bin_attrs) {
for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) {
if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i))
continue;

View File

@@ -0,0 +1,104 @@
From 896b7b0d6ed53a7fe159c4b76f25407c816aa619 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 May 2025 19:20:36 -0400
Subject: Don't propagate mounts into detached trees
All versions up to 6.14 did not propagate mount events into detached
tree. Shortly after 6.14 a merge of vfs-6.15-rc1.mount.namespace
(130e696aa68b) has changed that.
Unfortunately, that has caused userland regressions (reported in
https://lore.kernel.org/all/CAOYeF9WQhFDe+BGW=Dp5fK8oRy5AgZ6zokVyTj1Wp4EUiYgt4w@mail.gmail.com/)
Straight revert wouldn't be an option - in particular, the variant in 6.14
had a bug that got fixed in d1ddc6f1d9f0 ("fix IS_MNT_PROPAGATING uses")
and we don't want to bring the bug back.
This is a modification of manual revert posted by Christian, with changes
needed to avoid reintroducing the breakage in scenario described in
d1ddc6f1d9f0.
Cc: stable@vger.kernel.org
Reported-by: Allison Karlitskaya <lis@redhat.com>
Tested-by: Allison Karlitskaya <lis@redhat.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Co-developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/mount.h | 5 -----
fs/namespace.c | 15 ++-------------
fs/pnode.c | 4 ++--
3 files changed, 4 insertions(+), 20 deletions(-)
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,10 +7,6 @@
extern struct list_head notify_list;
-typedef __u32 __bitwise mntns_flags_t;
-
-#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0))
-
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -37,7 +33,6 @@ struct mnt_namespace {
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
- mntns_flags_t mntns_flags;
} __randomize_layout;
struct mnt_pcp {
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3648,7 +3648,7 @@ static int do_move_mount(struct path *ol
if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
- if (is_anon_ns(ns)) {
+ if (is_anon_ns(ns) && ns == p->mnt_ns) {
/*
* Ending up with two files referring to the root of the
* same anonymous mount namespace would cause an error
@@ -3656,16 +3656,7 @@ static int do_move_mount(struct path *ol
* twice into the mount tree which would be rejected
* later. But be explicit about it right here.
*/
- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
- goto out;
-
- /*
- * If this is an anonymous mount tree ensure that mount
- * propagation can detect mounts that were just
- * propagated to the target mount tree so we don't
- * propagate onto them.
- */
- ns->mntns_flags |= MNTNS_PROPAGATING;
+ goto out;
} else if (is_anon_ns(p->mnt_ns)) {
/*
* Don't allow moving an attached mount tree to an
@@ -3722,8 +3713,6 @@ static int do_move_mount(struct path *ol
if (attached)
put_mountpoint(old_mp);
out:
- if (is_anon_ns(ns))
- ns->mntns_flags &= ~MNTNS_PROPAGATING;
unlock_mount(mp);
if (!err) {
if (attached) {
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m
/* skip if mountpoint isn't visible in m */
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
return 0;
- /* skip if m is in the anon_ns we are emptying */
- if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING)
+ /* skip if m is in the anon_ns */
+ if (is_anon_ns(m->mnt_ns))
return 0;
if (peers(m, last_dest)) {

View File

@@ -0,0 +1,51 @@
From bc86aaf0e0256220ca787fdbb57a73429ade1129 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:52 -0600
Subject: mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback
It's possible for the folio to either get marked for writeback or
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
folio_unmap_invalidate() call behind check for the folio being both
non-dirty and not under writeback AFTER the folio lock has been
acquired. Use this helper folio_end_dropbehind_write().
Cc: stable@vger.kernel.org
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
mm/filemap.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct
}
EXPORT_SYMBOL(folio_wait_private_2_killable);
+static void filemap_end_dropbehind(struct folio *folio)
+{
+ struct address_space *mapping = folio->mapping;
+
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+ if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+ folio_unmap_invalidate(mapping, folio, 0);
+}
+
/*
* If folio was marked as dropbehind, then pages should be dropped when writeback
* completes. Do that now. If we fail, it's likely because of a big folio -
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(s
* invalidation in that case.
*/
if (in_task() && folio_trylock(folio)) {
- if (folio->mapping)
- folio_unmap_invalidate(folio->mapping, folio, 0);
+ filemap_end_dropbehind(folio);
folio_unlock(folio);
}
}

View File

@@ -0,0 +1,51 @@
From fad76185ca91983990c660642151083eb05cbfc0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:53 -0600
Subject: mm/filemap: use filemap_end_dropbehind() for read invalidation
Use the filemap_end_dropbehind() helper rather than calling
folio_unmap_invalidate() directly, as we need to check if the folio has
been redirtied or marked for writeback once the folio lock has been
re-acquired.
Cc: stable@vger.kernel.org
Reported-by: Trond Myklebust <trondmy@hammerspace.com>
Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE")
Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
mm/filemap.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t
return (pos1 >> shift == pos2 >> shift);
}
-static void filemap_end_dropbehind_read(struct address_space *mapping,
- struct folio *folio)
+static void filemap_end_dropbehind_read(struct folio *folio)
{
if (!folio_test_dropbehind(folio))
return;
@@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(
return;
if (folio_trylock(folio)) {
if (folio_test_clear_dropbehind(folio))
- folio_unmap_invalidate(mapping, folio, 0);
+ filemap_end_dropbehind(folio);
folio_unlock(folio);
}
}
@@ -2774,7 +2773,7 @@ put_folios:
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct folio *folio = fbatch.folios[i];
- filemap_end_dropbehind_read(mapping, folio);
+ filemap_end_dropbehind_read(folio);
folio_put(folio);
}
folio_batch_init(&fbatch);

View File

@@ -0,0 +1,29 @@
From f0579d45f2e03fa3ba0d9466e79a31ea37acb487 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:54 -0600
Subject: Revert "Disable FOP_DONTCACHE for now due to bugs"
This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d.
Both the read and write side dirty && writeback races should be resolved
now, revert the commit that disabled FOP_DONTCACHE for filesystems.
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
include/linux/fs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2186,7 +2186,7 @@ struct file_operations {
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */
+#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,

View File

@@ -0,0 +1,36 @@
From 3b4614564770691cf3a6eb88127268ef6a84180c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:55 -0600
Subject: mm/filemap: unify read/write dropbehind naming
The read side is filemap_end_dropbehind_read(), while the write side
used folio_ as the prefix rather than filemap_. The read side makes more
sense, unify the naming such that the write side follows that.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-5-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
mm/filemap.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1604,7 +1604,7 @@ static void filemap_end_dropbehind(struc
* completes. Do that now. If we fail, it's likely because of a big folio -
* just reset dropbehind for that case and latter completions should invalidate.
*/
-static void folio_end_dropbehind_write(struct folio *folio)
+static void filemap_end_dropbehind_write(struct folio *folio)
{
/*
* Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
@@ -1659,7 +1659,7 @@ void folio_end_writeback(struct folio *f
acct_reclaim_writeback(folio);
if (folio_dropbehind)
- folio_end_dropbehind_write(folio);
+ filemap_end_dropbehind_write(folio);
folio_put(folio);
}
EXPORT_SYMBOL(folio_end_writeback);

View File

@@ -0,0 +1,78 @@
From 6003153e1bc4ad4952773081d7b89aa1ab2274c3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 27 May 2025 07:28:56 -0600
Subject: mm/filemap: unify dropbehind flag testing and clearing
The read and write side does this a bit differently, unify it such that
the _{read,write} helpers check the bit before locking, and the generic
handler is in charge of clearing the bit and invalidating, once under
the folio lock.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/20250527133255.452431-6-axboe@kernel.dk
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
mm/filemap.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1595,7 +1595,11 @@ static void filemap_end_dropbehind(struc
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+ if (folio_test_writeback(folio) || folio_test_dirty(folio))
+ return;
+ if (!folio_test_clear_dropbehind(folio))
+ return;
+ if (mapping)
folio_unmap_invalidate(mapping, folio, 0);
}
@@ -1606,6 +1610,9 @@ static void filemap_end_dropbehind(struc
*/
static void filemap_end_dropbehind_write(struct folio *folio)
{
+ if (!folio_test_dropbehind(folio))
+ return;
+
/*
* Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
* but can happen if normal writeback just happens to find dirty folios
@@ -1629,8 +1636,6 @@ static void filemap_end_dropbehind_write
*/
void folio_end_writeback(struct folio *folio)
{
- bool folio_dropbehind = false;
-
VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
/*
@@ -1652,14 +1657,11 @@ void folio_end_writeback(struct folio *f
* reused before the folio_wake_bit().
*/
folio_get(folio);
- if (!folio_test_dirty(folio))
- folio_dropbehind = folio_test_clear_dropbehind(folio);
if (__folio_end_writeback(folio))
folio_wake_bit(folio, PG_writeback);
- acct_reclaim_writeback(folio);
- if (folio_dropbehind)
- filemap_end_dropbehind_write(folio);
+ filemap_end_dropbehind_write(folio);
+ acct_reclaim_writeback(folio);
folio_put(folio);
}
EXPORT_SYMBOL(folio_end_writeback);
@@ -2651,8 +2653,7 @@ static void filemap_end_dropbehind_read(
if (folio_test_writeback(folio) || folio_test_dirty(folio))
return;
if (folio_trylock(folio)) {
- if (folio_test_clear_dropbehind(folio))
- filemap_end_dropbehind(folio);
+ filemap_end_dropbehind(folio);
folio_unlock(folio);
}
}

View File

@@ -0,0 +1,98 @@
From 61c0b2450f2b85c5053fa4f71d9c619b34d3af6c Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg@amd.com>
Date: Mon, 26 May 2025 18:28:18 +0000
Subject: mm/khugepaged: fix race with folio split/free using temporary
reference
hpage_collapse_scan_file() calls is_refcount_suitable(), which in turn
calls folio_mapcount(). folio_mapcount() checks folio_test_large() before
proceeding to folio_large_mapcount(), but there is a race window where the
folio may get split/freed between these checks, triggering:
VM_WARN_ON_FOLIO(!folio_test_large(folio), folio)
Take a temporary reference to the folio in hpage_collapse_scan_file().
This stabilizes the folio during refcount check and prevents incorrect
large folio detection due to concurrent split/free. Use helper
folio_expected_ref_count() + 1 to compare with folio_ref_count() instead
of using is_refcount_suitable().
Link: https://lkml.kernel.org/r/20250526182818.37978-1-shivankg@amd.com
Fixes: 05c5323b2a34 ("mm: track mapcount of large folios in single value")
Signed-off-by: Shivank Garg <shivankg@amd.com>
Reported-by: syzbot+2b99589e33edbe9475ca@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6828470d.a70a0220.38f255.000c.GAE@google.com
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Fengwei Yin <fengwei.yin@intel.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/khugepaged.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2295,6 +2295,17 @@ static int hpage_collapse_scan_file(stru
continue;
}
+ if (!folio_try_get(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ xas_reset(&xas);
+ continue;
+ }
+
if (folio_order(folio) == HPAGE_PMD_ORDER &&
folio->index == start) {
/* Maybe PMD-mapped */
@@ -2305,23 +2316,27 @@ static int hpage_collapse_scan_file(stru
* it's safe to skip LRU and refcount checks before
* returning.
*/
+ folio_put(folio);
break;
}
node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
+ folio_put(folio);
break;
}
cc->node_load[node]++;
if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
+ folio_put(folio);
break;
}
- if (!is_refcount_suitable(folio)) {
+ if (folio_expected_ref_count(folio) + 1 != folio_ref_count(folio)) {
result = SCAN_PAGE_COUNT;
+ folio_put(folio);
break;
}
@@ -2333,6 +2348,7 @@ static int hpage_collapse_scan_file(stru
*/
present += folio_nr_pages(folio);
+ folio_put(folio);
if (need_resched()) {
xas_pause(&xas);

View File

@@ -0,0 +1,198 @@
From 214092002cbd9945b7cc6314e76ec42b3f588c01 Mon Sep 17 00:00:00 2001
From: Shivank Garg <shivankg@amd.com>
Date: Wed, 30 Apr 2025 10:01:51 +0000
Subject: mm: add folio_expected_ref_count() for reference count calculation
Patch series " JFS: Implement migrate_folio for jfs_metapage_aops" v5.
This patchset addresses a warning that occurs during memory compaction due
to JFS's missing migrate_folio operation. The warning was introduced by
commit 7ee3647243e5 ("migrate: Remove call to ->writepage") which added
explicit warnings when filesystem don't implement migrate_folio.
The syzbot reported following [1]:
jfs_metapage_aops does not implement migrate_folio
WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 fallback_migrate_folio mm/migrate.c:953 [inline]
WARNING: CPU: 1 PID: 5861 at mm/migrate.c:955 move_to_new_folio+0x70e/0x840 mm/migrate.c:1007
Modules linked in:
CPU: 1 UID: 0 PID: 5861 Comm: syz-executor280 Not tainted 6.15.0-rc1-next-20250411-syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
RIP: 0010:fallback_migrate_folio mm/migrate.c:953 [inline]
RIP: 0010:move_to_new_folio+0x70e/0x840 mm/migrate.c:1007
To fix this issue, this series implement metapage_migrate_folio() for JFS
which handles both single and multiple metapages per page configurations.
While most filesystems leverage existing migration implementations like
filemap_migrate_folio(), buffer_migrate_folio_norefs() or
buffer_migrate_folio() (which internally used folio_expected_refs()),
JFS's metapage architecture requires special handling of its private data
during migration. To support this, this series introduce the
folio_expected_ref_count(), which calculates external references to a
folio from page/swap cache, private data, and page table mappings.
This standardized implementation replaces the previous ad-hoc
folio_expected_refs() function and enables JFS to accurately determine
whether a folio has unexpected references before attempting migration.
Implement folio_expected_ref_count() to calculate expected folio reference
counts from:
- Page/swap cache (1 per page)
- Private data (1)
- Page table mappings (1 per map)
While originally needed for page migration operations, this improved
implementation standardizes reference counting by consolidating all
refcount contributors into a single, reusable function that can benefit
any subsystem needing to detect unexpected references to folios.
The folio_expected_ref_count() returns the sum of these external
references without including any reference the caller itself might hold.
Callers comparing against the actual folio_ref_count() must account for
their own references separately.
Link: https://syzkaller.appspot.com/bug?extid=8bb6fd945af4e0ad9299 [1]
Link: https://lkml.kernel.org/r/20250430100150.279751-1-shivankg@amd.com
Link: https://lkml.kernel.org/r/20250430100150.279751-2-shivankg@amd.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Co-developed-by: David Hildenbrand <david@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dave Kleikamp <shaggy@kernel.org>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++
mm/migrate.c | 22 ++++---------------
2 files changed, 59 insertions(+), 18 deletions(-)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2307,6 +2307,61 @@ static inline bool folio_maybe_mapped_sh
return folio_test_large_maybe_mapped_shared(folio);
}
+/**
+ * folio_expected_ref_count - calculate the expected folio refcount
+ * @folio: the folio
+ *
+ * Calculate the expected folio refcount, taking references from the pagecache,
+ * swapcache, PG_private and page table mappings into account. Useful in
+ * combination with folio_ref_count() to detect unexpected references (e.g.,
+ * GUP or other temporary references).
+ *
+ * Does currently not consider references from the LRU cache. If the folio
+ * was isolated from the LRU (which is the case during migration or split),
+ * the LRU cache does not apply.
+ *
+ * Calling this function on an unmapped folio -- !folio_mapped() -- that is
+ * locked will return a stable result.
+ *
+ * Calling this function on a mapped folio will not result in a stable result,
+ * because nothing stops additional page table mappings from coming (e.g.,
+ * fork()) or going (e.g., munmap()).
+ *
+ * Calling this function without the folio lock will also not result in a
+ * stable result: for example, the folio might get dropped from the swapcache
+ * concurrently.
+ *
+ * However, even when called without the folio lock or on a mapped folio,
+ * this function can be used to detect unexpected references early (for example,
+ * if it makes sense to even lock the folio and unmap it).
+ *
+ * The caller must add any reference (e.g., from folio_try_get()) it might be
+ * holding itself to the result.
+ *
+ * Returns the expected folio refcount.
+ */
+static inline int folio_expected_ref_count(const struct folio *folio)
+{
+ const int order = folio_order(folio);
+ int ref_count = 0;
+
+ if (WARN_ON_ONCE(folio_test_slab(folio)))
+ return 0;
+
+ if (folio_test_anon(folio)) {
+ /* One reference per page from the swapcache. */
+ ref_count += folio_test_swapcache(folio) << order;
+ } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) {
+ /* One reference per page from the pagecache. */
+ ref_count += !!folio->mapping << order;
+ /* One reference from PG_private. */
+ ref_count += folio_test_private(folio);
+ }
+
+ /* One reference per page table mapping. */
+ return ref_count + folio_mapcount(folio);
+}
+
#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
static inline int arch_make_folio_accessible(struct folio *folio)
{
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -445,20 +445,6 @@ unlock:
}
#endif
-static int folio_expected_refs(struct address_space *mapping,
- struct folio *folio)
-{
- int refs = 1;
- if (!mapping)
- return refs;
-
- refs += folio_nr_pages(folio);
- if (folio_test_private(folio))
- refs++;
-
- return refs;
-}
-
/*
* Replace the folio in the mapping.
*
@@ -601,7 +587,7 @@ static int __folio_migrate_mapping(struc
int folio_migrate_mapping(struct address_space *mapping,
struct folio *newfolio, struct folio *folio, int extra_count)
{
- int expected_count = folio_expected_refs(mapping, folio) + extra_count;
+ int expected_count = folio_expected_ref_count(folio) + extra_count + 1;
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
@@ -618,7 +604,7 @@ int migrate_huge_page_move_mapping(struc
struct folio *dst, struct folio *src)
{
XA_STATE(xas, &mapping->i_pages, folio_index(src));
- int rc, expected_count = folio_expected_refs(mapping, src);
+ int rc, expected_count = folio_expected_ref_count(src) + 1;
if (folio_ref_count(src) != expected_count)
return -EAGAIN;
@@ -749,7 +735,7 @@ static int __migrate_folio(struct addres
struct folio *src, void *src_private,
enum migrate_mode mode)
{
- int rc, expected_count = folio_expected_refs(mapping, src);
+ int rc, expected_count = folio_expected_ref_count(src) + 1;
/* Check whether src does not have extra refs before we do more work */
if (folio_ref_count(src) != expected_count)
@@ -837,7 +823,7 @@ static int __buffer_migrate_folio(struct
return migrate_folio(mapping, dst, src, mode);
/* Check whether page does not have extra refs before we do more work */
- expected_count = folio_expected_refs(mapping, src);
+ expected_count = folio_expected_ref_count(src) + 1;
if (folio_ref_count(src) != expected_count)
return -EAGAIN;

View File

@@ -0,0 +1,129 @@
From 0f52f05148589fe4115322a9cc8ffab760091a0a Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Thu, 29 May 2025 15:56:47 +0000
Subject: mm: fix uprobe pte be overwritten when expanding vma
Patch series "Fix uprobe pte be overwritten when expanding vma".
This patch (of 4):
We encountered a BUG alert triggered by Syzkaller as follows:
BUG: Bad rss-counter state mm:00000000b4a60fca type:MM_ANONPAGES val:1
And we can reproduce it with the following steps:
1. register uprobe on file at zero offset
2. mmap the file at zero offset:
addr1 = mmap(NULL, 2 * 4096, PROT_NONE, MAP_PRIVATE, fd, 0);
3. mremap part of vma1 to new vma2:
addr2 = mremap(addr1, 4096, 2 * 4096, MREMAP_MAYMOVE);
4. mremap back to orig addr1:
mremap(addr2, 4096, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, addr1);
In step 3, the vma1 range [addr1, addr1 + 4096] will be remap to new vma2
with range [addr2, addr2 + 8192], and remap uprobe anon page from the vma1
to vma2, then unmap the vma1 range [addr1, addr1 + 4096].
In step 4, the vma2 range [addr2, addr2 + 4096] will be remap back to the
addr range [addr1, addr1 + 4096]. Since the addr range [addr1 + 4096,
addr1 + 8192] still maps the file, it will take vma_merge_new_range to
expand the range, and then do uprobe_mmap in vma_complete. Since the
merged vma pgoff is also zero offset, it will install uprobe anon page to
the merged vma. However, the upcomming move_page_tables step, which use
set_pte_at to remap the vma2 uprobe pte to the merged vma, will overwrite
the newly uprobe pte in the merged vma, and lead that pte to be orphan.
Since the uprobe pte will be remapped to the merged vma, we can remove the
unnecessary uprobe_mmap upon merged vma.
This problem was first found in linux-6.6.y and also exists in the
community syzkaller:
https://lore.kernel.org/all/000000000000ada39605a5e71711@google.com/T/
Link: https://lkml.kernel.org/r/20250529155650.4017699-1-pulehui@huaweicloud.com
Link: https://lkml.kernel.org/r/20250529155650.4017699-2-pulehui@huaweicloud.com
Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/vma.c | 20 +++++++++++++++++---
mm/vma.h | 7 +++++++
2 files changed, 24 insertions(+), 3 deletions(-)
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -144,6 +144,9 @@ static void init_multi_vma_prep(struct v
vp->file = vma->vm_file;
if (vp->file)
vp->mapping = vma->vm_file->f_mapping;
+
+ if (vmg && vmg->skip_vma_uprobe)
+ vp->skip_vma_uprobe = true;
}
/*
@@ -333,10 +336,13 @@ static void vma_complete(struct vma_prep
if (vp->file) {
i_mmap_unlock_write(vp->mapping);
- uprobe_mmap(vp->vma);
- if (vp->adj_next)
- uprobe_mmap(vp->adj_next);
+ if (!vp->skip_vma_uprobe) {
+ uprobe_mmap(vp->vma);
+
+ if (vp->adj_next)
+ uprobe_mmap(vp->adj_next);
+ }
}
if (vp->remove) {
@@ -1783,6 +1789,14 @@ struct vm_area_struct *copy_vma(struct v
faulted_in_anon_vma = false;
}
+ /*
+ * If the VMA we are copying might contain a uprobe PTE, ensure
+ * that we do not establish one upon merge. Otherwise, when mremap()
+ * moves page tables, it will orphan the newly created PTE.
+ */
+ if (vma->vm_file)
+ vmg.skip_vma_uprobe = true;
+
new_vma = find_vma_prev(mm, addr, &vmg.prev);
if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -19,6 +19,8 @@ struct vma_prepare {
struct vm_area_struct *insert;
struct vm_area_struct *remove;
struct vm_area_struct *remove2;
+
+ bool skip_vma_uprobe :1;
};
struct unlink_vma_file_batch {
@@ -120,6 +122,11 @@ struct vma_merge_struct {
*/
bool give_up_on_oom :1;
+ /*
+ * If set, skip uprobe_mmap upon merged vma.
+ */
+ bool skip_vma_uprobe :1;
+
/* Internal flags set during merge process: */
/*

View File

@@ -0,0 +1,217 @@
From 6f1e03b94f7777323aaefd9286d992a1cbd0adf7 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 27 May 2025 23:23:53 +0200
Subject: mm/hugetlb: unshare page tables during VMA split, not before
Currently, __split_vma() triggers hugetlb page table unsharing through
vm_ops->may_split(). This happens before the VMA lock and rmap locks are
taken - which is too early, it allows racing VMA-locked page faults in our
process and racing rmap walks from other processes to cause page tables to
be shared again before we actually perform the split.
Fix it by explicitly calling into the hugetlb unshare logic from
__split_vma() in the same place where THP splitting also happens. At that
point, both the VMA and the rmap(s) are write-locked.
An annoying detail is that we can now call into the helper
hugetlb_unshare_pmds() from two different locking contexts:
1. from hugetlb_split(), holding:
- mmap lock (exclusively)
- VMA lock
- file rmap lock (exclusively)
2. hugetlb_unshare_all_pmds(), which I think is designed to be able to
call us with only the mmap lock held (in shared mode), but currently
only runs while holding mmap lock (exclusively) and VMA lock
Backporting note:
This commit fixes a racy protection that was introduced in commit
b30c14cd6102 ("hugetlb: unshare some PMDs when splitting VMAs"); that
commit claimed to fix an issue introduced in 5.13, but it should actually
also go all the way back.
[jannh@google.com: v2]
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-1-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-0-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-1-f4136f5ec58a@google.com
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org> [b30c14cd6102: hugetlb: unshare some PMDs when splitting VMAs]
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/hugetlb.h | 3 ++
mm/hugetlb.c | 60 +++++++++++++++++++++++---------
mm/vma.c | 7 ++++
tools/testing/vma/vma_internal.h | 2 ++
4 files changed, 56 insertions(+), 16 deletions(-)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -276,6 +276,7 @@ bool is_hugetlb_entry_migration(pte_t pt
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -473,6 +474,8 @@ static inline void fixup_hugetlb_reserva
{
}
+static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}
+
#endif /* !CONFIG_HUGETLB_PAGE */
#ifndef pgd_write
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -120,7 +120,7 @@ static void hugetlb_vma_lock_free(struct
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end, bool take_locks);
static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static void hugetlb_free_folio(struct folio *folio)
@@ -5426,26 +5426,40 @@ static int hugetlb_vm_op_split(struct vm
{
if (addr & ~(huge_page_mask(hstate_vma(vma))))
return -EINVAL;
+ return 0;
+}
+void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
+{
/*
* PMD sharing is only possible for PUD_SIZE-aligned address ranges
* in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
* split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+ * This function is called in the middle of a VMA split operation, with
+ * MM, VMA and rmap all write-locked to prevent concurrent page table
+ * walks (except hardware and gup_fast()).
*/
+ vma_assert_write_locked(vma);
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
if (addr & ~PUD_MASK) {
- /*
- * hugetlb_vm_op_split is called right before we attempt to
- * split the VMA. We will need to unshare PMDs in the old and
- * new VMAs, so let's unshare before we split.
- */
unsigned long floor = addr & PUD_MASK;
unsigned long ceil = floor + PUD_SIZE;
- if (floor >= vma->vm_start && ceil <= vma->vm_end)
- hugetlb_unshare_pmds(vma, floor, ceil);
+ if (floor >= vma->vm_start && ceil <= vma->vm_end) {
+ /*
+ * Locking:
+ * Use take_locks=false here.
+ * The file rmap lock is already held.
+ * The hugetlb VMA lock can't be taken when we already
+ * hold the file rmap lock, and we don't need it because
+ * its purpose is to synchronize against concurrent page
+ * table walks, which are not possible thanks to the
+ * locks held by our caller.
+ */
+ hugetlb_unshare_pmds(vma, floor, ceil, /* take_locks = */ false);
+ }
}
-
- return 0;
}
static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
@@ -7884,9 +7898,16 @@ void move_hugetlb_state(struct folio *ol
spin_unlock_irq(&hugetlb_lock);
}
+/*
+ * If @take_locks is false, the caller must ensure that no concurrent page table
+ * access can happen (except for gup_fast() and hardware page walks).
+ * If @take_locks is true, we take the hugetlb VMA lock (to lock out things like
+ * concurrent page fault handling) and the file rmap lock.
+ */
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool take_locks)
{
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
@@ -7910,8 +7931,12 @@ static void hugetlb_unshare_pmds(struct
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
start, end);
mmu_notifier_invalidate_range_start(&range);
- hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (take_locks) {
+ hugetlb_vma_lock_write(vma);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+ }
for (address = start; address < end; address += PUD_SIZE) {
ptep = hugetlb_walk(vma, address, sz);
if (!ptep)
@@ -7921,8 +7946,10 @@ static void hugetlb_unshare_pmds(struct
spin_unlock(ptl);
}
flush_hugetlb_tlb_range(vma, start, end);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
- hugetlb_vma_unlock_write(vma);
+ if (take_locks) {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs(), see
* Documentation/mm/mmu_notifier.rst.
@@ -7937,7 +7964,8 @@ static void hugetlb_unshare_pmds(struct
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
- ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+ ALIGN_DOWN(vma->vm_end, PUD_SIZE),
+ /* take_locks = */ true);
}
/*
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -516,7 +516,14 @@ __split_vma(struct vma_iterator *vmi, st
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
+
+ /*
+ * Get rid of huge pages and shared page tables straddling the split
+ * boundary.
+ */
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -793,6 +793,8 @@ static inline void vma_adjust_trans_huge
(void)next;
}
+static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {}
+
static inline void vma_iter_free(struct vma_iterator *vmi)
{
mas_destroy(&vmi->mas);

View File

@@ -0,0 +1,50 @@
From cbd0e47470ea4db11acf3612edf91b5047a90d24 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 27 May 2025 23:23:54 +0200
Subject: mm/hugetlb: fix huge_pmd_unshare() vs GUP-fast race
huge_pmd_unshare() drops a reference on a page table that may have
previously been shared across processes, potentially turning it into a
normal page table used in another process in which unrelated VMAs can
afterwards be installed.
If this happens in the middle of a concurrent gup_fast(), gup_fast() could
end up walking the page tables of another process. While I don't see any
way in which that immediately leads to kernel memory corruption, it is
really weird and unexpected.
Fix it with an explicit broadcast IPI through tlb_remove_table_sync_one(),
just like we do in khugepaged when removing page tables for a THP
collapse.
Link: https://lkml.kernel.org/r/20250528-hugetlb-fixes-splitrace-v2-2-1329349bad1a@google.com
Link: https://lkml.kernel.org/r/20250527-hugetlb-fixes-splitrace-v1-2-f4136f5ec58a@google.com
Fixes: 39dde65c9940 ("[PATCH] shared page table for hugetlb page")
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/hugetlb.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7628,6 +7628,13 @@ int huge_pmd_unshare(struct mm_struct *m
return 0;
pud_clear(pud);
+ /*
+ * Once our caller drops the rmap lock, some other process might be
+ * using this page table as a normal, non-hugetlb page table.
+ * Wait for pending gup_fast() in other threads to finish before letting
+ * that happen.
+ */
+ tlb_remove_table_sync_one();
ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;

View File

@@ -0,0 +1,48 @@
From cb42e10062f07934d60ce2a9bc154ea7ac0bab5a Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 2 Jun 2025 10:49:26 -0700
Subject: mm/madvise: handle madvise_lock() failure during race unwinding
When unwinding race on -ERESTARTNOINTR handling of process_madvise(),
madvise_lock() failure is ignored. Check the failure and abort remaining
works in the case.
Link: https://lkml.kernel.org/r/20250602174926.1074-1-sj@kernel.org
Fixes: 4000e3d0a367 ("mm/madvise: remove redundant mmap_lock operations from process_madvise()")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Barry Song <21cnbao@gmail.com>
Closes: https://lore.kernel.org/CAGsJ_4xJXXO0G+4BizhohSZ4yDteziPw43_uF8nPXPWxUVChzw@mail.gmail.com
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/madvise.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1830,7 +1830,9 @@ static ssize_t vector_madvise(struct mm_
/* Drop and reacquire lock to unwind race. */
madvise_unlock(mm, behavior);
- madvise_lock(mm, behavior);
+ ret = madvise_lock(mm, behavior);
+ if (ret)
+ goto out;
continue;
}
if (ret < 0)
@@ -1839,6 +1841,7 @@ static ssize_t vector_madvise(struct mm_
}
madvise_unlock(mm, behavior);
+out:
ret = (total_len - iov_iter_count(iter)) ? : ret;
return ret;

View File

@@ -0,0 +1,164 @@
From 0aeb6f83ff11709bb4b6fc9afa2f742681ca36e1 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 28 May 2025 10:02:08 +0200
Subject: video: screen_info: Relocate framebuffers behind PCI bridges
Apply PCI host-bridge window offsets to screen_info framebuffers. Fixes
invalid access to I/O memory.
Resources behind a PCI host bridge can be relocated by a certain offset
in the kernel's CPU address range used for I/O. The framebuffer memory
range stored in screen_info refers to the CPU addresses as seen during
boot (where the offset is 0). During boot up, firmware may assign a
different memory offset to the PCI host bridge and thereby relocating
the framebuffer address of the PCI graphics device as seen by the kernel.
The information in screen_info must be updated as well.
The helper pcibios_bus_to_resource() performs the relocation of the
screen_info's framebuffer resource (given in PCI bus addresses). The
result matches the I/O-memory resource of the PCI graphics device (given
in CPU addresses). As before, we store away the information necessary to
later update the information in screen_info itself.
Commit 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated
EFI framebuffers") added the code for updating screen_info. It is based
on similar functionality that pre-existed in efifb. Efifb uses a pointer
to the PCI resource, while the newer code does a memcpy of the region.
Hence efifb sees any updates to the PCI resource and avoids the issue.
v3:
- Only use struct pci_bus_region for PCI bus addresses (Bjorn)
- Clarify address semantics in commit messages and comments (Bjorn)
v2:
- Fixed tags (Takashi, Ivan)
- Updated information on efifb
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Reported-by: "Ivan T. Ivanov" <iivanov@suse.de>
Closes: https://bugzilla.suse.com/show_bug.cgi?id=1240696
Tested-by: "Ivan T. Ivanov" <iivanov@suse.de>
Fixes: 78aa89d1dfba ("firmware/sysfb: Update screen_info for relocated EFI framebuffers")
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v6.9+
Link: https://lore.kernel.org/r/20250528080234.7380-1-tzimmermann@suse.de
---
drivers/video/screen_info_pci.c | 79 +++++++++++++++++++++------------
1 file changed, 50 insertions(+), 29 deletions(-)
--- a/drivers/video/screen_info_pci.c
+++ b/drivers/video/screen_info_pci.c
@@ -7,8 +7,8 @@
static struct pci_dev *screen_info_lfb_pdev;
static size_t screen_info_lfb_bar;
-static resource_size_t screen_info_lfb_offset;
-static struct resource screen_info_lfb_res = DEFINE_RES_MEM(0, 0);
+static resource_size_t screen_info_lfb_res_start; // original start of resource
+static resource_size_t screen_info_lfb_offset; // framebuffer offset within resource
static bool __screen_info_relocation_is_valid(const struct screen_info *si, struct resource *pr)
{
@@ -31,7 +31,7 @@ void screen_info_apply_fixups(void)
if (screen_info_lfb_pdev) {
struct resource *pr = &screen_info_lfb_pdev->resource[screen_info_lfb_bar];
- if (pr->start != screen_info_lfb_res.start) {
+ if (pr->start != screen_info_lfb_res_start) {
if (__screen_info_relocation_is_valid(si, pr)) {
/*
* Only update base if we have an actual
@@ -47,46 +47,67 @@ void screen_info_apply_fixups(void)
}
}
+static int __screen_info_lfb_pci_bus_region(const struct screen_info *si, unsigned int type,
+ struct pci_bus_region *r)
+{
+ u64 base, size;
+
+ base = __screen_info_lfb_base(si);
+ if (!base)
+ return -EINVAL;
+
+ size = __screen_info_lfb_size(si, type);
+ if (!size)
+ return -EINVAL;
+
+ r->start = base;
+ r->end = base + size - 1;
+
+ return 0;
+}
+
static void screen_info_fixup_lfb(struct pci_dev *pdev)
{
unsigned int type;
- struct resource res[SCREEN_INFO_MAX_RESOURCES];
- size_t i, numres;
+ struct pci_bus_region bus_region;
int ret;
+ struct resource r = {
+ .flags = IORESOURCE_MEM,
+ };
+ const struct resource *pr;
const struct screen_info *si = &screen_info;
if (screen_info_lfb_pdev)
return; // already found
type = screen_info_video_type(si);
- if (type != VIDEO_TYPE_EFI)
- return; // only applies to EFI
+ if (!__screen_info_has_lfb(type))
+ return; // only applies to EFI; maybe VESA
- ret = screen_info_resources(si, res, ARRAY_SIZE(res));
+ ret = __screen_info_lfb_pci_bus_region(si, type, &bus_region);
if (ret < 0)
return;
- numres = ret;
- for (i = 0; i < numres; ++i) {
- struct resource *r = &res[i];
- const struct resource *pr;
-
- if (!(r->flags & IORESOURCE_MEM))
- continue;
- pr = pci_find_resource(pdev, r);
- if (!pr)
- continue;
-
- /*
- * We've found a PCI device with the framebuffer
- * resource. Store away the parameters to track
- * relocation of the framebuffer aperture.
- */
- screen_info_lfb_pdev = pdev;
- screen_info_lfb_bar = pr - pdev->resource;
- screen_info_lfb_offset = r->start - pr->start;
- memcpy(&screen_info_lfb_res, r, sizeof(screen_info_lfb_res));
- }
+ /*
+ * Translate the PCI bus address to resource. Account
+ * for an offset if the framebuffer is behind a PCI host
+ * bridge.
+ */
+ pcibios_bus_to_resource(pdev->bus, &r, &bus_region);
+
+ pr = pci_find_resource(pdev, &r);
+ if (!pr)
+ return;
+
+ /*
+ * We've found a PCI device with the framebuffer
+ * resource. Store away the parameters to track
+ * relocation of the framebuffer aperture.
+ */
+ screen_info_lfb_pdev = pdev;
+ screen_info_lfb_bar = pr - pdev->resource;
+ screen_info_lfb_offset = r.start - pr->start;
+ screen_info_lfb_res_start = bus_region.start;
}
DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY, 16,
screen_info_fixup_lfb);

View File

@@ -0,0 +1,86 @@
From 06ff725d11ea8713876187973c834fb595cb26f1 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 3 Jun 2025 17:48:20 +0200
Subject: sysfb: Fix screen_info type check for VGA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Use the helper screen_info_video_type() to get the framebuffer
type from struct screen_info. Handle supported values in sorted
switch statement.
Reading orig_video_isVGA is unreliable. On most systems it is a
VIDEO_TYPE_ constant. On some systems with VGA it is simply set
to 1 to signal the presence of a VGA output. See vga_probe() for
an example. Retrieving the screen_info type with the helper
screen_info_video_type() detects these cases and returns the
appropriate VIDEO_TYPE_ constant. For VGA, sysfb creates a device
named "vga-framebuffer".
The sysfb code has been taken from vga16fb, where it likely didn't
work correctly either. With this bugfix applied, vga16fb loads for
compatible vga-framebuffer devices.
Fixes: 0db5b61e0dc0 ("fbdev/vga16fb: Create EGA/VGA devices in sysfb code")
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Tzung-Bi Shih <tzungbi@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: "Uwe Kleine-König" <u.kleine-koenig@baylibre.com>
Cc: Zsolt Kajtar <soci@c64.rulez.org>
Cc: <stable@vger.kernel.org> # v6.1+
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://lore.kernel.org/r/20250603154838.401882-1-tzimmermann@suse.de
---
drivers/firmware/sysfb.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -143,6 +143,7 @@ static __init int sysfb_init(void)
{
struct screen_info *si = &screen_info;
struct device *parent;
+ unsigned int type;
struct simplefb_platform_data mode;
const char *name;
bool compatible;
@@ -170,17 +171,26 @@ static __init int sysfb_init(void)
goto put_device;
}
+ type = screen_info_video_type(si);
+
/* if the FB is incompatible, create a legacy framebuffer device */
- if (si->orig_video_isVGA == VIDEO_TYPE_EFI)
- name = "efi-framebuffer";
- else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
- name = "vesa-framebuffer";
- else if (si->orig_video_isVGA == VIDEO_TYPE_VGAC)
- name = "vga-framebuffer";
- else if (si->orig_video_isVGA == VIDEO_TYPE_EGAC)
+ switch (type) {
+ case VIDEO_TYPE_EGAC:
name = "ega-framebuffer";
- else
+ break;
+ case VIDEO_TYPE_VGAC:
+ name = "vga-framebuffer";
+ break;
+ case VIDEO_TYPE_VLFB:
+ name = "vesa-framebuffer";
+ break;
+ case VIDEO_TYPE_EFI:
+ name = "efi-framebuffer";
+ break;
+ default:
name = "platform-framebuffer";
+ break;
+ }
pd = platform_device_alloc(name, 0);
if (!pd) {

View File

@@ -0,0 +1,113 @@
From ba4c83076943b477c90015581cc88e262a7d772f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 26 Feb 2025 16:01:57 +0100
Subject: x86/iopl: Cure TIF_IO_BITMAP inconsistencies
io_bitmap_exit() is invoked from exit_thread() when a task exists or
when a fork fails. In the latter case the exit_thread() cleans up
resources which were allocated during fork().
io_bitmap_exit() invokes task_update_io_bitmap(), which in turn ends up
in tss_update_io_bitmap(). tss_update_io_bitmap() operates on the
current task. If current has TIF_IO_BITMAP set, but no bitmap installed,
tss_update_io_bitmap() crashes with a NULL pointer dereference.
There are two issues, which lead to that problem:
1) io_bitmap_exit() should not invoke task_update_io_bitmap() when
the task, which is cleaned up, is not the current task. That's a
clear indicator for a cleanup after a failed fork().
2) A task should not have TIF_IO_BITMAP set and neither a bitmap
installed nor IOPL emulation level 3 activated.
This happens when a kernel thread is created in the context of
a user space thread, which has TIF_IO_BITMAP set as the thread
flags are copied and the IO bitmap pointer is cleared.
Other than in the failed fork() case this has no impact because
kernel threads including IO workers never return to user space and
therefore never invoke tss_update_io_bitmap().
Cure this by adding the missing cleanups and checks:
1) Prevent io_bitmap_exit() to invoke task_update_io_bitmap() if
the to be cleaned up task is not the current task.
2) Clear TIF_IO_BITMAP in copy_thread() unconditionally. For user
space forks it is set later, when the IO bitmap is inherited in
io_bitmap_share().
For paranoia sake, add a warning into tss_update_io_bitmap() to catch
the case, when that code is invoked with inconsistent state.
Fixes: ea5f1cd7ab49 ("x86/ioperm: Remove bitmap if all permissions dropped")
Reported-by: syzbot+e2b1803445d236442e54@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/87wmdceom2.ffs@tglx
---
arch/x86/kernel/ioport.c | 13 +++++++++----
arch/x86/kernel/process.c | 6 ++++++
2 files changed, 15 insertions(+), 4 deletions(-)
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
}
-static void task_update_io_bitmap(struct task_struct *tsk)
+static void task_update_io_bitmap(void)
{
+ struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *
struct io_bitmap *iobm = tsk->thread.io_bitmap;
tsk->thread.io_bitmap = NULL;
- task_update_io_bitmap(tsk);
+ /*
+ * Don't touch the TSS when invoked on a failed fork(). TSS
+ * reflects the state of @current and not the state of @tsk.
+ */
+ if (tsk == current)
+ task_update_io_bitmap();
if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm);
}
@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, leve
}
t->iopl_emul = level;
- task_update_io_bitmap(current);
-
+ task_update_io_bitmap();
return 0;
}
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -181,6 +181,7 @@ int copy_thread(struct task_struct *p, c
frame->ret_addr = (unsigned long) ret_from_fork_asm;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
+ clear_tsk_thread_flag(p, TIF_IO_BITMAP);
p->thread.iopl_warn = 0;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -469,6 +470,11 @@ void native_tss_update_io_bitmap(void)
} else {
struct io_bitmap *iobm = t->io_bitmap;
+ if (WARN_ON_ONCE(!iobm)) {
+ clear_thread_flag(TIF_IO_BITMAP);
+ native_tss_invalidate_io_bitmap();
+ }
+
/*
* Only copy bitmap data when the sequence number differs. The
* update time is accounted to the incoming task.

Some files were not shown because too many files have changed in this diff Show More