add 3rd party/custom patches
3rd-party patches (in alphabetical order): - bbr3 - ntsync5 - openwrt - pf-kernel - xanmod - zen. No configuration changes for now.
This commit is contained in:
1183
debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch
vendored
Normal file
1183
debian/patches/patchset-zen/sauce/0001-ZEN-Add-VHBA-driver.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
35
debian/patches/patchset-zen/sauce/0002-vhba-Fix-compat-with-kernel-6.11.patch
vendored
Normal file
35
debian/patches/patchset-zen/sauce/0002-vhba-Fix-compat-with-kernel-6.11.patch
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
From 6df7338351c342060088aa9abd561b81ccc113d2 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Sun, 15 Sep 2024 19:05:46 +0000
|
||||
Subject: vhba: Fix compat with kernel 6.11
|
||||
|
||||
Upstream commit 0edb555a65d1ef047a9805051c36922b52a38a9d changed the
|
||||
return value of the `remove` callback from `int` to `void`.
|
||||
---
|
||||
drivers/scsi/vhba/vhba.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
--- a/drivers/scsi/vhba/vhba.c
|
||||
+++ b/drivers/scsi/vhba/vhba.c
|
||||
@@ -1049,7 +1049,11 @@ static int vhba_probe (struct platform_d
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
|
||||
static int vhba_remove (struct platform_device *pdev)
|
||||
+#else
|
||||
+static void vhba_remove (struct platform_device *pdev)
|
||||
+#endif
|
||||
{
|
||||
struct vhba_host *vhost;
|
||||
struct Scsi_Host *shost;
|
||||
@@ -1062,7 +1066,9 @@ static int vhba_remove (struct platform_
|
||||
|
||||
kfree(vhost->commands);
|
||||
|
||||
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
|
||||
return 0;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static void vhba_release (struct device * dev)
|
626
debian/patches/patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch
vendored
Normal file
626
debian/patches/patchset-zen/sauce/0003-ZEN-PCI-Add-Intel-remapped-NVMe-device-support.patch
vendored
Normal file
@@ -0,0 +1,626 @@
|
||||
From 567908cc05dc5e02b1b9c26620bce3791559f9d4 Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Drake <drake@endlessm.com>
|
||||
Date: Tue, 4 Jun 2019 14:51:21 +0800
|
||||
Subject: ZEN: PCI: Add Intel remapped NVMe device support
|
||||
|
||||
Contains:
|
||||
- PCI: Add Intel remapped NVMe device support
|
||||
|
||||
Consumer products that are configured by default to run the Intel SATA AHCI
|
||||
controller in "RAID" or "Intel RST Premium With Intel Optane System
|
||||
Acceleration" mode are becoming increasingly prevalent.
|
||||
|
||||
Under this mode, NVMe devices are remapped into the SATA device and become
|
||||
hidden from the PCI bus, which means that Linux users cannot access their
|
||||
storage devices unless they go into the firmware setup menu to revert back
|
||||
to AHCI mode - assuming such option is available. Lack of support for this
|
||||
mode is also causing complications for vendors who distribute Linux.
|
||||
|
||||
Add support for the remapped NVMe mode by creating a virtual PCI bus,
|
||||
where the AHCI and NVMe devices are presented separately, allowing the
|
||||
ahci and nvme drivers to bind in the normal way.
|
||||
|
||||
Unfortunately the NVMe device configuration space is inaccessible under
|
||||
this scheme, so we provide a fake one, and hope that no DeviceID-based
|
||||
quirks are needed. The interrupt is shared between the AHCI and NVMe
|
||||
devices.
|
||||
|
||||
Allow pci_real_dma_dev() to traverse back to the real DMA device from
|
||||
the PCI devices created on our virtual bus, in case the iommu driver
|
||||
will be involved with data transfers here.
|
||||
|
||||
The existing ahci driver is modified to not claim devices where remapped
|
||||
NVMe devices are present, allowing this new driver to step in.
|
||||
|
||||
The details of the remapping scheme came from patches previously
|
||||
posted by Dan Williams and the resulting discussion.
|
||||
|
||||
https://phabricator.endlessm.com/T24358
|
||||
https://phabricator.endlessm.com/T29119
|
||||
|
||||
Signed-off-by: Daniel Drake <drake@endlessm.com>
|
||||
|
||||
- PCI: Fix order of remapped NVMe devices
|
||||
---
|
||||
arch/x86/include/asm/pci.h | 6 +
|
||||
arch/x86/pci/common.c | 7 +-
|
||||
drivers/ata/ahci.c | 23 +-
|
||||
drivers/pci/controller/Makefile | 6 +
|
||||
drivers/pci/controller/intel-nvme-remap.c | 462 ++++++++++++++++++++++
|
||||
5 files changed, 488 insertions(+), 16 deletions(-)
|
||||
create mode 100644 drivers/pci/controller/intel-nvme-remap.c
|
||||
|
||||
--- a/arch/x86/include/asm/pci.h
|
||||
+++ b/arch/x86/include/asm/pci.h
|
||||
@@ -26,6 +26,7 @@ struct pci_sysdata {
|
||||
#if IS_ENABLED(CONFIG_VMD)
|
||||
struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */
|
||||
#endif
|
||||
+ struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */
|
||||
};
|
||||
|
||||
extern int pci_routeirq;
|
||||
@@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus
|
||||
#define is_vmd(bus) false
|
||||
#endif /* CONFIG_VMD */
|
||||
|
||||
+static inline bool is_nvme_remap(struct pci_bus *bus)
|
||||
+{
|
||||
+ return to_pci_sysdata(bus)->nvme_remap_dev != NULL;
|
||||
+}
|
||||
+
|
||||
/* Can be used to override the logic in pci_scan_bus for skipping
|
||||
already-configured bus numbers - to be used for buggy BIOSes
|
||||
or architectures with incomplete PCI setup by the loader */
|
||||
--- a/arch/x86/pci/common.c
|
||||
+++ b/arch/x86/pci/common.c
|
||||
@@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-#if IS_ENABLED(CONFIG_VMD)
|
||||
struct pci_dev *pci_real_dma_dev(struct pci_dev *dev)
|
||||
{
|
||||
+#if IS_ENABLED(CONFIG_VMD)
|
||||
if (is_vmd(dev->bus))
|
||||
return to_pci_sysdata(dev->bus)->vmd_dev;
|
||||
+#endif
|
||||
+
|
||||
+ if (is_nvme_remap(dev->bus))
|
||||
+ return to_pci_sysdata(dev->bus)->nvme_remap_dev;
|
||||
|
||||
return dev;
|
||||
}
|
||||
-#endif
|
||||
--- a/drivers/ata/ahci.c
|
||||
+++ b/drivers/ata/ahci.c
|
||||
@@ -1618,7 +1618,7 @@ static irqreturn_t ahci_thunderx_irq_han
|
||||
}
|
||||
#endif
|
||||
|
||||
-static void ahci_remap_check(struct pci_dev *pdev, int bar,
|
||||
+static int ahci_remap_check(struct pci_dev *pdev, int bar,
|
||||
struct ahci_host_priv *hpriv)
|
||||
{
|
||||
int i;
|
||||
@@ -1631,7 +1631,7 @@ static void ahci_remap_check(struct pci_
|
||||
pci_resource_len(pdev, bar) < SZ_512K ||
|
||||
bar != AHCI_PCI_BAR_STANDARD ||
|
||||
!(readl(hpriv->mmio + AHCI_VSCAP) & 1))
|
||||
- return;
|
||||
+ return 0;
|
||||
|
||||
cap = readq(hpriv->mmio + AHCI_REMAP_CAP);
|
||||
for (i = 0; i < AHCI_MAX_REMAP; i++) {
|
||||
@@ -1646,18 +1646,11 @@ static void ahci_remap_check(struct pci_
|
||||
}
|
||||
|
||||
if (!hpriv->remapped_nvme)
|
||||
- return;
|
||||
+ return 0;
|
||||
|
||||
- dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n",
|
||||
- hpriv->remapped_nvme);
|
||||
- dev_warn(&pdev->dev,
|
||||
- "Switch your BIOS from RAID to AHCI mode to use them.\n");
|
||||
-
|
||||
- /*
|
||||
- * Don't rely on the msi-x capability in the remap case,
|
||||
- * share the legacy interrupt across ahci and remapped devices.
|
||||
- */
|
||||
- hpriv->flags |= AHCI_HFLAG_NO_MSI;
|
||||
+ /* Abort probe, allowing intel-nvme-remap to step in when available */
|
||||
+ dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n");
|
||||
+ return -ENODEV;
|
||||
}
|
||||
|
||||
static int ahci_get_irq_vector(struct ata_host *host, int port)
|
||||
@@ -1896,7 +1889,9 @@ static int ahci_init_one(struct pci_dev
|
||||
hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
|
||||
|
||||
/* detect remapped nvme devices */
|
||||
- ahci_remap_check(pdev, ahci_pci_bar, hpriv);
|
||||
+ rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv);
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
|
||||
sysfs_add_file_to_group(&pdev->dev.kobj,
|
||||
&dev_attr_remapped_nvme.attr,
|
||||
--- a/drivers/pci/controller/Makefile
|
||||
+++ b/drivers/pci/controller/Makefile
|
||||
@@ -1,4 +1,10 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
+ifdef CONFIG_X86_64
|
||||
+ifdef CONFIG_SATA_AHCI
|
||||
+obj-y += intel-nvme-remap.o
|
||||
+endif
|
||||
+endif
|
||||
+
|
||||
obj-$(CONFIG_PCIE_CADENCE) += cadence/
|
||||
obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
|
||||
obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o
|
||||
--- /dev/null
|
||||
+++ b/drivers/pci/controller/intel-nvme-remap.c
|
||||
@@ -0,0 +1,462 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+/*
|
||||
+ * Intel remapped NVMe device support.
|
||||
+ *
|
||||
+ * Copyright (c) 2019 Endless Mobile, Inc.
|
||||
+ * Author: Daniel Drake <drake@endlessm.com>
|
||||
+ *
|
||||
+ * Some products ship by default with the SATA controller in "RAID" or
|
||||
+ * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this
|
||||
+ * mode, which we refer to as "remapped NVMe" mode, any installed NVMe
|
||||
+ * devices disappear from the PCI bus, and instead their I/O memory becomes
|
||||
+ * available within the AHCI device BARs.
|
||||
+ *
|
||||
+ * This scheme is understood to be a way of avoiding usage of the standard
|
||||
+ * Windows NVMe driver under that OS, instead mandating usage of Intel's
|
||||
+ * driver, which has better power management, and presumably offers
|
||||
+ * some RAID/disk-caching solutions too.
|
||||
+ *
|
||||
+ * Here in this driver, we support the remapped NVMe mode by claiming the
|
||||
+ * AHCI device and creating a fake PCIe root port. On the new bus, the
|
||||
+ * original AHCI device is exposed with only minor tweaks. Then, fake PCI
|
||||
+ * devices corresponding to the remapped NVMe devices are created. The usual
|
||||
+ * ahci and nvme drivers are then expected to bind to these devices and
|
||||
+ * operate as normal.
|
||||
+ *
|
||||
+ * The PCI configuration space for the NVMe devices is completely
|
||||
+ * unavailable, so we fake a minimal one and hope for the best.
|
||||
+ *
|
||||
+ * Interrupts are shared between the AHCI and NVMe devices. For simplicity,
|
||||
+ * we only support the legacy interrupt here, although MSI support
|
||||
+ * could potentially be added later.
|
||||
+ */
|
||||
+
|
||||
+#define MODULE_NAME "intel-nvme-remap"
|
||||
+
|
||||
+#include <linux/ahci-remap.h>
|
||||
+#include <linux/irq.h>
|
||||
+#include <linux/kernel.h>
|
||||
+#include <linux/module.h>
|
||||
+#include <linux/pci.h>
|
||||
+
|
||||
+#define AHCI_PCI_BAR_STANDARD 5
|
||||
+
|
||||
+struct nvme_remap_dev {
|
||||
+ struct pci_dev *dev; /* AHCI device */
|
||||
+ struct pci_bus *bus; /* our fake PCI bus */
|
||||
+ struct pci_sysdata sysdata;
|
||||
+ int irq_base; /* our fake interrupts */
|
||||
+
|
||||
+ /*
|
||||
+ * When we detect an all-ones write to a BAR register, this flag
|
||||
+ * is set, so that we return the BAR size on the next read (a
|
||||
+ * standard PCI behaviour).
|
||||
+ * This includes the assumption that an all-ones BAR write is
|
||||
+ * immediately followed by a read of the same register.
|
||||
+ */
|
||||
+ bool bar_sizing;
|
||||
+
|
||||
+ /*
|
||||
+ * Resources copied from the AHCI device, to be regarded as
|
||||
+ * resources on our fake bus.
|
||||
+ */
|
||||
+ struct resource ahci_resources[PCI_NUM_RESOURCES];
|
||||
+
|
||||
+ /* Resources corresponding to the NVMe devices. */
|
||||
+ struct resource remapped_dev_mem[AHCI_MAX_REMAP];
|
||||
+
|
||||
+ /* Number of remapped NVMe devices found. */
|
||||
+ int num_remapped_devices;
|
||||
+};
|
||||
+
|
||||
+static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus)
|
||||
+{
|
||||
+ return container_of(bus->sysdata, struct nvme_remap_dev, sysdata);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/******** PCI configuration space **********/
|
||||
+
|
||||
+/*
|
||||
+ * Helper macros for tweaking returned contents of PCI configuration space.
|
||||
+ *
|
||||
+ * value contains len bytes of data read from reg.
|
||||
+ * If fixup_reg is included in that range, fix up the contents of that
|
||||
+ * register to fixed_value.
|
||||
+ */
|
||||
+#define NR_FIX8(fixup_reg, fixed_value) do { \
|
||||
+ if (reg <= fixup_reg && fixup_reg < reg + len) \
|
||||
+ ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define NR_FIX16(fixup_reg, fixed_value) do { \
|
||||
+ NR_FIX8(fixup_reg, fixed_value); \
|
||||
+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define NR_FIX24(fixup_reg, fixed_value) do { \
|
||||
+ NR_FIX8(fixup_reg, fixed_value); \
|
||||
+ NR_FIX8(fixup_reg + 1, fixed_value >> 8); \
|
||||
+ NR_FIX8(fixup_reg + 2, fixed_value >> 16); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define NR_FIX32(fixup_reg, fixed_value) do { \
|
||||
+ NR_FIX16(fixup_reg, (u16) fixed_value); \
|
||||
+ NR_FIX16(fixup_reg + 2, fixed_value >> 16); \
|
||||
+ } while (0)
|
||||
+
|
||||
+/*
|
||||
+ * Read PCI config space of the slot 0 (AHCI) device.
|
||||
+ * We pass through the read request to the underlying device, but
|
||||
+ * tweak the results in some cases.
|
||||
+ */
|
||||
+static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg,
|
||||
+ int len, u32 *value)
|
||||
+{
|
||||
+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
|
||||
+ struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn,
|
||||
+ reg, len, value);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ /*
|
||||
+ * Adjust the device class, to prevent this driver from attempting to
|
||||
+ * additionally probe the device we're simulating here.
|
||||
+ */
|
||||
+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI);
|
||||
+
|
||||
+ /*
|
||||
+ * Unset interrupt pin, otherwise ACPI tries to find routing
|
||||
+ * info for our virtual IRQ, fails, and complains.
|
||||
+ */
|
||||
+ NR_FIX8(PCI_INTERRUPT_PIN, 0);
|
||||
+
|
||||
+ /*
|
||||
+ * Truncate the AHCI BAR to not include the region that covers the
|
||||
+ * hidden devices. This will cause the ahci driver to successfully
|
||||
+ * probe the new device (instead of handing it over to this driver).
|
||||
+ */
|
||||
+ if (nrdev->bar_sizing) {
|
||||
+ NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1));
|
||||
+ nrdev->bar_sizing = false;
|
||||
+ }
|
||||
+
|
||||
+ return PCIBIOS_SUCCESSFUL;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Read PCI config space of a remapped device.
|
||||
+ * Since the original PCI config space is inaccessible, we provide a minimal,
|
||||
+ * fake config space instead.
|
||||
+ */
|
||||
+static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port,
|
||||
+ int reg, int len, u32 *value)
|
||||
+{
|
||||
+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
|
||||
+ struct resource *remapped_mem;
|
||||
+
|
||||
+ if (port > nrdev->num_remapped_devices)
|
||||
+ return PCIBIOS_DEVICE_NOT_FOUND;
|
||||
+
|
||||
+ *value = 0;
|
||||
+ remapped_mem = &nrdev->remapped_dev_mem[port - 1];
|
||||
+
|
||||
+ /* Set a Vendor ID, otherwise Linux assumes no device is present */
|
||||
+ NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL);
|
||||
+
|
||||
+ /* Always appear on & bus mastering */
|
||||
+ NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
|
||||
+
|
||||
+ /* Set class so that nvme driver probes us */
|
||||
+ NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS);
|
||||
+
|
||||
+ if (nrdev->bar_sizing) {
|
||||
+ NR_FIX32(PCI_BASE_ADDRESS_0,
|
||||
+ ~(resource_size(remapped_mem) - 1));
|
||||
+ nrdev->bar_sizing = false;
|
||||
+ } else {
|
||||
+ resource_size_t mem_start = remapped_mem->start;
|
||||
+
|
||||
+ mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64;
|
||||
+ NR_FIX32(PCI_BASE_ADDRESS_0, mem_start);
|
||||
+ mem_start >>= 32;
|
||||
+ NR_FIX32(PCI_BASE_ADDRESS_1, mem_start);
|
||||
+ }
|
||||
+
|
||||
+ return PCIBIOS_SUCCESSFUL;
|
||||
+}
|
||||
+
|
||||
+/* Read PCI configuration space. */
|
||||
+static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn,
|
||||
+ int reg, int len, u32 *value)
|
||||
+{
|
||||
+ if (PCI_SLOT(devfn) == 0)
|
||||
+ return nvme_remap_pci_read_slot0(bus, reg, len, value);
|
||||
+ else
|
||||
+ return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn),
|
||||
+ reg, len, value);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Write PCI config space of the slot 0 (AHCI) device.
|
||||
+ * Apart from the special case of BAR sizing, we disable all writes.
|
||||
+ * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master)
|
||||
+ * that would affect the operation of the NVMe devices.
|
||||
+ */
|
||||
+static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg,
|
||||
+ int len, u32 value)
|
||||
+{
|
||||
+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
|
||||
+ struct pci_bus *ahci_dev_bus = nrdev->dev->bus;
|
||||
+
|
||||
+ if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) {
|
||||
+ /*
|
||||
+ * Writing all-ones to a BAR means that the size of the
|
||||
+ * memory region is being checked. Flag this so that we can
|
||||
+ * reply with an appropriate size on the next read.
|
||||
+ */
|
||||
+ if (value == ~0)
|
||||
+ nrdev->bar_sizing = true;
|
||||
+
|
||||
+ return ahci_dev_bus->ops->write(ahci_dev_bus,
|
||||
+ nrdev->dev->devfn,
|
||||
+ reg, len, value);
|
||||
+ }
|
||||
+
|
||||
+ return PCIBIOS_SET_FAILED;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Write PCI config space of a remapped device.
|
||||
+ * Since the original PCI config space is inaccessible, we reject all
|
||||
+ * writes, except for the special case of BAR probing.
|
||||
+ */
|
||||
+static int nvme_remap_pci_write_remapped(struct pci_bus *bus,
|
||||
+ unsigned int port,
|
||||
+ int reg, int len, u32 value)
|
||||
+{
|
||||
+ struct nvme_remap_dev *nrdev = nrdev_from_bus(bus);
|
||||
+
|
||||
+ if (port > nrdev->num_remapped_devices)
|
||||
+ return PCIBIOS_DEVICE_NOT_FOUND;
|
||||
+
|
||||
+ /*
|
||||
+ * Writing all-ones to a BAR means that the size of the memory
|
||||
+ * region is being checked. Flag this so that we can reply with
|
||||
+ * an appropriate size on the next read.
|
||||
+ */
|
||||
+ if (value == ~0 && reg >= PCI_BASE_ADDRESS_0
|
||||
+ && reg <= PCI_BASE_ADDRESS_5) {
|
||||
+ nrdev->bar_sizing = true;
|
||||
+ return PCIBIOS_SUCCESSFUL;
|
||||
+ }
|
||||
+
|
||||
+ return PCIBIOS_SET_FAILED;
|
||||
+}
|
||||
+
|
||||
+/* Write PCI configuration space. */
|
||||
+static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn,
|
||||
+ int reg, int len, u32 value)
|
||||
+{
|
||||
+ if (PCI_SLOT(devfn) == 0)
|
||||
+ return nvme_remap_pci_write_slot0(bus, reg, len, value);
|
||||
+ else
|
||||
+ return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn),
|
||||
+ reg, len, value);
|
||||
+}
|
||||
+
|
||||
+static struct pci_ops nvme_remap_pci_ops = {
|
||||
+ .read = nvme_remap_pci_read,
|
||||
+ .write = nvme_remap_pci_write,
|
||||
+};
|
||||
+
|
||||
+
|
||||
+/******** Initialization & exit **********/
|
||||
+
|
||||
+/*
|
||||
+ * Find a PCI domain ID to use for our fake bus.
|
||||
+ * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits).
|
||||
+ */
|
||||
+static int find_free_domain(void)
|
||||
+{
|
||||
+ int domain = 0xffff;
|
||||
+ struct pci_bus *bus = NULL;
|
||||
+
|
||||
+ while ((bus = pci_find_next_bus(bus)) != NULL)
|
||||
+ domain = max_t(int, domain, pci_domain_nr(bus));
|
||||
+
|
||||
+ return domain + 1;
|
||||
+}
|
||||
+
|
||||
+static int find_remapped_devices(struct nvme_remap_dev *nrdev,
|
||||
+ struct list_head *resources)
|
||||
+{
|
||||
+ void __iomem *mmio;
|
||||
+ int i, count = 0;
|
||||
+ u32 cap;
|
||||
+
|
||||
+ mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD,
|
||||
+ pci_resource_len(nrdev->dev,
|
||||
+ AHCI_PCI_BAR_STANDARD));
|
||||
+ if (!mmio)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ /* Check if this device might have remapped nvme devices. */
|
||||
+ if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K ||
|
||||
+ !(readl(mmio + AHCI_VSCAP) & 1))
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ cap = readq(mmio + AHCI_REMAP_CAP);
|
||||
+ for (i = AHCI_MAX_REMAP-1; i >= 0; i--) {
|
||||
+ struct resource *remapped_mem;
|
||||
+
|
||||
+ if ((cap & (1 << i)) == 0)
|
||||
+ continue;
|
||||
+ if (readl(mmio + ahci_remap_dcc(i))
|
||||
+ != PCI_CLASS_STORAGE_EXPRESS)
|
||||
+ continue;
|
||||
+
|
||||
+ /* We've found a remapped device */
|
||||
+ remapped_mem = &nrdev->remapped_dev_mem[count++];
|
||||
+ remapped_mem->start =
|
||||
+ pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD)
|
||||
+ + ahci_remap_base(i);
|
||||
+ remapped_mem->end = remapped_mem->start
|
||||
+ + AHCI_REMAP_N_SIZE - 1;
|
||||
+ remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED;
|
||||
+ pci_add_resource(resources, remapped_mem);
|
||||
+ }
|
||||
+
|
||||
+ pcim_iounmap(nrdev->dev, mmio);
|
||||
+
|
||||
+ if (count == 0)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ nrdev->num_remapped_devices = count;
|
||||
+ dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n",
|
||||
+ nrdev->num_remapped_devices);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void nvme_remap_remove_root_bus(void *data)
|
||||
+{
|
||||
+ struct pci_bus *bus = data;
|
||||
+
|
||||
+ pci_stop_root_bus(bus);
|
||||
+ pci_remove_root_bus(bus);
|
||||
+}
|
||||
+
|
||||
+static int nvme_remap_probe(struct pci_dev *dev,
|
||||
+ const struct pci_device_id *id)
|
||||
+{
|
||||
+ struct nvme_remap_dev *nrdev;
|
||||
+ LIST_HEAD(resources);
|
||||
+ int i;
|
||||
+ int ret;
|
||||
+ struct pci_dev *child;
|
||||
+
|
||||
+ nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL);
|
||||
+ nrdev->sysdata.domain = find_free_domain();
|
||||
+ nrdev->sysdata.nvme_remap_dev = dev;
|
||||
+ nrdev->dev = dev;
|
||||
+ pci_set_drvdata(dev, nrdev);
|
||||
+
|
||||
+ ret = pcim_enable_device(dev);
|
||||
+ if (ret < 0)
|
||||
+ return ret;
|
||||
+
|
||||
+ pci_set_master(dev);
|
||||
+
|
||||
+ ret = find_remapped_devices(nrdev, &resources);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ /* Add resources from the original AHCI device */
|
||||
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
|
||||
+ struct resource *res = &dev->resource[i];
|
||||
+
|
||||
+ if (res->start) {
|
||||
+ struct resource *nr_res = &nrdev->ahci_resources[i];
|
||||
+
|
||||
+ nr_res->start = res->start;
|
||||
+ nr_res->end = res->end;
|
||||
+ nr_res->flags = res->flags;
|
||||
+ pci_add_resource(&resources, nr_res);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Create virtual interrupts */
|
||||
+ nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0,
|
||||
+ nrdev->num_remapped_devices + 1,
|
||||
+ 0);
|
||||
+ if (nrdev->irq_base < 0)
|
||||
+ return nrdev->irq_base;
|
||||
+
|
||||
+ /* Create and populate PCI bus */
|
||||
+ nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops,
|
||||
+ &nrdev->sysdata, &resources);
|
||||
+ if (!nrdev->bus)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus,
|
||||
+ nrdev->bus))
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ /* We don't support sharing MSI interrupts between these devices */
|
||||
+ nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
|
||||
+
|
||||
+ pci_scan_child_bus(nrdev->bus);
|
||||
+
|
||||
+ list_for_each_entry(child, &nrdev->bus->devices, bus_list) {
|
||||
+ /*
|
||||
+ * Prevent PCI core from trying to move memory BARs around.
|
||||
+ * The hidden NVMe devices are at fixed locations.
|
||||
+ */
|
||||
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
|
||||
+ struct resource *res = &child->resource[i];
|
||||
+
|
||||
+ if (res->flags & IORESOURCE_MEM)
|
||||
+ res->flags |= IORESOURCE_PCI_FIXED;
|
||||
+ }
|
||||
+
|
||||
+ /* Share the legacy IRQ between all devices */
|
||||
+ child->irq = dev->irq;
|
||||
+ }
|
||||
+
|
||||
+ pci_assign_unassigned_bus_resources(nrdev->bus);
|
||||
+ pci_bus_add_devices(nrdev->bus);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct pci_device_id nvme_remap_ids[] = {
|
||||
+ /*
|
||||
+ * Match all Intel RAID controllers.
|
||||
+ *
|
||||
+ * There's overlap here with the set of devices detected by the ahci
|
||||
+ * driver, but ahci will only successfully probe when there
|
||||
+ * *aren't* any remapped NVMe devices, and this driver will only
|
||||
+ * successfully probe when there *are* remapped NVMe devices that
|
||||
+ * need handling.
|
||||
+ */
|
||||
+ {
|
||||
+ PCI_VDEVICE(INTEL, PCI_ANY_ID),
|
||||
+ .class = PCI_CLASS_STORAGE_RAID << 8,
|
||||
+ .class_mask = 0xffffff00,
|
||||
+ },
|
||||
+ {0,}
|
||||
+};
|
||||
+MODULE_DEVICE_TABLE(pci, nvme_remap_ids);
|
||||
+
|
||||
+static struct pci_driver nvme_remap_drv = {
|
||||
+ .name = MODULE_NAME,
|
||||
+ .id_table = nvme_remap_ids,
|
||||
+ .probe = nvme_remap_probe,
|
||||
+};
|
||||
+module_pci_driver(nvme_remap_drv);
|
||||
+
|
||||
+MODULE_AUTHOR("Daniel Drake <drake@endlessm.com>");
|
||||
+MODULE_LICENSE("GPL v2");
|
29
debian/patches/patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch
vendored
Normal file
29
debian/patches/patchset-zen/sauce/0004-ZEN-Disable-stack-conservation-for-GCC.patch
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
From 89a4975b413afa5f591c7a18109d35b5e848b582 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Sun, 8 Mar 2020 00:31:35 -0800
|
||||
Subject: ZEN: Disable stack conservation for GCC
|
||||
|
||||
There's plenty of room on the stack for a few more inlined bytes here
|
||||
and there. The measured stack usage at runtime is still safe without
|
||||
this, and performance is surely improved at a microscopic level, so
|
||||
remove it.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
Makefile | 5 -----
|
||||
1 file changed, 5 deletions(-)
|
||||
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -1003,11 +1003,6 @@ KBUILD_CFLAGS += -fno-strict-overflow
|
||||
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
|
||||
KBUILD_CFLAGS += -fno-stack-check
|
||||
|
||||
-# conserve stack if available
|
||||
-ifdef CONFIG_CC_IS_GCC
|
||||
-KBUILD_CFLAGS += -fconserve-stack
|
||||
-endif
|
||||
-
|
||||
# change __FILE__ to the relative path from the srctree
|
||||
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
|
||||
|
43
debian/patches/patchset-zen/sauce/0005-ZEN-Initialize-ata-before-graphics.patch
vendored
Normal file
43
debian/patches/patchset-zen/sauce/0005-ZEN-Initialize-ata-before-graphics.patch
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
From 5f2e6f795ce9908851acb20ab03af6550ae54f3b Mon Sep 17 00:00:00 2001
|
||||
From: Arjan van de Ven <arjan@linux.intel.com>
|
||||
Date: Thu, 2 Jun 2016 23:36:32 -0500
|
||||
Subject: ZEN: Initialize ata before graphics
|
||||
|
||||
ATA init is the long pole in the boot process, and it's asynchronous.
|
||||
move the graphics init after it so that ata and graphics initialize
|
||||
in parallel
|
||||
---
|
||||
drivers/Makefile | 13 +++++++------
|
||||
1 file changed, 7 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/Makefile
|
||||
+++ b/drivers/Makefile
|
||||
@@ -61,14 +61,8 @@ obj-y += char/
|
||||
# iommu/ comes before gpu as gpu are using iommu controllers
|
||||
obj-y += iommu/
|
||||
|
||||
-# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||
-obj-y += gpu/
|
||||
-
|
||||
obj-$(CONFIG_CONNECTOR) += connector/
|
||||
|
||||
-# i810fb depends on char/agp/
|
||||
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||
-
|
||||
obj-$(CONFIG_PARPORT) += parport/
|
||||
obj-y += base/ block/ misc/ mfd/ nfc/
|
||||
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
|
||||
@@ -80,6 +74,13 @@ obj-y += macintosh/
|
||||
obj-y += scsi/
|
||||
obj-y += nvme/
|
||||
obj-$(CONFIG_ATA) += ata/
|
||||
+
|
||||
+# gpu/ comes after char for AGP vs DRM startup and after iommu
|
||||
+obj-y += gpu/
|
||||
+
|
||||
+# i810fb depends on char/agp/
|
||||
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
|
||||
+
|
||||
obj-$(CONFIG_TARGET_CORE) += target/
|
||||
obj-$(CONFIG_MTD) += mtd/
|
||||
obj-$(CONFIG_SPI) += spi/
|
105
debian/patches/patchset-zen/sauce/0006-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch
vendored
Normal file
105
debian/patches/patchset-zen/sauce/0006-ZEN-Input-evdev-use-call_rcu-when-detaching-client.patch
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
From 3d92c251c04b1b4c6363018220af42ec3a294d1e Mon Sep 17 00:00:00 2001
|
||||
From: Kenny Levinsen <kl@kl.wtf>
|
||||
Date: Sun, 27 Dec 2020 14:43:13 +0000
|
||||
Subject: ZEN: Input: evdev - use call_rcu when detaching client
|
||||
|
||||
Significant time was spent on synchronize_rcu in evdev_detach_client
|
||||
when applications closed evdev devices. Switching VT away from a
|
||||
graphical environment commonly leads to mass input device closures,
|
||||
which could lead to noticeable delays on systems with many input devices.
|
||||
|
||||
Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev
|
||||
client struct till after the RCU grace period instead of blocking the
|
||||
calling application.
|
||||
|
||||
While this does not solve all slow evdev fd closures, it takes care of a
|
||||
good portion of them, including this simple test:
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int idx, fd;
|
||||
const char *path = "/dev/input/event0";
|
||||
for (idx = 0; idx < 1000; idx++) {
|
||||
if ((fd = open(path, O_RDWR)) == -1) {
|
||||
return -1;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Time to completion of above test when run locally:
|
||||
|
||||
Before: 0m27.111s
|
||||
After: 0m0.018s
|
||||
|
||||
Signed-off-by: Kenny Levinsen <kl@kl.wtf>
|
||||
---
|
||||
drivers/input/evdev.c | 19 +++++++++++--------
|
||||
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/drivers/input/evdev.c
|
||||
+++ b/drivers/input/evdev.c
|
||||
@@ -46,6 +46,7 @@ struct evdev_client {
|
||||
struct fasync_struct *fasync;
|
||||
struct evdev *evdev;
|
||||
struct list_head node;
|
||||
+ struct rcu_head rcu;
|
||||
enum input_clock_type clk_type;
|
||||
bool revoked;
|
||||
unsigned long *evmasks[EV_CNT];
|
||||
@@ -368,13 +369,22 @@ static void evdev_attach_client(struct e
|
||||
spin_unlock(&evdev->client_lock);
|
||||
}
|
||||
|
||||
+static void evdev_reclaim_client(struct rcu_head *rp)
|
||||
+{
|
||||
+ struct evdev_client *client = container_of(rp, struct evdev_client, rcu);
|
||||
+ unsigned int i;
|
||||
+ for (i = 0; i < EV_CNT; ++i)
|
||||
+ bitmap_free(client->evmasks[i]);
|
||||
+ kvfree(client);
|
||||
+}
|
||||
+
|
||||
static void evdev_detach_client(struct evdev *evdev,
|
||||
struct evdev_client *client)
|
||||
{
|
||||
spin_lock(&evdev->client_lock);
|
||||
list_del_rcu(&client->node);
|
||||
spin_unlock(&evdev->client_lock);
|
||||
- synchronize_rcu();
|
||||
+ call_rcu(&client->rcu, evdev_reclaim_client);
|
||||
}
|
||||
|
||||
static int evdev_open_device(struct evdev *evdev)
|
||||
@@ -427,7 +437,6 @@ static int evdev_release(struct inode *i
|
||||
{
|
||||
struct evdev_client *client = file->private_data;
|
||||
struct evdev *evdev = client->evdev;
|
||||
- unsigned int i;
|
||||
|
||||
mutex_lock(&evdev->mutex);
|
||||
|
||||
@@ -439,11 +448,6 @@ static int evdev_release(struct inode *i
|
||||
|
||||
evdev_detach_client(evdev, client);
|
||||
|
||||
- for (i = 0; i < EV_CNT; ++i)
|
||||
- bitmap_free(client->evmasks[i]);
|
||||
-
|
||||
- kvfree(client);
|
||||
-
|
||||
evdev_close_device(evdev);
|
||||
|
||||
return 0;
|
||||
@@ -486,7 +490,6 @@ static int evdev_open(struct inode *inod
|
||||
|
||||
err_free_client:
|
||||
evdev_detach_client(evdev, client);
|
||||
- kvfree(client);
|
||||
return error;
|
||||
}
|
||||
|
@@ -0,0 +1,31 @@
|
||||
From 67c446794b5fc16009bc1f31aee8846576796b11 Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Mon, 11 Jul 2022 19:10:30 -0500
|
||||
Subject: ZEN: cpufreq: Remove schedutil dependency on Intel/AMD P-State
|
||||
drivers
|
||||
|
||||
Although both P-State drivers depend on schedutil in Kconfig, both code
|
||||
bases do not use any schedutil code. This arbitrarily enables schedutil
|
||||
when unwanted in some configurations.
|
||||
---
|
||||
drivers/cpufreq/Kconfig.x86 | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
--- a/drivers/cpufreq/Kconfig.x86
|
||||
+++ b/drivers/cpufreq/Kconfig.x86
|
||||
@@ -9,7 +9,6 @@ config X86_INTEL_PSTATE
|
||||
select ACPI_PROCESSOR if ACPI
|
||||
select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
|
||||
select CPU_FREQ_GOV_PERFORMANCE
|
||||
- select CPU_FREQ_GOV_SCHEDUTIL if SMP
|
||||
help
|
||||
This driver provides a P state for Intel core processors.
|
||||
The driver implements an internal governor and will become
|
||||
@@ -39,7 +38,6 @@ config X86_AMD_PSTATE
|
||||
depends on X86 && ACPI
|
||||
select ACPI_PROCESSOR
|
||||
select ACPI_CPPC_LIB if X86_64
|
||||
- select CPU_FREQ_GOV_SCHEDUTIL if SMP
|
||||
help
|
||||
This driver adds a CPUFreq driver which utilizes a fine grain
|
||||
processor performance frequency control range instead of legacy
|
53
debian/patches/patchset-zen/sauce/0008-ZEN-intel-pstate-Implement-enable-parameter.patch
vendored
Normal file
53
debian/patches/patchset-zen/sauce/0008-ZEN-intel-pstate-Implement-enable-parameter.patch
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
From 1d5cc90283f9de0c4cc996a2f3e6ba0306c1f14d Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Wed, 15 Jan 2020 20:43:56 -0600
|
||||
Subject: ZEN: intel-pstate: Implement "enable" parameter
|
||||
|
||||
If intel-pstate is compiled into the kernel, it will preempt the loading
|
||||
of acpi-cpufreq so you can take advantage of hardware p-states without
|
||||
any friction.
|
||||
|
||||
However, intel-pstate is not completely superior to cpufreq's ondemand
|
||||
for one reason. There's no concept of an up_threshold property.
|
||||
|
||||
In ondemand, up_threshold essentially reduces the maximum utilization to
|
||||
compare against, allowing you to hit max frequencies and turbo boost
|
||||
from a much lower core utilization.
|
||||
|
||||
With intel-pstate, you have the concept of minimum and maximum
|
||||
performance, but no tunable that lets you define, maximum frequency
|
||||
means 50% core utilization. For just this oversight, there are reasons
|
||||
you may want ondemand.
|
||||
|
||||
Let's support setting "enable" in kernel boot parameters. This lets
|
||||
kernel maintainers include "intel_pstate=disable" statically in the
|
||||
static boot parameters, but let users of the kernel override this
|
||||
selection.
|
||||
---
|
||||
Documentation/admin-guide/kernel-parameters.txt | 3 +++
|
||||
drivers/cpufreq/intel_pstate.c | 2 ++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -2237,6 +2237,9 @@
|
||||
disable
|
||||
Do not enable intel_pstate as the default
|
||||
scaling driver for the supported processors
|
||||
+ enable
|
||||
+ Enable intel_pstate in-case "disable" was passed
|
||||
+ previously in the kernel boot parameters
|
||||
active
|
||||
Use intel_pstate driver to bypass the scaling
|
||||
governors layer of cpufreq and provides it own
|
||||
--- a/drivers/cpufreq/intel_pstate.c
|
||||
+++ b/drivers/cpufreq/intel_pstate.c
|
||||
@@ -3524,6 +3524,8 @@ static int __init intel_pstate_setup(cha
|
||||
|
||||
if (!strcmp(str, "disable"))
|
||||
no_load = 1;
|
||||
+ else if (!strcmp(str, "enable"))
|
||||
+ no_load = 0;
|
||||
else if (!strcmp(str, "active"))
|
||||
default_driver = &intel_pstate;
|
||||
else if (!strcmp(str, "passive"))
|
@@ -0,0 +1,91 @@
|
||||
From 493aee188c1c7c5cee2791820bfc779932bc10dc Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Fri, 15 Mar 2024 12:36:51 -0500
|
||||
Subject: ZEN: drm/amdgpu/pm: Allow override of min_power_limit with
|
||||
ignore_min_pcap
|
||||
|
||||
---
|
||||
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
|
||||
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++++++
|
||||
drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 +++
|
||||
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 ++++++++++++--
|
||||
4 files changed, 26 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
|
||||
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
|
||||
@@ -162,6 +162,7 @@ struct amdgpu_watchdog_timer {
|
||||
*/
|
||||
extern int amdgpu_modeset;
|
||||
extern unsigned int amdgpu_vram_limit;
|
||||
+extern int amdgpu_ignore_min_pcap;
|
||||
extern int amdgpu_vis_vram_limit;
|
||||
extern int amdgpu_gart_size;
|
||||
extern int amdgpu_gtt_size;
|
||||
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
|
||||
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
|
||||
@@ -135,6 +135,7 @@ enum AMDGPU_DEBUG_MASK {
|
||||
};
|
||||
|
||||
unsigned int amdgpu_vram_limit = UINT_MAX;
|
||||
+int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */
|
||||
int amdgpu_vis_vram_limit;
|
||||
int amdgpu_gart_size = -1; /* auto */
|
||||
int amdgpu_gtt_size = -1; /* auto */
|
||||
@@ -249,6 +250,15 @@ struct amdgpu_watchdog_timer amdgpu_watc
|
||||
};
|
||||
|
||||
/**
|
||||
+ * DOC: ignore_min_pcap (int)
|
||||
+ * Ignore the minimum power cap.
|
||||
+ * Useful on graphics cards where the minimum power cap is very high.
|
||||
+ * The default is 0 (Do not ignore).
|
||||
+ */
|
||||
+MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap");
|
||||
+module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600);
|
||||
+
|
||||
+/**
|
||||
* DOC: vramlimit (int)
|
||||
* Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
|
||||
*/
|
||||
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
|
||||
@@ -3272,6 +3272,9 @@ static ssize_t amdgpu_hwmon_show_power_c
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
+ if (amdgpu_ignore_min_pcap)
|
||||
+ return sysfs_emit(buf, "%i\n", 0);
|
||||
+
|
||||
return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN);
|
||||
}
|
||||
|
||||
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
|
||||
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
|
||||
@@ -2762,7 +2762,10 @@ int smu_get_power_limit(void *handle,
|
||||
*limit = smu->max_power_limit;
|
||||
break;
|
||||
case SMU_PPT_LIMIT_MIN:
|
||||
- *limit = smu->min_power_limit;
|
||||
+ if (amdgpu_ignore_min_pcap)
|
||||
+ *limit = 0;
|
||||
+ else
|
||||
+ *limit = smu->min_power_limit;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -2786,7 +2789,14 @@ static int smu_set_power_limit(void *han
|
||||
if (smu->ppt_funcs->set_power_limit)
|
||||
return smu->ppt_funcs->set_power_limit(smu, limit_type, limit);
|
||||
|
||||
- if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
|
||||
+ if (amdgpu_ignore_min_pcap) {
|
||||
+ if ((limit > smu->max_power_limit)) {
|
||||
+ dev_err(smu->adev->dev,
|
||||
+ "New power limit (%d) is over the max allowed %d\n",
|
||||
+ limit, smu->max_power_limit);
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) {
|
||||
dev_err(smu->adev->dev,
|
||||
"New power limit (%d) is out of range [%d,%d]\n",
|
||||
limit, smu->min_power_limit, smu->max_power_limit);
|
29
debian/patches/patchset-zen/sauce/0010-ZEN-Set-default-max-map-count-to-INT_MAX-5.patch
vendored
Normal file
29
debian/patches/patchset-zen/sauce/0010-ZEN-Set-default-max-map-count-to-INT_MAX-5.patch
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
From 531d884259632814e998d1662690daa1e57dd98c Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Thu, 27 Apr 2023 14:43:57 -0500
|
||||
Subject: ZEN: Set default max map count to (INT_MAX - 5)
|
||||
|
||||
Per [Fedora][1], they intend to change the default max map count for
|
||||
their distribution to improve OOTB compatibility with games played
|
||||
through Steam/Proton. The value they picked comes from the Steam Deck,
|
||||
which defaults to INT_MAX - MAPCOUNT_ELF_CORE_MARGIN.
|
||||
|
||||
Since most ZEN and Liquorix users probably play games, follow Valve's
|
||||
lead and raise this value to their default.
|
||||
|
||||
[1]: https://fedoraproject.org/wiki/Changes/IncreaseVmMaxMapCount
|
||||
---
|
||||
include/linux/mm.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/mm.h
|
||||
+++ b/include/linux/mm.h
|
||||
@@ -197,7 +197,7 @@ static inline void __mm_zero_struct_page
|
||||
* that.
|
||||
*/
|
||||
#define MAPCOUNT_ELF_CORE_MARGIN (5)
|
||||
-#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
|
||||
+#define DEFAULT_MAX_MAP_COUNT (INT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
|
||||
|
||||
extern int sysctl_max_map_count;
|
||||
|
24
debian/patches/patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch
vendored
Normal file
24
debian/patches/patchset-zen/sauce/0011-ZEN-INTERACTIVE-Base-config-item.patch
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
From 032775267df11a87616d2ec7f09c0b1b12da5da7 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Mon, 27 Jan 2020 18:10:06 +0100
|
||||
Subject: ZEN: INTERACTIVE: Base config item
|
||||
|
||||
---
|
||||
init/Kconfig | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -134,6 +134,12 @@ config THREAD_INFO_IN_TASK
|
||||
|
||||
menu "General setup"
|
||||
|
||||
+config ZEN_INTERACTIVE
|
||||
+ bool "Tune kernel for interactivity"
|
||||
+ default y
|
||||
+ help
|
||||
+ Tunes the kernel for responsiveness at the cost of throughput and power usage.
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
@@ -0,0 +1,37 @@
|
||||
From c614dbbfd3480cf18c90fd51bb52abd53339b790 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Mon, 27 Jan 2020 18:11:05 +0100
|
||||
Subject: ZEN: INTERACTIVE: Use BFQ as the elevator for SQ devices
|
||||
|
||||
---
|
||||
block/elevator.c | 4 ++++
|
||||
init/Kconfig | 4 ++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/block/elevator.c
|
||||
+++ b/block/elevator.c
|
||||
@@ -569,7 +569,11 @@ static struct elevator_type *elevator_ge
|
||||
!blk_mq_is_shared_tags(q->tag_set->flags))
|
||||
return NULL;
|
||||
|
||||
+#if defined(CONFIG_ZEN_INTERACTIVE) && defined(CONFIG_IOSCHED_BFQ)
|
||||
+ return elevator_find_get(q, "bfq");
|
||||
+#else
|
||||
return elevator_find_get(q, "mq-deadline");
|
||||
+#endif
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -140,6 +140,10 @@ config ZEN_INTERACTIVE
|
||||
help
|
||||
Tunes the kernel for responsiveness at the cost of throughput and power usage.
|
||||
|
||||
+ --- Block Layer ----------------------------------------
|
||||
+
|
||||
+ Default scheduler for SQ..: mq-deadline -> bfq
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
@@ -0,0 +1,36 @@
|
||||
From b87281991e8f34e557cf2fb1614b3f4808100233 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Mon, 12 Dec 2022 00:03:03 +0100
|
||||
Subject: ZEN: INTERACTIVE: Use Kyber as the elevator for MQ devices
|
||||
|
||||
---
|
||||
block/elevator.c | 6 ++++++
|
||||
init/Kconfig | 1 +
|
||||
2 files changed, 7 insertions(+)
|
||||
|
||||
--- a/block/elevator.c
|
||||
+++ b/block/elevator.c
|
||||
@@ -567,7 +567,13 @@ static struct elevator_type *elevator_ge
|
||||
|
||||
if (q->nr_hw_queues != 1 &&
|
||||
!blk_mq_is_shared_tags(q->tag_set->flags))
|
||||
+#if defined(CONFIG_ZEN_INTERACTIVE) && defined(CONFIG_MQ_IOSCHED_KYBER)
|
||||
+ return elevator_find_get(q, "kyber");
|
||||
+#elif defined(CONFIG_ZEN_INTERACTIVE)
|
||||
+ return elevator_find_get(q, "mq-deadline");
|
||||
+#else
|
||||
return NULL;
|
||||
+#endif
|
||||
|
||||
#if defined(CONFIG_ZEN_INTERACTIVE) && defined(CONFIG_IOSCHED_BFQ)
|
||||
return elevator_find_get(q, "bfq");
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -143,6 +143,7 @@ config ZEN_INTERACTIVE
|
||||
--- Block Layer ----------------------------------------
|
||||
|
||||
Default scheduler for SQ..: mq-deadline -> bfq
|
||||
+ Default scheduler for MQ..: none -> kyber
|
||||
|
||||
config BROKEN
|
||||
bool
|
@@ -0,0 +1,59 @@
|
||||
From e2db8ce3c52c7bd37e93728d6c12a483f17634bc Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Mon, 27 Jan 2020 18:21:09 +0100
|
||||
Subject: ZEN: INTERACTIVE: Enable background reclaim of hugepages
|
||||
|
||||
Use [defer+madvise] as default khugepaged defrag strategy:
|
||||
|
||||
For some reason, the default strategy to respond to THP fault fallbacks
|
||||
is still just madvise, meaning stall if the program wants transparent
|
||||
hugepages, but don't trigger a background reclaim / compaction if THP
|
||||
begins to fail allocations. This creates a snowball effect where we
|
||||
still use the THP code paths, but we almost always fail once a system
|
||||
has been active and busy for a while.
|
||||
|
||||
The option "defer" was created for interactive systems where THP can
|
||||
still improve performance. If we have to fallback to a regular page due
|
||||
to an allocation failure or anything else, we will trigger a background
|
||||
reclaim and compaction so future THP attempts succeed and previous
|
||||
attempts eventually have their smaller pages combined without stalling
|
||||
running applications.
|
||||
|
||||
We still want madvise to stall applications that explicitly want THP,
|
||||
so defer+madvise _does_ make a ton of sense. Make it the default for
|
||||
interactive systems, especially if the kernel maintainer left
|
||||
transparent hugepages on "always".
|
||||
|
||||
Reasoning and details in the original patch: https://lwn.net/Articles/711248/
|
||||
---
|
||||
init/Kconfig | 4 ++++
|
||||
mm/huge_memory.c | 4 ++++
|
||||
2 files changed, 8 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -145,6 +145,10 @@ config ZEN_INTERACTIVE
|
||||
Default scheduler for SQ..: mq-deadline -> bfq
|
||||
Default scheduler for MQ..: none -> kyber
|
||||
|
||||
+ --- Virtual Memory Subsystem ---------------------------
|
||||
+
|
||||
+ Background-reclaim hugepages...: no -> yes
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
|
||||
(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
|
||||
#endif
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG)|
|
||||
+#else
|
||||
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
|
||||
+#endif
|
||||
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
|
||||
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
|
||||
|
@@ -0,0 +1,41 @@
|
||||
From f5b82cc382eaf3ddf5c26f60965037bde8733445 Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Wed, 11 Aug 2021 18:47:46 -0500
|
||||
Subject: ZEN: INTERACTIVE: Tune mgLRU to protect cache used in the last second
|
||||
|
||||
Although not identical to the le9 patches that protect a byte-amount of
|
||||
cache through tunables, multigenerational LRU now supports protecting
|
||||
cache accessed in the last X milliseconds.
|
||||
|
||||
In #218, Yu recommends starting with 1000ms and tuning as needed. This
|
||||
looks like a safe default and turning on this feature should help users
|
||||
that don't know they need it.
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
mm/vmscan.c | 4 ++++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -148,6 +148,7 @@ config ZEN_INTERACTIVE
|
||||
--- Virtual Memory Subsystem ---------------------------
|
||||
|
||||
Background-reclaim hugepages...: no -> yes
|
||||
+ MG-LRU minimum cache TTL.......: 0 -> 1000 ms
|
||||
|
||||
config BROKEN
|
||||
bool
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3968,7 +3968,11 @@ static bool lruvec_is_reclaimable(struct
|
||||
}
|
||||
|
||||
/* to protect the working set of the last N jiffies */
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+static unsigned long lru_gen_min_ttl __read_mostly = HZ;
|
||||
+#else
|
||||
static unsigned long lru_gen_min_ttl __read_mostly;
|
||||
+#endif
|
||||
|
||||
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
104
debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch
vendored
Normal file
104
debian/patches/patchset-zen/sauce/0016-ZEN-INTERACTIVE-Tune-EEVDF-for-interactivity.patch
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
From 44a6d7ca11b601b34724dc41e086576499a096bd Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Tue, 31 Oct 2023 19:03:10 +0100
|
||||
Subject: ZEN: INTERACTIVE: Tune EEVDF for interactivity
|
||||
|
||||
5.7:
|
||||
Take "sysctl_sched_nr_migrate" tune from early XanMod builds of 128. As
|
||||
of 5.7, XanMod uses 256 but that may affect applications that require
|
||||
timely response to IRQs.
|
||||
|
||||
5.15:
|
||||
Per [a comment][1] on our ZEN INTERACTIVE commit, reducing the cost of
|
||||
migration makes the system less responsive under high load. Most
|
||||
likely the combination of reduced migration cost + the higher number of
|
||||
tasks that can be migrated at once contributes to this.
|
||||
|
||||
To better handle this situation, restore the mainline migration cost
|
||||
value and also reduce the max number of tasks that can be migrated in
|
||||
batch from 128 to 64.
|
||||
|
||||
If this doesn't help, we'll restore the reduced migration cost and keep
|
||||
total number of tasks that can be migrated at once to 32.
|
||||
|
||||
[1]: https://github.com/zen-kernel/zen-kernel/commit/be5ba234ca0a5aabe74bfc7e1f636f085bd3823c#commitcomment-63159674
|
||||
|
||||
6.6:
|
||||
Port the tuning to EEVDF, which removed a couple of settings.
|
||||
|
||||
6.7:
|
||||
Instead of increasing the number of tasks that migrate at once, migrate
|
||||
the amount acceptable for PREEMPT_RT, but reduce the cost so migrations
|
||||
occur more often.
|
||||
|
||||
This should make CFS/EEVDF behave more like out-of-tree schedulers that
|
||||
aggressively use idle cores to reduce latency, but without the jank
|
||||
caused by rebalancing too many tasks at once.
|
||||
---
|
||||
init/Kconfig | 7 +++++++
|
||||
kernel/sched/fair.c | 13 +++++++++++++
|
||||
kernel/sched/sched.h | 2 +-
|
||||
3 files changed, 21 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -150,6 +150,13 @@ config ZEN_INTERACTIVE
|
||||
Background-reclaim hugepages...: no -> yes
|
||||
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
|
||||
|
||||
+ --- EEVDF CPU Scheduler --------------------------------
|
||||
+
|
||||
+ Minimal granularity............: 0.75 -> 0.4 ms
|
||||
+ Migration cost.................: 0.5 -> 0.25 ms
|
||||
+ Bandwidth slice size...........: 5 -> 3 ms
|
||||
+ Task rebalancing threshold.....: 32 -> 8
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
||||
--- a/kernel/sched/fair.c
|
||||
+++ b/kernel/sched/fair.c
|
||||
@@ -73,10 +73,19 @@ unsigned int sysctl_sched_tunable_scalin
|
||||
*
|
||||
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+unsigned int sysctl_sched_base_slice = 400000ULL;
|
||||
+static unsigned int normalized_sysctl_sched_base_slice = 400000ULL;
|
||||
+#else
|
||||
unsigned int sysctl_sched_base_slice = 750000ULL;
|
||||
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
|
||||
+#endif
|
||||
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+const_debug unsigned int sysctl_sched_migration_cost = 250000UL;
|
||||
+#else
|
||||
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
|
||||
+#endif
|
||||
|
||||
static int __init setup_sched_thermal_decay_shift(char *str)
|
||||
{
|
||||
@@ -121,8 +130,12 @@ int __weak arch_asym_cpu_priority(int cp
|
||||
*
|
||||
* (default: 5 msec, units: microseconds)
|
||||
*/
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL;
|
||||
+#else
|
||||
static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
|
||||
#endif
|
||||
+#endif
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -2591,7 +2591,7 @@ extern void deactivate_task(struct rq *r
|
||||
|
||||
extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
|
||||
|
||||
-#ifdef CONFIG_PREEMPT_RT
|
||||
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_ZEN_INTERACTIVE)
|
||||
# define SCHED_NR_MIGRATE_BREAK 8
|
||||
#else
|
||||
# define SCHED_NR_MIGRATE_BREAK 32
|
@@ -0,0 +1,90 @@
|
||||
From e9c6f500f4429c32f583d6da11352b2f0bcce4c8 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <jan.steffens@gmail.com>
|
||||
Date: Mon, 27 Jan 2020 18:27:16 +0100
|
||||
Subject: ZEN: INTERACTIVE: Tune ondemand governor for interactivity
|
||||
|
||||
4.10:
|
||||
During some personal testing with the Dolphin emulator, MuQSS has
|
||||
serious problems scaling its frequencies causing poor performance where
|
||||
boosting the CPU frequencies would have fixed them. Reducing the
|
||||
up_threshold to 45 with MuQSS appears to fix the issue, letting the
|
||||
introduction to "Star Wars: Rogue Leader" run at 100% speed versus about
|
||||
80% on my test system.
|
||||
|
||||
Also, let's refactor the definitions and include some indentation to help
|
||||
the reader discern what the scope of all the macros are.
|
||||
|
||||
5.4:
|
||||
On the last custom kernel benchmark from Phoronix with Xanmod, Michael
|
||||
configured all the kernels to run using ondemand instead of the kernel's
|
||||
[default selection][1]. This reminded me that another option outside of
|
||||
the kernel's control is the user's choice to change the cpufreq governor,
|
||||
for better or for worse.
|
||||
|
||||
In Liquorix, performance is the default governor whether you're running
|
||||
acpi-cpufreq or intel-pstate. I expect laptop users to install TLP or
|
||||
LMT to control the power balance on their system, especially when
|
||||
they're plugged in or on battery. However, it's pretty clear to me a
|
||||
lot of people would choose ondemand over performance since it's not
|
||||
obvious it has huge performance ramifications with MuQSS, and ondemand
|
||||
otherwise is "good enough" for most people.
|
||||
|
||||
Let's codify lower up thresholds for MuQSS to more closely synergize with
|
||||
its aggressive thread migration behavior. This way when ondemand is
|
||||
configured, you get sort of a "performance-lite" type of result but with
|
||||
the power savings you expect when leaving the running system idle.
|
||||
|
||||
[1]: https://www.phoronix.com/scan.php?page=article&item=xanmod-2020-kernel
|
||||
|
||||
5.14:
|
||||
Although CFS and similar schedulers (BMQ, PDS, and CacULE), reuse a lot
|
||||
more of mainline scheduling and do a good job of pinning single threaded
|
||||
tasks to their respective core, there are still applications that
|
||||
confusingly run steady near 50% and benefit from going full speed or
|
||||
turbo when they need to run (emulators for more recent consoles come to
|
||||
mind).
|
||||
|
||||
Drop the up threshold for all non-MuQSS schedulers from 80/95 to 55/60.
|
||||
|
||||
5.15:
|
||||
Remove MuQSS cpufreq configuration.
|
||||
---
|
||||
drivers/cpufreq/cpufreq_ondemand.c | 8 +++++++-
|
||||
init/Kconfig | 6 ++++++
|
||||
2 files changed, 13 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/cpufreq/cpufreq_ondemand.c
|
||||
+++ b/drivers/cpufreq/cpufreq_ondemand.c
|
||||
@@ -18,10 +18,16 @@
|
||||
#include "cpufreq_ondemand.h"
|
||||
|
||||
/* On-demand governor macros */
|
||||
+#if defined(CONFIG_ZEN_INTERACTIVE)
|
||||
+#define DEF_FREQUENCY_UP_THRESHOLD (55)
|
||||
+#define MICRO_FREQUENCY_UP_THRESHOLD (60)
|
||||
+#define DEF_SAMPLING_DOWN_FACTOR (5)
|
||||
+#else
|
||||
#define DEF_FREQUENCY_UP_THRESHOLD (63)
|
||||
+#define MICRO_FREQUENCY_UP_THRESHOLD (70)
|
||||
#define DEF_SAMPLING_DOWN_FACTOR (100)
|
||||
+#endif
|
||||
#define MAX_SAMPLING_DOWN_FACTOR (100000)
|
||||
-#define MICRO_FREQUENCY_UP_THRESHOLD (70)
|
||||
#define MIN_FREQUENCY_UP_THRESHOLD (1)
|
||||
#define MAX_FREQUENCY_UP_THRESHOLD (100)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -157,6 +157,12 @@ config ZEN_INTERACTIVE
|
||||
Bandwidth slice size...........: 5 -> 3 ms
|
||||
Task rebalancing threshold.....: 32 -> 8
|
||||
|
||||
+ --- CPUFreq Settings -----------------------------------
|
||||
+
|
||||
+ Ondemand sampling down factor..: 100 -> 5
|
||||
+ Ondemand default up threshold..: 63 -> 55
|
||||
+ Ondemand micro up threshold....: 70 -> 60
|
||||
+
|
||||
config BROKEN
|
||||
bool
|
||||
|
33
debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch
vendored
Normal file
33
debian/patches/patchset-zen/sauce/0018-ZEN-INTERACTIVE-mm-Disable-unevictable-compaction.patch
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
From 4706a3fb5823c97dc6acc1e86958b71e2c048ec5 Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Sat, 5 Mar 2022 11:37:14 -0600
|
||||
Subject: ZEN: INTERACTIVE: mm: Disable unevictable compaction
|
||||
|
||||
This option is already disabled when CONFIG_PREEMPT_RT is enabled, let's
|
||||
turn it off when CONFIG_ZEN_INTERACTIVE is set as well.
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
mm/Kconfig | 2 +-
|
||||
2 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -149,6 +149,7 @@ config ZEN_INTERACTIVE
|
||||
|
||||
Background-reclaim hugepages...: no -> yes
|
||||
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
|
||||
+ Compact unevictable............: yes -> no
|
||||
|
||||
--- EEVDF CPU Scheduler --------------------------------
|
||||
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -649,7 +649,7 @@ config COMPACTION
|
||||
config COMPACT_UNEVICTABLE_DEFAULT
|
||||
int
|
||||
depends on COMPACTION
|
||||
- default 0 if PREEMPT_RT
|
||||
+ default 0 if PREEMPT_RT || ZEN_INTERACTIVE
|
||||
default 1
|
||||
|
||||
#
|
@@ -0,0 +1,38 @@
|
||||
From 8146f220f871c4db77c8363c831784041a5bcf7b Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Sat, 24 Oct 2020 22:17:49 -0700
|
||||
Subject: ZEN: INTERACTIVE: mm: Disable proactive compaction by default
|
||||
|
||||
On-demand compaction works fine assuming that you don't have a need to
|
||||
spam the page allocator nonstop for large order page allocations.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
mm/compaction.c | 4 ++++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -150,6 +150,7 @@ config ZEN_INTERACTIVE
|
||||
Background-reclaim hugepages...: no -> yes
|
||||
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
|
||||
Compact unevictable............: yes -> no
|
||||
+ Compaction proactiveness.......: 20 -> 0
|
||||
|
||||
--- EEVDF CPU Scheduler --------------------------------
|
||||
|
||||
--- a/mm/compaction.c
|
||||
+++ b/mm/compaction.c
|
||||
@@ -1950,7 +1950,11 @@ static int sysctl_compact_unevictable_al
|
||||
* aggressively the kernel should compact memory in the
|
||||
* background. It takes values in the range [0, 100].
|
||||
*/
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+static unsigned int __read_mostly sysctl_compaction_proactiveness;
|
||||
+#else
|
||||
static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
|
||||
+#endif
|
||||
static int sysctl_extfrag_threshold = 500;
|
||||
static int __read_mostly sysctl_compact_memory;
|
||||
|
@@ -0,0 +1,57 @@
|
||||
From 5f16843397798d2c709e3b8af4b1a73539d13aa8 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Sat, 28 Mar 2020 13:06:28 -0700
|
||||
Subject: ZEN: INTERACTIVE: mm: Disable watermark boosting by default
|
||||
|
||||
What watermark boosting does is preemptively fire up kswapd to free
|
||||
memory when there hasn't been an allocation failure. It does this by
|
||||
increasing kswapd's high watermark goal and then firing up kswapd. The
|
||||
reason why this causes freezes is because, with the increased high
|
||||
watermark goal, kswapd will steal memory from processes that need it in
|
||||
order to make forward progress. These processes will, in turn, try to
|
||||
allocate memory again, which will cause kswapd to steal necessary pages
|
||||
from those processes again, in a positive feedback loop known as page
|
||||
thrashing. When page thrashing occurs, your system is essentially
|
||||
livelocked until the necessary forward progress can be made to stop
|
||||
processes from trying to continuously allocate memory and trigger
|
||||
kswapd to steal it back.
|
||||
|
||||
This problem already occurs with kswapd *without* watermark boosting,
|
||||
but it's usually only encountered on machines with a small amount of
|
||||
memory and/or a slow CPU. Watermark boosting just makes the existing
|
||||
problem worse enough to notice on higher spec'd machines.
|
||||
|
||||
Disable watermark boosting by default since it's a total dumpster fire.
|
||||
I can't imagine why anyone would want to explicitly enable it, but the
|
||||
option is there in case someone does.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
mm/page_alloc.c | 4 ++++
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -151,6 +151,7 @@ config ZEN_INTERACTIVE
|
||||
MG-LRU minimum cache TTL.......: 0 -> 1000 ms
|
||||
Compact unevictable............: yes -> no
|
||||
Compaction proactiveness.......: 20 -> 0
|
||||
+ Watermark boost factor.........: 1.5 -> 0
|
||||
|
||||
--- EEVDF CPU Scheduler --------------------------------
|
||||
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -271,7 +271,11 @@ const char * const migratetype_names[MIG
|
||||
|
||||
int min_free_kbytes = 1024;
|
||||
int user_min_free_kbytes = -1;
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+static int watermark_boost_factor __read_mostly;
|
||||
+#else
|
||||
static int watermark_boost_factor __read_mostly = 15000;
|
||||
+#endif
|
||||
static int watermark_scale_factor = 10;
|
||||
|
||||
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
|
@@ -0,0 +1,57 @@
|
||||
From eb51c53e5ded1743830368815c550b871f950738 Mon Sep 17 00:00:00 2001
|
||||
From: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
Date: Wed, 20 Oct 2021 20:50:11 -0700
|
||||
Subject: ZEN: INTERACTIVE: mm: Lower the non-hugetlbpage pageblock size to
|
||||
reduce scheduling delays
|
||||
|
||||
The page allocator processes free pages in groups of pageblocks, where
|
||||
the size of a pageblock is typically quite large (1024 pages without
|
||||
hugetlbpage support). Pageblocks are processed atomically with the zone
|
||||
lock held, which can cause severe scheduling delays on both the CPU
|
||||
going through the pageblock and any other CPUs waiting to acquire the
|
||||
zone lock. A frequent offender is move_freepages_block(), which is used
|
||||
by rmqueue() for page allocation.
|
||||
|
||||
As it turns out, there's no requirement for pageblocks to be so large,
|
||||
so the pageblock order can simply be reduced to ease the scheduling
|
||||
delays and zone lock contention. PAGE_ALLOC_COSTLY_ORDER is used as a
|
||||
reasonable setting to ensure non-costly page allocation requests can
|
||||
still be serviced without always needing to free up more than one
|
||||
pageblock's worth of pages at a time.
|
||||
|
||||
This has a noticeable effect on overall system latency when memory
|
||||
pressure is elevated. The various mm functions which operate on
|
||||
pageblocks no longer appear in the preemptoff tracer, where previously
|
||||
they would spend up to 100 ms on a mobile arm64 CPU processing a
|
||||
pageblock with preemption disabled and the zone lock held.
|
||||
|
||||
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
|
||||
---
|
||||
include/linux/pageblock-flags.h | 4 ++++
|
||||
init/Kconfig | 1 +
|
||||
2 files changed, 5 insertions(+)
|
||||
|
||||
--- a/include/linux/pageblock-flags.h
|
||||
+++ b/include/linux/pageblock-flags.h
|
||||
@@ -52,7 +52,11 @@ extern unsigned int pageblock_order;
|
||||
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+#define pageblock_order PAGE_ALLOC_COSTLY_ORDER
|
||||
+#else
|
||||
#define pageblock_order MAX_PAGE_ORDER
|
||||
+#endif
|
||||
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -152,6 +152,7 @@ config ZEN_INTERACTIVE
|
||||
Compact unevictable............: yes -> no
|
||||
Compaction proactiveness.......: 20 -> 0
|
||||
Watermark boost factor.........: 1.5 -> 0
|
||||
+ Pageblock order................: 10 -> 3
|
||||
|
||||
--- EEVDF CPU Scheduler --------------------------------
|
||||
|
@@ -0,0 +1,44 @@
|
||||
From a8a0d4b9f356610babe5b884500799310fe6dcdd Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Sat, 21 May 2022 15:15:09 -0500
|
||||
Subject: ZEN: INTERACTIVE: dm-crypt: Disable workqueues for crypto ops
|
||||
|
||||
Queueing in dm-crypt for crypto operations reduces performance on modern
|
||||
systems. As discussed in an article from Cloudflare, they discovered
|
||||
that queueing was introduced because the crypto subsystem used to be
|
||||
synchronous. Since it's now asynchronous, we get double queueing when
|
||||
using the subsystem through dm-crypt. This is obviously undesirable and
|
||||
reduces throughput and increases latency.
|
||||
|
||||
Disable queueing when using our Zen Interactive configuration.
|
||||
|
||||
Fixes: https://github.com/zen-kernel/zen-kernel/issues/282
|
||||
---
|
||||
drivers/md/dm-crypt.c | 5 +++++
|
||||
init/Kconfig | 1 +
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
--- a/drivers/md/dm-crypt.c
|
||||
+++ b/drivers/md/dm-crypt.c
|
||||
@@ -3310,6 +3310,11 @@ static int crypt_ctr(struct dm_target *t
|
||||
goto bad;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+ set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
|
||||
+ set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
|
||||
+#endif
|
||||
+
|
||||
ret = crypt_ctr_cipher(ti, argv[0], argv[1]);
|
||||
if (ret < 0)
|
||||
goto bad;
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -144,6 +144,7 @@ config ZEN_INTERACTIVE
|
||||
|
||||
Default scheduler for SQ..: mq-deadline -> bfq
|
||||
Default scheduler for MQ..: none -> kyber
|
||||
+ DM-Crypt workqueues.......: yes -> no
|
||||
|
||||
--- Virtual Memory Subsystem ---------------------------
|
||||
|
49
debian/patches/patchset-zen/sauce/0023-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch
vendored
Normal file
49
debian/patches/patchset-zen/sauce/0023-ZEN-INTERACTIVE-mm-swap-Disable-swap-in-readahead.patch
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
From 5a8fabcd4e7396500f2c0070f8b7ce9106eb9bfa Mon Sep 17 00:00:00 2001
|
||||
From: Steven Barrett <steven@liquorix.net>
|
||||
Date: Mon, 5 Sep 2022 11:35:20 -0500
|
||||
Subject: ZEN: INTERACTIVE: mm/swap: Disable swap-in readahead
|
||||
|
||||
Per an [issue][1] on the chromium project, swap-in readahead causes more
|
||||
jank than not. This might be caused by poor optimization on the
|
||||
swapping code, or the fact that under memory pressure, we're pulling in pages
|
||||
we don't need, causing more swapping.
|
||||
|
||||
Either way, this is mainline/upstream to Chromium, and ChromeOS
|
||||
developers care a lot about system responsiveness. Let's implement the
|
||||
same change so Zen Kernel users benefit.
|
||||
|
||||
[1]: https://bugs.chromium.org/p/chromium/issues/detail?id=263561
|
||||
---
|
||||
init/Kconfig | 1 +
|
||||
mm/swap.c | 5 +++++
|
||||
2 files changed, 6 insertions(+)
|
||||
|
||||
--- a/init/Kconfig
|
||||
+++ b/init/Kconfig
|
||||
@@ -154,6 +154,7 @@ config ZEN_INTERACTIVE
|
||||
Compaction proactiveness.......: 20 -> 0
|
||||
Watermark boost factor.........: 1.5 -> 0
|
||||
Pageblock order................: 10 -> 3
|
||||
+ Swap-in readahead..............: 3 -> 0
|
||||
|
||||
--- EEVDF CPU Scheduler --------------------------------
|
||||
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -1126,6 +1126,10 @@ void folio_batch_remove_exceptionals(str
|
||||
*/
|
||||
void __init swap_setup(void)
|
||||
{
|
||||
+#ifdef CONFIG_ZEN_INTERACTIVE
|
||||
+ /* Only swap-in pages requested, avoid readahead */
|
||||
+ page_cluster = 0;
|
||||
+#else
|
||||
unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT);
|
||||
|
||||
/* Use a smaller cluster for small-memory machines */
|
||||
@@ -1137,4 +1141,5 @@ void __init swap_setup(void)
|
||||
* Right now other parts of the system means that we
|
||||
* _really_ don't want to cluster much more
|
||||
*/
|
||||
+#endif
|
||||
}
|
19
debian/patches/patchset-zen/sauce/0024-ZEN-Update-VHBA-driver.patch
vendored
Normal file
19
debian/patches/patchset-zen/sauce/0024-ZEN-Update-VHBA-driver.patch
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
From 238d23d6cb3f8610aa1cd3bdaeb398c63a4c9cb2 Mon Sep 17 00:00:00 2001
|
||||
From: "Jan Alexander Steffens (heftig)" <heftig@archlinux.org>
|
||||
Date: Tue, 1 Oct 2024 02:22:46 +0200
|
||||
Subject: ZEN: Update VHBA driver
|
||||
|
||||
remote https://github.com/cdemu/cdemu
|
||||
tag vhba-module-20240917
|
||||
---
|
||||
drivers/scsi/vhba/Makefile | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/scsi/vhba/Makefile
|
||||
+++ b/drivers/scsi/vhba/Makefile
|
||||
@@ -1,4 +1,4 @@
|
||||
-VHBA_VERSION := 20240202
|
||||
+VHBA_VERSION := 20240917
|
||||
|
||||
obj-$(CONFIG_VHBA) += vhba.o
|
||||
ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror
|
Reference in New Issue
Block a user