1
0
linux/debian/patches/patchset-zen/sauce/0025-ZEN-mm-Stop-kswapd-early-when-nothing-s-waiting-for-.patch

168 lines
5.6 KiB
Diff
Raw Permalink Normal View History

2024-11-15 10:44:41 +03:00
From c47df2793088980a32d6706da886fe32f7f045e6 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultan@kerneltoast.com>
Date: Sun, 19 Apr 2020 19:59:18 -0700
Subject: ZEN: mm: Stop kswapd early when nothing's waiting for it to free
pages
Contains:
- mm: Stop kswapd early when nothing's waiting for it to free pages
Keeping kswapd running when all the failed allocations that invoked it
are satisfied incurs a high overhead due to unnecessary page eviction
and writeback, as well as spurious VM pressure events to various
registered shrinkers. When kswapd doesn't need to work to make an
allocation succeed anymore, stop it early to save resources.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
- mm: Don't stop kswapd on a per-node basis when there are no waiters
The page allocator wakes all kswapds in an allocation context's allowed
nodemask in the slow path, so it doesn't make sense to keep the kswapd
waiter count per NUMA node. Instead, it should be a global counter
to stop all kswapds when there are no failed allocation requests.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
- mm: Increment kswapd_waiters for throttled direct reclaimers
Throttled direct reclaimers will wake up kswapd and wait for kswapd to
satisfy their page allocation request, even when the failed allocation
lacks the __GFP_KSWAPD_RECLAIM flag in its gfp mask. As a result, kswapd
may think that there are no waiters and thus exit prematurely, causing
throttled direct reclaimers lacking __GFP_KSWAPD_RECLAIM to stall while
waiting for kswapd to wake them up. Incrementing the kswapd_waiters
counter when such direct reclaimers become throttled fixes the problem.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
mm/internal.h | 1 +
mm/page_alloc.c | 17 ++++++++++++++---
mm/vmscan.c | 19 +++++++++++++------
3 files changed, 28 insertions(+), 9 deletions(-)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,6 +686,7 @@ extern void post_alloc_hook(struct page
extern bool free_pages_prepare(struct page *page, unsigned int order);
extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
void free_unref_page(struct page *page, unsigned int order);
void free_unref_folios(struct folio_batch *fbatch);
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -88,6 +88,8 @@ typedef int __bitwise fpi_t;
*/
#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -4189,6 +4191,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
unsigned int cpuset_mems_cookie;
unsigned int zonelist_iter_cookie;
int reserve_flags;
+ bool woke_kswapd = false;
restart:
compaction_retries = 0;
@@ -4228,8 +4231,13 @@ restart:
goto nopage;
}
- if (alloc_flags & ALLOC_KSWAPD)
+ if (alloc_flags & ALLOC_KSWAPD) {
+ if (!woke_kswapd) {
+ atomic_long_inc(&kswapd_waiters);
+ woke_kswapd = true;
+ }
wake_all_kswapds(order, gfp_mask, ac);
+ }
/*
* The adjusted alloc_flags might result in immediate success, so try
@@ -4445,9 +4453,12 @@ nopage:
goto retry;
}
fail:
- warn_alloc(gfp_mask, ac->nodemask,
- "page allocation failure: order:%u", order);
got_pg:
+ if (woke_kswapd)
+ atomic_long_dec(&kswapd_waiters);
+ if (!page)
+ warn_alloc(gfp_mask, ac->nodemask,
+ "page allocation failure: order:%u", order);
return page;
}
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6332,7 +6332,7 @@ retry:
return 0;
}
-static bool allow_direct_reclaim(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -6361,6 +6361,10 @@ static bool allow_direct_reclaim(pg_data
wmark_ok = free_pages > pfmemalloc_reserve / 2;
+ /* The throttled direct reclaimer is now a kswapd waiter */
+ if (unlikely(!using_kswapd && !wmark_ok))
+ atomic_long_inc(&kswapd_waiters);
+
/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6426,7 +6430,7 @@ static bool throttle_direct_reclaim(gfp_
/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
- if (allow_direct_reclaim(pgdat))
+ if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
goto out;
break;
}
@@ -6448,11 +6452,14 @@ static bool throttle_direct_reclaim(gfp_
*/
if (!(gfp_mask & __GFP_FS))
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
- allow_direct_reclaim(pgdat), HZ);
+ allow_direct_reclaim(pgdat, true), HZ);
else
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
- allow_direct_reclaim(pgdat));
+ allow_direct_reclaim(pgdat, true));
+
+ if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
+ atomic_long_dec(&kswapd_waiters);
if (fatal_signal_pending(current))
return true;
@@ -6955,14 +6962,14 @@ restart:
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
- allow_direct_reclaim(pgdat))
+ allow_direct_reclaim(pgdat, true))
wake_up_all(&pgdat->pfmemalloc_wait);
/* Check if kswapd should be suspending */
__fs_reclaim_release(_THIS_IP_);
ret = kthread_freezable_should_stop(&was_frozen);
__fs_reclaim_acquire(_THIS_IP_);
- if (was_frozen || ret)
+ if (was_frozen || ret || !atomic_long_read(&kswapd_waiters))
break;
/*