bcachefs-tools/linux/shrinker.c
Chris Webb 872cd437e7 bcachefs-tools: Avoid glibc-specific mallinfo() in shrinker
Before 326d7c1, the shrinker used freeram and totalram from a struct
sysinfo (constructed from /proc/meminfo) to target 25% free physical
memory. As well as the slowness of repeatedly reading /proc/meminfo,
this was a problem as freeram rises when the system starts to swap.
We don't want swapping to reduce our estimate of memory pressure.

To work around this, in 326d7c1 the shrinker started to use the total
allocated heap from a glibc-specific interface mallinfo2(), aiming to
shrink such that our heap is less than 80% of physical memory, unless
overall free memory is less than 6% so that becomes the determining factor.

Unfortunately, a sign error in the calculation means this heuristic
never worked. It would shrink aggressively when the process was small,
and not at all when the process grew beyond 80% of physical RAM. Only the
fallback test ensuring the free physical RAM doesn't fall below 6% would
actually kick in under memory pressure. It also breaks portability to
anything other than recent glibc.

Later, in 2440469 the mallinfo2() was replaced with the older mallinfo()
to improve compatibility with older glibc. This is even more problematic:
it's still not portable but also struct mallinfo has (signed) int fields
which overflow for large processes on 32-bit machines with a 3G/1G split.

Rather than trying to use libc-specific debug interfaces and our own heap
to inform the shrinker, use the information about free and total swap
we already have from sysinfo(2) to explicitly compensate for swapping
in our estimate of free physical memory. Target free memory of 6% of
physical RAM adjusted for zero swap use when calculating the pressure
on the shrinker, based on the effective behaviour of 326d7c1 in practice
given the sign error.

As well as fixing portability to non-glibc systems, this loosens the
assumption that we are the only process using significant memory when
setting the shrinker target. It wouldn't be unreasonable to run two
fsck jobs against independent devices on a large RAM machine and want to
balance physical RAM between them.

Signed-off-by: Chris Webb <chris@arachsys.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2023-12-09 23:28:01 -05:00

138 lines
3.0 KiB
C

#include <stdio.h>
#include <unistd.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/shrinker.h>
#include "tools-util.h"
/* Every registered shrinker; new entries are appended at the tail. */
static LIST_HEAD(shrinker_list);
/* Taken around each insertion into, removal from and walk over shrinker_list. */
static DEFINE_MUTEX(shrinker_lock);
/*
 * Allocate a zero-initialised struct shrinker.
 *
 * @flags and the printf-style name (@fmt, ...) are accepted but unused by
 * this implementation.
 *
 * Returns the new shrinker, or NULL if the allocation fails.
 */
struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
{
	/* calloc() takes (nmemb, size): one element of sizeof(struct shrinker). */
	return calloc(1, sizeof(struct shrinker));
}
/*
 * Append @shrinker to the tail of the global shrinker list while holding
 * shrinker_lock. Always returns 0.
 */
int shrinker_register(struct shrinker *shrinker)
{
	struct list_head *entry = &shrinker->list;

	mutex_lock(&shrinker_lock);
	list_add_tail(entry, &shrinker_list);
	mutex_unlock(&shrinker_lock);

	return 0;
}
/*
 * Unlink @shrinker from the global shrinker list while holding
 * shrinker_lock. The shrinker itself is not freed here.
 */
void unregister_shrinker(struct shrinker *shrinker)
{
	struct list_head *entry = &shrinker->list;

	mutex_lock(&shrinker_lock);
	list_del(entry);
	mutex_unlock(&shrinker_lock);
}
/*
 * Reclaim pass used when an allocation has already failed: under
 * shrinker_lock, ask every registered shrinker how many objects it holds and
 * then tell it to scan one eighth of that count.
 */
static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
{
	struct shrinker *pos;

	mutex_lock(&shrinker_lock);

	list_for_each_entry(pos, &shrinker_list, list) {
		struct shrink_control sc = { .gfp_mask = gfp_mask, };
		unsigned long nr_objects;

		nr_objects = pos->count_objects(pos, &sc);

		/* Scan target: 1/8 of what this shrinker reported. */
		sc.nr_to_scan = nr_objects / 8;
		pos->scan_objects(pos, &sc);
	}

	mutex_unlock(&shrinker_lock);
}
/*
 * Run the registered shrinkers in response to memory pressure.
 *
 * @gfp_mask:          allocation flags; nothing is done if it shares no bits
 *                     with GFP_KERNEL.
 * @allocation_failed: true when called because an allocation failed; each
 *                     shrinker is then asked to scan 1/8 of its objects
 *                     without consulting system memory statistics.
 *
 * In the normal (periodic) case the amount to shrink is the shortfall between
 * a target of 1/16 (~6%) of physical RAM and the currently free RAM, plus
 * whatever is currently sitting in swap. If there is no shortfall the
 * shrinkers are not called.
 */
void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
{
	struct shrinker *shrinker;
	struct sysinfo info;
	s64 want_shrink;

	if (!(gfp_mask & GFP_KERNEL))
		return;

	/* Fast out if there are no shrinkers to run. */
	if (list_empty(&shrinker_list))
		return;

	if (allocation_failed) {
		run_shrinkers_allocation_failed(gfp_mask);
		return;
	}

	si_meminfo(&info);

	/*
	 * Aim for 6% of physical RAM free without anything in swap:
	 * totalram >> 4 is 1/16 of RAM, and swap in use (totalswap - freeswap)
	 * counts against us so that swapping does not hide memory pressure.
	 *
	 * Each term is converted to s64 first so the subtraction is done in
	 * signed 64-bit arithmetic and a negative result (plenty of free
	 * memory) is preserved even if the sysinfo fields are narrower
	 * unsigned types.
	 */
	want_shrink = (s64) (info.totalram >> 4) - (s64) info.freeram
			+ (s64) info.totalswap - (s64) info.freeswap;
	if (want_shrink <= 0)
		return;

	mutex_lock(&shrinker_lock);
	list_for_each_entry(shrinker, &shrinker_list, list) {
		struct shrink_control sc = {
			.gfp_mask	= gfp_mask,
			/* Shortfall converted to pages via PAGE_SHIFT. */
			.nr_to_scan	= want_shrink >> PAGE_SHIFT
		};

		shrinker->scan_objects(shrinker, &sc);
	}
	mutex_unlock(&shrinker_lock);
}
/*
 * Body of the background "shrinkers" thread. Until kthread_should_stop()
 * reports true, it waits for up to about one second (or until its state reads
 * TASK_RUNNING again) and then calls run_shrinkers() with GFP_KERNEL.
 * @arg is unused. Always returns 0.
 */
static int shrinker_thread(void *arg)
{
while (!kthread_should_stop()) {
struct timespec to;
int v;
/* Deadline for this wait: one second from now on CLOCK_MONOTONIC. */
clock_gettime(CLOCK_MONOTONIC, &to);
to.tv_sec += 1;
__set_current_state(TASK_INTERRUPTIBLE);
/* Clear errno so a stale ETIMEDOUT cannot end the wait loop early. */
errno = 0;
/*
 * Wait on current->state until it reads TASK_RUNNING or the futex call
 * leaves errno == ETIMEDOUT; any other return just re-checks the
 * condition with the freshly read state value.
 */
while ((v = READ_ONCE(current->state)) != TASK_RUNNING &&
errno != ETIMEDOUT)
futex(&current->state, FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG,
v, &to, NULL, (uint32_t)~0);
if (kthread_should_stop())
break;
/* Wait ended with the state still not TASK_RUNNING: reset it ourselves. */
if (v != TASK_RUNNING)
__set_current_state(TASK_RUNNING);
/* Periodic pass, not an allocation-failure pass. */
run_shrinkers(GFP_KERNEL, false);
}
return 0;
}
/* Handle for the background shrinker thread; assigned in shrinker_thread_init(). */
struct task_struct *shrinker_task;
/*
 * Constructor (priority 103): start the "shrinkers" thread running
 * shrinker_thread() and record its task in shrinker_task. BUG_ON() fires if
 * kthread_run() returned an error pointer.
 */
__attribute__((constructor(103)))
static void shrinker_thread_init(void)
{
shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers");
BUG_ON(IS_ERR(shrinker_task));
}
#if 0
/*
 * Disabled destructor that would stop the shrinker thread at exit via
 * kthread_stop() and clear shrinker_task.
 *
 * We seem to be hitting a rare segfault when shutting down the shrinker thread.
 * Disabling this is going to cause some harmless warnings about memory leaks:
 */
__attribute__((destructor(103)))
static void shrinker_thread_exit(void)
{
int ret = kthread_stop(shrinker_task);
BUG_ON(ret);
shrinker_task = NULL;
}
#endif