mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-12-08 00:00:12 +03:00
kill systemd integration
We probably won't ever need this, since we've steadily been doing more and more self healing. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
6375fda6ec
commit
5862ca9f02
46
Makefile
46
Makefile
@ -118,45 +118,8 @@ else
|
||||
INITRAMFS_DIR=/etc/initramfs-tools
|
||||
endif
|
||||
|
||||
PKGCONFIG_SERVICEDIR:=$(shell $(PKG_CONFIG) --variable=systemdsystemunitdir systemd)
|
||||
ifeq (,$(PKGCONFIG_SERVICEDIR))
|
||||
$(warning skipping systemd integration)
|
||||
else
|
||||
BCACHEFSCK_ARGS=-f -n
|
||||
systemd_libexecfiles=\
|
||||
fsck/bcachefsck_fail \
|
||||
fsck/bcachefsck_all
|
||||
|
||||
systemd_services=\
|
||||
fsck/bcachefsck_fail@.service \
|
||||
fsck/bcachefsck@.service \
|
||||
fsck/system-bcachefsck.slice \
|
||||
fsck/bcachefsck_all_fail.service \
|
||||
fsck/bcachefsck_all.service \
|
||||
fsck/bcachefsck_all.timer
|
||||
|
||||
built_scripts+=\
|
||||
fsck/bcachefsck_fail@.service \
|
||||
fsck/bcachefsck@.service \
|
||||
fsck/bcachefsck_all_fail.service \
|
||||
fsck/bcachefsck_all \
|
||||
fsck/bcachefsck_all.service
|
||||
|
||||
%.service: %.service.in
|
||||
@echo " [SED] $@"
|
||||
$(Q)sed -e "s|@libexecdir@|$(LIBEXECDIR)|g" \
|
||||
-e "s|@bcachefsck_args@|$(BCACHEFSCK_ARGS)|g" < $< > $@
|
||||
|
||||
fsck/bcachefsck_all: fsck/bcachefsck_all.in
|
||||
@echo " [SED] $@"
|
||||
$(Q)sed -e "s|@bcachefsck_args@|$(BCACHEFSCK_ARGS)|g" < $< > $@
|
||||
|
||||
optional_build+=$(systemd_libexecfiles) $(systemd_services)
|
||||
optional_install+=install_systemd
|
||||
endif # PKGCONFIG_SERVICEDIR
|
||||
|
||||
.PHONY: all
|
||||
all: bcachefs initramfs/hook dkms/dkms.conf $(optional_build)
|
||||
all: bcachefs initramfs/hook dkms/dkms.conf
|
||||
|
||||
.PHONY: debug
|
||||
debug: CFLAGS+=-Werror -DCONFIG_BCACHEFS_DEBUG=y -DCONFIG_VALGRIND=y
|
||||
@ -211,7 +174,7 @@ initramfs/hook: initramfs/hook.in
|
||||
.PHONY: install
|
||||
install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
|
||||
install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
|
||||
install: all install_dkms $(optional_install)
|
||||
install: all install_dkms
|
||||
$(INSTALL) -m0755 -D $(BUILT_BIN) -t $(DESTDIR)$(ROOT_SBINDIR)
|
||||
$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
|
||||
$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
|
||||
@ -224,11 +187,6 @@ install: all install_dkms $(optional_install)
|
||||
$(LN) -sfr $(DESTDIR)$(ROOT_SBINDIR)/bcachefs $(DESTDIR)$(ROOT_SBINDIR)/fsck.fuse.bcachefs
|
||||
$(LN) -sfr $(DESTDIR)$(ROOT_SBINDIR)/bcachefs $(DESTDIR)$(ROOT_SBINDIR)/mount.fuse.bcachefs
|
||||
|
||||
.PHONY: install_systemd
|
||||
install_systemd: $(systemd_services) $(systemd_libexecfiles)
|
||||
$(INSTALL) -m0755 -D $(systemd_libexecfiles) -t $(DESTDIR)$(LIBEXECDIR)
|
||||
$(INSTALL) -m0644 -D $(systemd_services) -t $(DESTDIR)$(PKGCONFIG_SERVICEDIR)
|
||||
|
||||
.PHONY: install_dkms
|
||||
install_dkms: dkms/dkms.conf
|
||||
$(INSTALL) -m0644 -D dkms/Makefile -t $(DESTDIR)$(DKMSDIR)
|
||||
|
||||
2
fsck/.gitignore
vendored
2
fsck/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
*.service
|
||||
bcachefsck_all
|
||||
@ -1,98 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
Description=Online bcachefsck for %f
# On failure, start the matching failure-reporting instance.  This must name
# the *_fail template; pointing it at bcachefsck@ would make a failed check
# trigger itself again.
OnFailure=bcachefsck_fail@%i.service
Documentation=man:bcachefs(8)

# Explicitly require the capabilities that this program needs
ConditionCapability=CAP_SYS_ADMIN
ConditionCapability=CAP_FOWNER
ConditionCapability=CAP_DAC_OVERRIDE
ConditionCapability=CAP_DAC_READ_SEARCH
ConditionCapability=CAP_SYS_RAWIO

# Must be a mountpoint
ConditionPathIsMountPoint=%f
RequiresMountsFor=%f
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
Environment=SERVICE_MODE=1
|
||||
ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
|
||||
SyslogIdentifier=%N
|
||||
|
||||
# Run scrub with minimal CPU and IO priority so that nothing else will starve.
|
||||
IOSchedulingClass=idle
|
||||
CPUSchedulingPolicy=idle
|
||||
CPUAccounting=true
|
||||
Nice=19
|
||||
|
||||
# Create the service underneath the background service slice so that we can
|
||||
# control resource usage.
|
||||
Slice=system-bcachefsck.slice
|
||||
|
||||
# No realtime CPU scheduling
|
||||
RestrictRealtime=true
|
||||
|
||||
# Dynamically create a user that isn't root
|
||||
DynamicUser=true
|
||||
|
||||
# Make the entire filesystem readonly and /home inaccessible, then bind mount
|
||||
# the filesystem we're supposed to be checking into our private /tmp dir.
|
||||
# 'norbind' means that we don't bind anything under that original mount.
|
||||
# This enables checking filesystems mounted under /tmp in the global mount
|
||||
# namespace.
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
PrivateTmp=true
|
||||
BindPaths=%f:/tmp/scrub:norbind
|
||||
|
||||
# No network access
|
||||
PrivateNetwork=true
|
||||
ProtectHostname=true
|
||||
RestrictAddressFamilies=none
|
||||
IPAddressDeny=any
|
||||
|
||||
# Don't let the program mess with the kernel configuration at all
|
||||
ProtectKernelLogs=true
|
||||
ProtectKernelModules=true
|
||||
ProtectKernelTunables=true
|
||||
ProtectControlGroups=true
|
||||
ProtectProc=invisible
|
||||
RestrictNamespaces=true
|
||||
|
||||
# Hide everything in /proc, even /proc/mounts
|
||||
ProcSubset=pid
|
||||
|
||||
# Only allow the default personality Linux
|
||||
LockPersonality=true
|
||||
|
||||
# No writable memory pages
|
||||
MemoryDenyWriteExecute=true
|
||||
|
||||
# Don't let our mounts leak out to the host
|
||||
PrivateMounts=true
|
||||
|
||||
# Restrict system calls to the native arch and only enough to get things going
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@privileged
|
||||
SystemCallFilter=~@resources
|
||||
SystemCallFilter=~@mount
|
||||
|
||||
# bcachefsck needs these privileges to run, and no others
|
||||
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
|
||||
AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
|
||||
NoNewPrivileges=true
|
||||
|
||||
# bcachefsck doesn't create files
|
||||
UMask=7777
|
||||
|
||||
# No access to hardware /dev files except for block devices
|
||||
ProtectClock=true
|
||||
DevicePolicy=closed
|
||||
DeviceAllow=block-*
|
||||
@ -1,481 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
# Copyright (C) 2023-2024 Oracle. All rights reserved.
|
||||
#
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
# Run bcachefsck in parallel, but avoid thrashing.
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import signal
|
||||
import dbus
|
||||
from io import TextIOWrapper
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
|
||||
retcode = 0
|
||||
terminate = False
|
||||
debug = False
|
||||
|
||||
def DEVNULL():
    '''Return /dev/null in subprocess writable format.

    subprocess.DEVNULL is always available on Python 3 (which the
    shebang of this script requires), so no fallback that opens
    os.devnull -- and would leave that file open -- is needed.'''
    return subprocess.DEVNULL
|
||||
|
||||
def find_mounts():
    '''Map bcachefs mountpoints to the disks they were found under.

    Runs lsblk with JSON output and returns a dict whose keys are the
    mountpoints of mounted bcachefs filesystems and whose values are
    sets of top-level kernel device names (KNAME).  Returns an empty
    dict if lsblk exits with a non-zero status.'''
    def find_bcachefs_mounts(bdev, fs, lastdisk):
        '''Attach lastdisk to each bcachefs mount found under bdev.'''
        if bdev['fstype'] == 'bcachefs' and bdev['mountpoint'] is not None:
            # The same mountpoint can be reported under several disks;
            # merge the names into one set.  (set.add() of the list
            # returned by split() would raise TypeError.)
            fs.setdefault(bdev['mountpoint'], set()).update(
                    lastdisk.split(':'))
        for child in bdev.get('children', ()):
            find_bcachefs_mounts(child, fs, lastdisk)

    fs = {}
    cmd = ['lsblk', '-o', 'NAME,KNAME,TYPE,FSTYPE,MOUNTPOINT', '-J']
    # run() drains stdout while the child is running, so a large device
    # list cannot fill the pipe and stall lsblk before it exits.
    result = subprocess.run(cmd, stdout=subprocess.PIPE)
    if result.returncode != 0:
        return fs
    # sys.stdout.encoding can be None when stdout is not a real stream.
    output = result.stdout.decode(sys.stdout.encoding or 'utf-8')
    bdevdata = json.loads(output)

    # The lsblk output had better be in disks-then-partitions order
    for bdev in bdevdata['blockdevices']:
        find_bcachefs_mounts(bdev, fs, bdev['kname'])

    return fs
|
||||
|
||||
def backtick(cmd):
    '''Generator function that yields lines of a program's stdout.

    Each line is decoded as UTF-8 and stripped of surrounding
    whitespace.  The child process is waited for and its pipe closed
    once the generator is exhausted or closed, so no zombie process or
    open file descriptor is left behind.'''
    with subprocess.Popen(cmd, stdout = subprocess.PIPE) as p:
        for line in TextIOWrapper(p.stdout, encoding="utf-8"):
            yield line.strip()
|
||||
|
||||
def remove_killfunc(killfuncs, fn):
    '''Ensure fn is not in killfuncs.

    A missing fn is not an error: it may already have been popped by
    the termination path.  Only the "not present" errors are ignored
    (KeyError for sets, ValueError for lists); anything else
    propagates instead of being silently swallowed.'''
    try:
        killfuncs.remove(fn)
    except (KeyError, ValueError):
        pass
|
||||
|
||||
class scrub_control(object):
    '''Control object for bcachefsck.

    Abstract interface: subclasses provide start() and stop().'''
    def __init__(self):
        pass

    def start(self):
        '''Start scrub and wait for it to complete.  Returns -1 if the
        service was not started, 0 if it succeeded, or 1 if it
        failed.'''
        # Raise explicitly; an "assert False" is compiled out under
        # python -O and would then silently return None.
        raise NotImplementedError

    def stop(self):
        '''Stop scrub.'''
        raise NotImplementedError
|
||||
|
||||
class scrub_subprocess(scrub_control):
    '''Control object for bcachefsck subprocesses.'''
    def __init__(self, mnt):
        '''Build the "bcachefs fsck" command line for mountpoint mnt.'''
        cmd = ['bcachefs', 'fsck']
        # Placeholder string; it is split on whitespace into extra
        # arguments.
        cmd += '@bcachefsck_args@'.split()
        cmd += [mnt]
        self.cmdline = cmd
        self.proc = None

    def start(self):
        '''Start bcachefsck and wait for it to complete.  Returns -1 if
        the program could not be started or waited for; otherwise the
        exit status of the process.'''
        global debug

        if debug:
            print('run ', ' '.join(self.cmdline))

        try:
            self.proc = subprocess.Popen(self.cmdline)
            self.proc.wait()
        except Exception:
            return -1

        proc = self.proc
        self.proc = None
        return proc.returncode

    def stop(self):
        '''Stop bcachefsck.'''
        global debug

        if debug:
            print('kill ', ' '.join(self.cmdline))
        # Take one snapshot of self.proc: start() resets it to None
        # once the process has exited, so testing and then re-reading
        # the attribute could end up calling terminate() on None.
        proc = self.proc
        if proc is not None:
            proc.terminate()
|
||||
|
||||
def run_subprocess(mnt, killfuncs):
    '''Run a killable program.  Returns program retcode or -1 if we can't
    start it.

    The program's stop function is registered in killfuncs for as long
    as it runs and is always unregistered again, even if start()
    raises.'''
    try:
        p = scrub_subprocess(mnt)
        killfuncs.add(p.stop)
        try:
            return p.start()
        finally:
            remove_killfunc(killfuncs, p.stop)
    except Exception:
        return -1
|
||||
|
||||
# systemd doesn't like unit instance names with slashes in them, so it
|
||||
# replaces them with dashes when it invokes the service. Filesystem paths
|
||||
# need a special --path argument so that dashes do not get mangled.
|
||||
def path_to_serviceunit(path):
    '''Convert a pathname into a systemd service unit name.

    Runs systemd-escape to instantiate the bcachefsck@.service template
    for path.  Returns the first line of its output with surrounding
    whitespace stripped, or None if it printed nothing.'''

    svcname = 'bcachefsck@.service'
    cmd = ['systemd-escape', '--template', svcname, '--path', path]

    # run() reads stdout while the child runs and closes the pipe
    # afterwards, instead of waiting for exit before reading.
    proc = subprocess.run(cmd, stdout = subprocess.PIPE)
    for line in proc.stdout.splitlines():
        # sys.stdout.encoding can be None when stdout is not a real
        # stream; fall back to UTF-8 in that case.
        return line.decode(sys.stdout.encoding or 'utf-8').strip()
    return None
|
||||
|
||||
def fibonacci(max_ret):
    '''Yield the Fibonacci sequence 1, 1, 2, 3, 5, ... up to and
    including max_ret.  Yields nothing if max_ret is less than 1.'''
    if max_ret < 1:
        return

    # The first term is yielded unconditionally; every later term is
    # yielded only while it does not exceed max_ret.
    yield 1

    prev, cur = 1, 1
    while cur <= max_ret:
        yield cur
        prev, cur = cur, prev + cur
|
||||
|
||||
class scrub_service(scrub_control):
    '''Control object for bcachefsck systemd service.

    Represents the bcachefsck@.service instance for one mountpoint and
    drives it through the systemd D-Bus interfaces.'''
    def __init__(self, mnt):
        '''Resolve the unit name for mnt and bind to its D-Bus objects.'''
        self.unitname = path_to_serviceunit(mnt)
        self.prop = None
        self.unit = None
        self.bind()

    def bind(self):
        '''Bind to the dbus proxy object for this service.'''
        sysbus = dbus.SystemBus()
        systemd1 = sysbus.get_object('org.freedesktop.systemd1',
                                    '/org/freedesktop/systemd1')
        manager = dbus.Interface(systemd1,
                                 'org.freedesktop.systemd1.Manager')
        path = manager.LoadUnit(self.unitname)

        svc_obj = sysbus.get_object('org.freedesktop.systemd1', path)
        self.prop = dbus.Interface(svc_obj,
                                   'org.freedesktop.DBus.Properties')
        self.unit = dbus.Interface(svc_obj,
                                   'org.freedesktop.systemd1.Unit')

    def __dbusrun(self, lambda_fn):
        '''Call the lambda function to execute something on dbus.  dbus
        exceptions result in retries with Fibonacci backoff, and the
        bindings will be rebuilt every time.  Raises the last dbus
        exception seen once the retries are used up.'''
        global debug

        fatal_ex = None

        for i in fibonacci(30):
            try:
                return lambda_fn()
            except dbus.exceptions.DBusException as e:
                if debug:
                    print(e)
                fatal_ex = e
                time.sleep(i)
                try:
                    self.bind()
                except dbus.exceptions.DBusException as bind_ex:
                    # A failed rebind is a dbus failure like any other:
                    # record it and let the next round retry, rather
                    # than letting it escape the backoff loop.
                    if debug:
                        print(bind_ex)
                    fatal_ex = bind_ex
        raise fatal_ex

    def state(self):
        '''Retrieve the active state for a systemd service.  As of
        systemd 249, this is supposed to be one of the following:
        "active", "reloading", "inactive", "failed", "activating",
        or "deactivating".  These strings are not localized.

        Any error while querying is reported as "failed".'''
        global debug

        query = lambda: self.prop.Get('org.freedesktop.systemd1.Unit',
                                      'ActiveState')
        try:
            return self.__dbusrun(query)
        except Exception as e:
            if debug:
                print(e, file = sys.stderr)
            return 'failed'

    def wait(self, interval = 1):
        '''Wait until the service finishes.  Polls the unit state every
        interval seconds; returns 1 if the unit ended up "failed" and 0
        if it ended up "inactive".'''
        global debug

        # Use a poll/sleep loop to wait for the service to finish.
        # Avoid adding a dependency on python3 glib, which is required
        # to use an event loop to receive a dbus signal.
        s = self.state()
        while s not in ['failed', 'inactive']:
            if debug:
                print('waiting %s %s' % (self.unitname, s))
            time.sleep(interval)
            s = self.state()
        if debug:
            print('waited %s %s' % (self.unitname, s))
        if s == 'failed':
            return 1
        return 0

    def start(self):
        '''Start the service and wait for it to complete.  Returns -1
        if the service was not started, 0 if it succeeded, or 1 if it
        failed.'''
        global debug

        if debug:
            print('starting %s' % self.unitname)

        try:
            self.__dbusrun(lambda: self.unit.Start('replace'))
            return self.wait()
        except Exception as e:
            print(e, file = sys.stderr)
            return -1

    def stop(self):
        '''Stop the service and wait for it to wind down.  Returns -1
        if the stop request failed, otherwise the result of wait().'''
        global debug

        if debug:
            print('stopping %s' % self.unitname)

        try:
            self.__dbusrun(lambda: self.unit.Stop('replace'))
            return self.wait()
        except Exception as e:
            print(e, file = sys.stderr)
            return -1
|
||||
|
||||
def run_service(mnt, killfuncs):
    '''Run scrub as a service.  Returns -1 if the service control
    object could not be set up, otherwise the result of its start().

    The service's stop function is registered in killfuncs while it
    runs and is always unregistered again afterwards.'''
    try:
        svc = scrub_service(mnt)
    except Exception:
        return -1

    killfuncs.add(svc.stop)
    try:
        return svc.start()
    finally:
        remove_killfunc(killfuncs, svc.stop)
|
||||
|
||||
def run_scrub(mnt, cond, running_devs, mntdevs, killfuncs):
    '''Check one mountpoint; this is the body of a worker thread.

    Tries the systemd service first when SERVICE_MODE is set in the
    environment and falls back to running the tool directly.  The
    result is OR-ed into the global retcode.  On every exit path the
    devices in mntdevs are dropped from running_devs and cond is
    notified.'''
    global retcode, terminate

    done_fmt = "Scrubbing %s done, (err=%d)"

    print("Scrubbing %s..." % mnt)
    sys.stdout.flush()

    try:
        if terminate:
            return

        # Run per-mount systemd bcachefsck service only if we ourselves
        # are running as a systemd service.
        if 'SERVICE_MODE' in os.environ:
            status = run_service(mnt, killfuncs)
            if status in (0, 1):
                print(done_fmt % (mnt, status))
                sys.stdout.flush()
                retcode |= status
                return

            if terminate:
                return

        # Invoke bcachefsck manually if we're running in the foreground.
        # We also permit this if we're running as a cronjob where
        # systemd services are unavailable.
        status = run_subprocess(mnt, killfuncs)
        if status >= 0:
            print(done_fmt % (mnt, status))
            sys.stdout.flush()
            retcode |= status
            return

        if terminate:
            return

        print("Unable to start scrub tool.")
        sys.stdout.flush()
    finally:
        # Release our devices in place and tell the waiter about it.
        running_devs.difference_update(mntdevs)
        with cond:
            cond.notify()
|
||||
|
||||
def signal_scrubs(signum, cond):
    '''Termination-signal handler: set the global terminate flag and
    notify cond so that a thread waiting on it wakes up.'''
    global debug, terminate

    if debug:
        print('Signal handler called with signal', signum)
        sys.stdout.flush()

    terminate = True
    with cond:
        cond.notify()
|
||||
|
||||
def wait_for_termination(cond, killfuncs):
    '''Block until cond is notified.  If termination has been requested
    by then, call and drain every function in killfuncs and return
    True; otherwise return False.'''
    global debug, terminate

    if debug:
        print('waiting for threads to terminate')
        sys.stdout.flush()

    with cond:
        try:
            cond.wait()
        except KeyboardInterrupt:
            # Treat ^C during the wait as a termination request.
            terminate = True

    if terminate:
        print("Terminating...")
        sys.stdout.flush()
        while killfuncs:
            kill = killfuncs.pop()
            kill()
        return True

    return False
|
||||
|
||||
def scan_interval(string):
    '''Convert a textual scan interval argument into a time delta.

    Recognized suffixes: y (year of 31556952 seconds), q (90 days),
    mo (30 days), w, d, h, m and s.  A number with no suffix is taken
    as seconds.  The numeric part may be fractional in every form,
    including the suffix-less one.  Raises ValueError if the numeric
    part cannot be parsed.'''

    # Checked in order, first match wins.
    units = (
        ('y',  lambda v: timedelta(seconds = 31556952) * v),
        ('q',  lambda v: timedelta(days = 90 * v)),
        ('mo', lambda v: timedelta(days = 30 * v)),
        ('w',  lambda v: timedelta(weeks = v)),
        ('d',  lambda v: timedelta(days = v)),
        ('h',  lambda v: timedelta(hours = v)),
        ('m',  lambda v: timedelta(minutes = v)),
        ('s',  lambda v: timedelta(seconds = v)),
    )
    for suffix, convert in units:
        if string.endswith(suffix):
            return convert(float(string[:-len(suffix)]))
    # Bare number: parse as float so "1.5" works like "1.5s" does.
    return timedelta(seconds = float(string))
|
||||
|
||||
def utcnow():
    '''Create a representation of the time right now, in UTC.

    Returns a timezone-aware datetime whose tzinfo is timezone.utc.'''

    # datetime.utcnow() is deprecated and returns a naive value that
    # then has to be patched up; ask for an aware UTC time directly.
    return datetime.now(timezone.utc)
|
||||
|
||||
def main():
    '''Find mounts, schedule bcachefsck runs.

    Starts one worker thread per filesystem, never running two
    filesystems that share a device at the same time, then waits for
    the workers and exits with the accumulated return code.'''
    global retcode, terminate, debug

    def spawn(mnt, devs):
        '''Start a worker thread that runs run_scrub for mnt.'''
        worker = threading.Thread(
                target = run_scrub,
                args = (mnt, cond, running_devs, devs, killfuncs))
        worker.start()

    parser = argparse.ArgumentParser(
            description = "Scrub all mounted bcachefs filesystems.")
    parser.add_argument("--debug", action = "store_true",
            help = "Enabling debugging messages.")
    args = parser.parse_args()

    if args.debug:
        debug = True

    fs = find_mounts()

    # Schedule scrub jobs...
    running_devs = set()
    killfuncs = set()
    cond = threading.Condition()

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, lambda s, f: signal_scrubs(s, cond))

    while fs:
        # Nothing is running: any filesystem may start.
        if not running_devs:
            mnt, devs = fs.popitem()
            running_devs.update(devs)
            spawn(mnt, devs)

        # Start every remaining filesystem that shares no device with
        # a running one; drop the started ones once the scan is over.
        started = []
        for mnt, devs in fs.items():
            if running_devs.isdisjoint(devs):
                running_devs.update(devs)
                started.append(mnt)
                spawn(mnt, devs)
        for mnt in started:
            del fs[mnt]

        # Wait for one thread to finish
        if wait_for_termination(cond, killfuncs):
            break

    # Wait for the rest of the threads to finish
    while killfuncs:
        wait_for_termination(cond, killfuncs)

    # If we're being run as a service, the return code must fit the LSB
    # init script action error guidelines, which is to say that we compress
    # all errors to 1 ("generic or unspecified error", LSB 5.0 section
    # 22.2) and hope the admin will scan the log for what actually
    # happened.
    #
    # We have to sleep 2 seconds here because journald uses the pid to
    # connect our log messages to the systemd service.  This is critical
    # for capturing all the log messages if the scrub fails, because the
    # fail service uses the service name to gather log messages for the
    # error report.
    if 'SERVICE_MODE' in os.environ:
        time.sleep(2)
        if retcode != 0:
            retcode = 1

    sys.exit(retcode)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -1,84 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
|
||||
Description=Online bcachefsck for All Filesystems
|
||||
OnFailure=bcachefsck_all_fail.service
|
||||
ConditionACPower=true
|
||||
Documentation=man:bcachefsck_all(8)
|
||||
After=paths.target multi-user.target network.target network-online.target systemd-networkd.service NetworkManager.service connman.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
Environment=SERVICE_MODE=1
|
||||
ExecStart=@libexecdir@/bcachefsck_all
|
||||
SyslogIdentifier=bcachefsck_all
|
||||
|
||||
# Create the service underneath the scrub background service slice so that we
|
||||
# can control resource usage.
|
||||
Slice=system-bcachefsck.slice
|
||||
|
||||
# Run scrub_all with minimal CPU and IO priority so that nothing will starve.
|
||||
IOSchedulingClass=idle
|
||||
CPUSchedulingPolicy=idle
|
||||
CPUAccounting=true
|
||||
Nice=19
|
||||
|
||||
# No realtime scheduling
|
||||
RestrictRealtime=true
|
||||
|
||||
# No special privileges, but we still have to run as root so that we can
|
||||
# contact the service manager to start the sub-units.
|
||||
CapabilityBoundingSet=
|
||||
NoNewPrivileges=true
|
||||
RestrictSUIDSGID=true
|
||||
|
||||
# Make the entire filesystem readonly except for the media scan stamp file
|
||||
# directory. We don't want to hide anything because we need to find all
|
||||
# mounted bcachefs filesystems in the host.
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
PrivateTmp=false
|
||||
|
||||
# No network access except to the systemd control socket
|
||||
PrivateNetwork=true
|
||||
ProtectHostname=true
|
||||
RestrictAddressFamilies=AF_UNIX
|
||||
IPAddressDeny=any
|
||||
|
||||
# Don't let the program mess with the kernel configuration at all
|
||||
ProtectKernelLogs=true
|
||||
ProtectKernelModules=true
|
||||
ProtectKernelTunables=true
|
||||
ProtectControlGroups=true
|
||||
ProtectProc=invisible
|
||||
RestrictNamespaces=true
|
||||
|
||||
# Hide everything in /proc, even /proc/mounts
|
||||
ProcSubset=pid
|
||||
|
||||
# Only allow the default personality Linux
|
||||
LockPersonality=true
|
||||
|
||||
# No writable memory pages
|
||||
MemoryDenyWriteExecute=true
|
||||
|
||||
# Don't let our mounts leak out to the host
|
||||
PrivateMounts=true
|
||||
|
||||
# Restrict system calls to the native arch and only enough to get things going
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@privileged
|
||||
SystemCallFilter=~@resources
|
||||
SystemCallFilter=~@mount
|
||||
|
||||
# Media scan stamp file shouldn't be readable by regular users
|
||||
UMask=0077
|
||||
|
||||
# lsblk ignores mountpoints if it can't find the device files, so we cannot
|
||||
# hide them
|
||||
#ProtectClock=true
|
||||
#PrivateDevices=true
|
||||
@ -1,16 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
|
||||
Description=Periodic bcachefsck for All Filesystems
|
||||
|
||||
[Timer]
|
||||
# Run on Sunday at 3:10am, to avoid running afoul of DST changes
|
||||
OnCalendar=Sun *-*-* 03:10:00
|
||||
RandomizedDelaySec=60
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
@ -1,71 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
|
||||
Description=Online bcachefsck for All Filesystems Failure Reporting
|
||||
Documentation=man:bcachefsck_all(8)
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
Environment=EMAIL_ADDR=root
|
||||
ExecStart=@libexecdir@/bcachefsck_fail "${EMAIL_ADDR}" bcachefsck_all
|
||||
User=mail
|
||||
Group=mail
|
||||
SupplementaryGroups=systemd-journal
|
||||
|
||||
# No realtime scheduling
|
||||
RestrictRealtime=true
|
||||
|
||||
# Make the entire filesystem readonly and /home inaccessible.
|
||||
ProtectSystem=full
|
||||
ProtectHome=yes
|
||||
PrivateTmp=true
|
||||
RestrictSUIDSGID=true
|
||||
|
||||
# Emailing reports requires network access, but not the ability to change the
|
||||
# hostname.
|
||||
ProtectHostname=true
|
||||
|
||||
# Don't let the program mess with the kernel configuration at all
|
||||
ProtectKernelLogs=true
|
||||
ProtectKernelModules=true
|
||||
ProtectKernelTunables=true
|
||||
ProtectControlGroups=true
|
||||
ProtectProc=invisible
|
||||
RestrictNamespaces=true
|
||||
|
||||
# Can't hide /proc because journalctl needs it to find various pieces of log
|
||||
# information
|
||||
#ProcSubset=pid
|
||||
|
||||
# Only allow the default personality Linux
|
||||
LockPersonality=true
|
||||
|
||||
# No writable memory pages
|
||||
MemoryDenyWriteExecute=true
|
||||
|
||||
# Don't let our mounts leak out to the host
|
||||
PrivateMounts=true
|
||||
|
||||
# Restrict system calls to the native arch and only enough to get things going
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@privileged
|
||||
SystemCallFilter=~@resources
|
||||
SystemCallFilter=~@mount
|
||||
|
||||
# Failure reporting needs no capabilities at all
|
||||
CapabilityBoundingSet=
|
||||
NoNewPrivileges=true
|
||||
|
||||
# Failure reporting shouldn't create world-readable files
|
||||
UMask=0077
|
||||
|
||||
# Clean up any IPC objects when this unit stops
|
||||
RemoveIPC=true
|
||||
|
||||
# No access to hardware device files
|
||||
PrivateDevices=true
|
||||
ProtectClock=true
|
||||
@ -1,63 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
# Email logs of failed bcachefsck and bcachefsck_all unit runs
|
||||
|
||||
recipient="$1"
|
||||
test -z "${recipient}" && exit 0
|
||||
service="$2"
|
||||
test -z "${service}" && exit 0
|
||||
mntpoint="$3"
|
||||
|
||||
hostname="$(hostname -f 2>/dev/null)"
|
||||
test -z "${hostname}" && hostname="${HOSTNAME}"
|
||||
|
||||
mailer="$(command -v sendmail)"
|
||||
if [ ! -x "${mailer}" ]; then
|
||||
echo "${mailer}: Mailer program not found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Print a complete failure report for one mountpoint on stdout: mail headers,
# a short explanation, then the full "systemctl status" output of the
# per-mount unit.  Reads the globals recipient, service, mntpoint, hostname.
fail_mail_mntpoint() {
	local scrub_svc

	# Turn the mountpoint into a properly escaped systemd instance name
	scrub_svc="$(systemd-escape --template "${service}@.service" --path "${mntpoint}")"
	cat << ENDL
To: ${recipient}
From: <${service}@${hostname}>
Subject: ${service} failure on ${mntpoint}
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=UTF-8

So sorry, the automatic ${service} of ${mntpoint} on ${hostname} failed.
Please do not reply to this message.

A log of what happened follows:
ENDL
	systemctl status --full --lines 4294967295 "${scrub_svc}"
}
|
||||
|
||||
# Print a complete failure report for a non-instanced unit on stdout: mail
# headers, a short explanation, then the full "systemctl status" output of
# ${service}.  Reads the globals recipient, service, hostname.
fail_mail() {
	# Declare the body as 8-bit UTF-8, as the per-mountpoint report does:
	# the status output that follows is not guaranteed to be plain ASCII.
	cat << ENDL
To: ${recipient}
From: <${service}@${hostname}>
Subject: ${service} failure
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=UTF-8

So sorry, the automatic ${service} on ${hostname} failed.

A log of what happened follows:
ENDL
	systemctl status --full --lines 4294967295 "${service}"
}
|
||||
|
||||
if [ -n "${mntpoint}" ]; then
|
||||
fail_mail_mntpoint | "${mailer}" -t -i
|
||||
else
|
||||
fail_mail | "${mailer}" -t -i
|
||||
fi
|
||||
exit "${PIPESTATUS[1]}"
|
||||
@ -1,75 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
|
||||
Description=Online bcachefsck Failure Reporting for %f
|
||||
Documentation=man:bcachefs(8)
|
||||
|
||||
[Service]
Type=oneshot
Environment=EMAIL_ADDR=root
# The second argument is the template name of the unit that failed; the
# reporting script appends "@.service" to it to find the instance, so it must
# be "bcachefsck" (for bcachefsck@.service).
ExecStart=@libexecdir@/bcachefsck_fail "${EMAIL_ADDR}" bcachefsck %f
User=mail
Group=mail
SupplementaryGroups=systemd-journal
|
||||
|
||||
# Create the service underneath the background service slice so that we can
|
||||
# control resource usage.
|
||||
Slice=system-bcachefsck.slice
|
||||
|
||||
# No realtime scheduling
|
||||
RestrictRealtime=true
|
||||
|
||||
# Make the entire filesystem readonly and /home inaccessible.
|
||||
ProtectSystem=full
|
||||
ProtectHome=yes
|
||||
PrivateTmp=true
|
||||
RestrictSUIDSGID=true
|
||||
|
||||
# Emailing reports requires network access, but not the ability to change the
|
||||
# hostname.
|
||||
ProtectHostname=true
|
||||
|
||||
# Don't let the program mess with the kernel configuration at all
|
||||
ProtectKernelLogs=true
|
||||
ProtectKernelModules=true
|
||||
ProtectKernelTunables=true
|
||||
ProtectControlGroups=true
|
||||
ProtectProc=invisible
|
||||
RestrictNamespaces=true
|
||||
|
||||
# Can't hide /proc because journalctl needs it to find various pieces of log
|
||||
# information
|
||||
#ProcSubset=pid
|
||||
|
||||
# Only allow the default personality Linux
|
||||
LockPersonality=true
|
||||
|
||||
# No writable memory pages
|
||||
MemoryDenyWriteExecute=true
|
||||
|
||||
# Don't let our mounts leak out to the host
|
||||
PrivateMounts=true
|
||||
|
||||
# Restrict system calls to the native arch and only enough to get things going
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@privileged
|
||||
SystemCallFilter=~@resources
|
||||
SystemCallFilter=~@mount
|
||||
|
||||
# Failure reporting needs no capabilities at all
|
||||
CapabilityBoundingSet=
|
||||
NoNewPrivileges=true
|
||||
|
||||
# Failure reporting shouldn't create world-readable files
|
||||
UMask=0077
|
||||
|
||||
# Clean up any IPC objects when this unit stops
|
||||
RemoveIPC=true
|
||||
|
||||
# No access to hardware device files
|
||||
PrivateDevices=true
|
||||
ProtectClock=true
|
||||
@ -1,30 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
|
||||
# Author: Darrick J. Wong <djwong@kernel.org>
|
||||
|
||||
[Unit]
|
||||
Description=bcachefsck background service slice
|
||||
Before=slices.target
|
||||
|
||||
[Slice]
|
||||
|
||||
# If the CPU usage cgroup controller is available, don't use more than 60% of a
|
||||
# single core for all background processes.
|
||||
CPUQuota=60%
|
||||
CPUAccounting=true
|
||||
|
||||
[Install]
|
||||
# As of systemd 249, the systemd cgroupv2 configuration code will drop resource
|
||||
# controllers from the root and system.slice cgroups at startup if it doesn't
|
||||
# find any direct dependencies that require a given controller. Newly
|
||||
# activated units with resource control directives are created under the system
|
||||
# slice but do not cause a reconfiguration of the slice's resource controllers.
|
||||
# Hence we cannot put CPUQuota= into the bcachefsck service units directly.
|
||||
#
|
||||
# For the CPUQuota directive to have any effect, we must therefore create an
|
||||
# explicit definition file for the slice that systemd creates to contain the
|
||||
# bcachefsck instance units (e.g. bcachefsck@.service) and we must configure this
|
||||
# slice as a dependency of the system slice to establish the direct dependency
|
||||
# relation.
|
||||
WantedBy=system.slice
|
||||
Loading…
x
Reference in New Issue
Block a user