Update bcachefs sources to c9d875f9be1f bcachefs: Casefold is now a regular opts.h option

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet
Date: 2025-04-20 17:01:39 -04:00
Parent: 98f2c06d41
Commit: 6a83d70ef5
63 changed files with 1257 additions and 763 deletions


@ -1 +1 @@
7307b739bbe5f1d9415f5c1da070723b3fb5abbd
c9d875f9be1f853e747c9e00421c678b0adf73d2


@ -255,6 +255,7 @@ update-bcachefs-sources:
git rm -rf --ignore-unmatch libbcachefs
test -d libbcachefs || mkdir libbcachefs
cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
rm libbcachefs/fast_list.c libbcachefs/async_objs.c
git add libbcachefs/*.[ch]
git rm -f libbcachefs/mean_and_variance_test.c
cp $(LINUX_DIR)/include/linux/closure.h include/linux/


@ -79,8 +79,8 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
# Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage)
# TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4
__rustc-option = $(call try-run,\
echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\
$(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\
echo '$(pound)![allow(missing_docs)]$(pound)![feature(no_core)]$(pound)![no_core]' | RUSTC_BOOTSTRAP=1\
$(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null --target=%,$(2)) $(3)\
--crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4))
# rustc-option


@ -521,11 +521,11 @@ static int cmd_device_resize(int argc, char *argv[])
struct bch_dev *resize = NULL;
for_each_online_member(c, ca) {
for_each_online_member(c, ca, 0) {
if (resize)
die("confused: more than one online device?");
resize = ca;
percpu_ref_get(&resize->io_ref[READ]);
enumerated_ref_get(&resize->io_ref[READ], 0);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@ -538,7 +538,7 @@ static int cmd_device_resize(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref[READ]);
enumerated_ref_put(&resize->io_ref[READ], 0);
bch2_fs_stop(c);
}
return 0;
@ -618,11 +618,11 @@ static int cmd_device_resize_journal(int argc, char *argv[])
struct bch_dev *resize = NULL;
for_each_online_member(c, ca) {
for_each_online_member(c, ca, 0) {
if (resize)
die("confused: more than one online device?");
resize = ca;
percpu_ref_get(&resize->io_ref[READ]);
enumerated_ref_get(&resize->io_ref[READ], 0);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@ -632,7 +632,7 @@ static int cmd_device_resize_journal(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref[READ]);
enumerated_ref_put(&resize->io_ref[READ], 0);
bch2_fs_stop(c);
}
return 0;


@ -153,12 +153,12 @@ int cmd_dump(int argc, char *argv[])
down_read(&c->state_lock);
for_each_online_member(c, ca)
for_each_online_member(c, ca, 0)
nr_devices++;
BUG_ON(!nr_devices);
for_each_online_member(c, ca) {
for_each_online_member(c, ca, 0) {
int flags = O_WRONLY|O_CREAT|O_TRUNC;
if (!force)


@ -36,6 +36,7 @@ static inline void reinit_completion(struct completion *x)
void complete(struct completion *);
void wait_for_completion(struct completion *);
unsigned long wait_for_completion_timeout(struct completion *, unsigned long);
#define wait_for_completion_interruptible(x) (wait_for_completion(x), 0)
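This userspace shim can simply claim success: nothing in the tools build delivers signals that would interrupt the wait, so callers written against the kernel API keep working unchanged. A hedged sketch of such a caller (the completion variable is illustrative):

	struct completion done;

	init_completion(&done);
	/* ... hand &done to some async work ... */

	int ret = wait_for_completion_interruptible(&done);
	if (ret)	/* kernel: -ERESTARTSYS; userspace shim: never taken */
		return ret;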


@ -17,6 +17,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@ -1381,7 +1382,7 @@ static void check_discard_freespace_key_work(struct work_struct *work)
container_of(work, struct check_discard_freespace_key_async, work);
bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
enumerated_ref_put(&w->c->writes, BCH_WRITE_REF_check_discard_freespace_key);
kfree(w);
}
@ -1458,7 +1459,7 @@ delete:
if (!w)
goto out;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_check_discard_freespace_key)) {
kfree(w);
goto out;
}
@ -1952,26 +1953,26 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard))
return;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_dev_do_discards))
goto put_write_ref;
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_dev_do_discards);
put_write_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard);
}
void bch2_do_discards(struct bch_fs *c)
@ -2047,8 +2048,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
@ -2058,18 +2059,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (discard_in_flight_add(ca, bucket, false))
return;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_discard_fast))
return;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_discard_one_bucket_fast))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_discard_one_bucket_fast);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_discard_fast);
}
static int invalidate_one_bp(struct btree_trans *trans,
@ -2261,27 +2262,27 @@ restart_err:
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_bkey_buf_exit(&last_flushed, c);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_invalidate))
return;
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE, BCH_DEV_WRITE_REF_do_invalidates))
goto put_ref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
}
void bch2_do_invalidates(struct bch_fs *c)
@ -2503,15 +2504,15 @@ void bch2_recalc_capacity(struct bch_fs *c)
lockdep_assert_held(&c->state_lock);
for_each_online_member(c, ca) {
struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
rcu_read_lock();
for_each_member_device_rcu(c, ca, NULL) {
struct block_device *bdev = READ_ONCE(ca->disk_sb.bdev);
if (bdev)
ra_pages += bdev->bd_disk->bdi->ra_pages;
ra_pages += bdi->ra_pages;
}
if (ca->mi.state != BCH_MEMBER_STATE_rw)
continue;
bch2_set_ra_pages(c, ra_pages);
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
u64 dev_reserve = 0;
/*
@ -2548,6 +2549,9 @@ void bch2_recalc_capacity(struct bch_fs *c)
bucket_size_max = max_t(unsigned, bucket_size_max,
ca->mi.bucket_size);
}
rcu_read_unlock();
bch2_set_ra_pages(c, ra_pages);
gc_reserve = c->opts.gc_reserve_bytes
? c->opts.gc_reserve_bytes >> 9
@ -2570,8 +2574,10 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
u64 ret = U64_MAX;
for_each_rw_member(c, ca)
rcu_read_lock();
for_each_rw_member_rcu(c, ca)
ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
rcu_read_unlock();
return ret;
}
@ -2595,8 +2601,12 @@ static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
void bch2_dev_allocator_set_rw(struct bch_fs *c, struct bch_dev *ca, bool rw)
{
/* BCH_DATA_free == all rw devs */
for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
if (rw && (ca->mi.data_allowed & BIT(i)))
if (rw &&
(i == BCH_DATA_free ||
(ca->mi.data_allowed & BIT(i))))
set_bit(ca->dev_idx, c->rw_devs[i].d);
else
clear_bit(ca->dev_idx, c->rw_devs[i].d);
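With the new check, membership in rw_devs[BCH_DATA_free] tracks only whether the device is rw, not its data_allowed mask. A worked illustration, assuming a device that only allows btree data:

	/* With ca->mi.data_allowed == BIT(BCH_DATA_btree),
	 * bch2_dev_allocator_set_rw(c, ca, true) leaves:
	 *
	 *   test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_free].d)  -> 1  (all rw devs)
	 *   test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_btree].d) -> 1  (allowed)
	 *   test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_user].d)  -> 0  (not allowed)
	 *
	 * and bch2_dev_allocator_set_rw(c, ca, false) clears all of them.
	 */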


@ -1623,7 +1623,12 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
for_each_online_member(c, ca) {
bch2_printbuf_make_room(&buf, 4096);
rcu_read_lock();
buf.atomic++;
for_each_online_member_rcu(c, ca) {
prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
printbuf_indent_add(&buf, 2);
bch2_dev_alloc_debug_to_text(&buf, ca);
@ -1631,6 +1636,9 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
prt_newline(&buf);
}
--buf.atomic;
rcu_read_unlock();
prt_printf(&buf, "Copygc debug:\n");
printbuf_indent_add(&buf, 2);
bch2_copygc_wait_to_text(&buf, c);
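The combination above is a small idiom worth spelling out: printbuf printing normally allocates, which can sleep, so buffer space is reserved up front and buf.atomic is raised before entering the RCU read section. A minimal sketch using the helpers from this diff (the wrapper function itself is hypothetical):

	static void online_devs_to_text(struct bch_fs *c, struct printbuf *buf)
	{
		/* allocate while sleeping is still allowed */
		bch2_printbuf_make_room(buf, 4096);

		rcu_read_lock();
		buf->atomic++;		/* printbuf must not allocate from here on */

		for_each_online_member_rcu(c, ca)
			prt_printf(buf, "Dev %u:\n", ca->dev_idx);

		--buf->atomic;
		rcu_read_unlock();
	}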

libbcachefs/async_objs.h Normal file

@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ASYNC_OBJS_H
#define _BCACHEFS_ASYNC_OBJS_H
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
static inline void __async_object_list_del(struct fast_list *head, unsigned idx)
{
fast_list_remove(head, idx);
}
static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx)
{
int ret = fast_list_add(head, obj);
*idx = ret > 0 ? ret : 0;
return ret < 0 ? ret : 0;
}
#define async_object_list_del(_c, _list, idx) \
__async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx)
#define async_object_list_add(_c, _list, obj, idx) \
__async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx)
void bch2_fs_async_obj_debugfs_init(struct bch_fs *);
void bch2_fs_async_obj_exit(struct bch_fs *);
int bch2_fs_async_obj_init(struct bch_fs *);
#else /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
#define async_object_list_del(_c, _n, idx) do {} while (0)
static inline int __async_object_list_add(void)
{
return 0;
}
#define async_object_list_add(_c, _n, obj, idx) __async_object_list_add()
static inline void bch2_fs_async_obj_debugfs_init(struct bch_fs *c) {}
static inline void bch2_fs_async_obj_exit(struct bch_fs *c) {}
static inline int bch2_fs_async_obj_init(struct bch_fs *c) { return 0; }
#endif /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */
#endif /* _BCACHEFS_ASYNC_OBJS_H */
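Callers pair an add at submission with a del at completion; the btree_io.c hunks later in this commit add exactly this pairing for btree read and write bios. A minimal sketch (surrounding context elided; rb->list_idx is the field this commit adds to struct btree_read_bio):

	/* submission path: make the in-flight bio visible in debugfs */
	async_object_list_add(c, btree_read_bio, rb, &rb->list_idx);
	submit_bio(&rb->bio);

	/* completion path, before the object is freed: */
	async_object_list_del(c, btree_read_bio, rb->list_idx);
	bio_put(&rb->bio);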


@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ASYNC_OBJS_TYPES_H
#define _BCACHEFS_ASYNC_OBJS_TYPES_H
#define BCH_ASYNC_OBJ_LISTS() \
x(promote) \
x(rbio) \
x(btree_read_bio) \
x(btree_write_bio)
enum bch_async_obj_lists {
#define x(n) BCH_ASYNC_OBJ_LIST_##n,
BCH_ASYNC_OBJ_LISTS()
#undef x
BCH_ASYNC_OBJ_NR
};
struct async_obj_list {
struct fast_list list;
void (*obj_to_text)(struct printbuf *, void *);
unsigned idx;
};
#endif /* _BCACHEFS_ASYNC_OBJS_TYPES_H */
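Expanded, the x-macro yields one list per async object type plus a count:

	enum bch_async_obj_lists {
		BCH_ASYNC_OBJ_LIST_promote,
		BCH_ASYNC_OBJ_LIST_rbio,
		BCH_ASYNC_OBJ_LIST_btree_read_bio,
		BCH_ASYNC_OBJ_LIST_btree_write_bio,
		BCH_ASYNC_OBJ_NR	/* == 4 */
	};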


@ -437,7 +437,8 @@ found:
bytes = p.crc.compressed_size << 9;
struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ,
BCH_DEV_READ_REF_check_extent_checksums);
if (!ca)
return false;
@ -474,7 +475,8 @@ err:
if (bio)
bio_put(bio);
kvfree(data_buf);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_check_extent_checksums);
printbuf_exit(&buf);
return ret;
}


@ -209,6 +209,7 @@
#include "btree_journal_iter_types.h"
#include "disk_accounting_types.h"
#include "errcode.h"
#include "fast_list.h"
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
@ -219,7 +220,7 @@
#include "util.h"
#ifdef CONFIG_BCACHEFS_DEBUG
#define BCH_WRITE_REF_DEBUG
#define ENUMERATED_REF_DEBUG
#endif
#ifndef dynamic_fault
@ -474,6 +475,7 @@ enum bch_time_stats {
};
#include "alloc_types.h"
#include "async_objs_types.h"
#include "btree_gc_types.h"
#include "btree_types.h"
#include "btree_node_scan_types.h"
@ -483,6 +485,7 @@ enum bch_time_stats {
#include "clock_types.h"
#include "disk_groups_types.h"
#include "ec_types.h"
#include "enumerated_ref_types.h"
#include "journal_types.h"
#include "keylist_types.h"
#include "quota_types.h"
@ -515,6 +518,51 @@ struct discard_in_flight {
u64 bucket:63;
};
#define BCH_DEV_READ_REFS() \
x(bch2_online_devs) \
x(trans_mark_dev_sbs) \
x(read_fua_test) \
x(sb_field_resize) \
x(write_super) \
x(journal_read) \
x(fs_journal_alloc) \
x(fs_resize_on_mount) \
x(btree_node_read) \
x(btree_node_read_all_replicas) \
x(btree_node_scrub) \
x(btree_node_write) \
x(btree_node_scan) \
x(btree_verify_replicas) \
x(btree_node_ondisk_to_text) \
x(io_read) \
x(check_extent_checksums) \
x(ec_block)
enum bch_dev_read_ref {
#define x(n) BCH_DEV_READ_REF_##n,
BCH_DEV_READ_REFS()
#undef x
BCH_DEV_READ_REF_NR,
};
#define BCH_DEV_WRITE_REFS() \
x(journal_write) \
x(journal_do_discards) \
x(dev_do_discards) \
x(discard_one_bucket_fast) \
x(do_invalidates) \
x(nocow_flush) \
x(io_write) \
x(ec_block) \
x(ec_bucket_zero)
enum bch_dev_write_ref {
#define x(n) BCH_DEV_WRITE_REF_##n,
BCH_DEV_WRITE_REFS()
#undef x
BCH_DEV_WRITE_REF_NR,
};
struct bch_dev {
struct kobject kobj;
#ifdef CONFIG_BCACHEFS_DEBUG
@ -525,8 +573,7 @@ struct bch_dev {
struct percpu_ref ref;
#endif
struct completion ref_completion;
struct percpu_ref io_ref[2];
struct completion io_ref_completion[2];
struct enumerated_ref io_ref[2];
struct bch_fs *fs;
@ -733,11 +780,7 @@ struct bch_fs {
struct rw_semaphore state_lock;
/* Counts outstanding writes, for clean transition to read-only */
#ifdef BCH_WRITE_REF_DEBUG
atomic_long_t writes[BCH_WRITE_REF_NR];
#else
struct percpu_ref writes;
#endif
struct enumerated_ref writes;
/*
* Certain operations are only allowed in single threaded mode, during
* recovery, and we want to assert that this is the case:
@ -891,6 +934,7 @@ struct bch_fs {
struct workqueue_struct *write_ref_wq;
/* ALLOCATION */
struct bch_devs_mask online_devs;
struct bch_devs_mask rw_devs[BCH_DATA_NR];
unsigned long rw_devs_change_count;
@ -985,6 +1029,10 @@ struct bch_fs {
nocow_locks;
struct rhashtable promote_table;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR];
#endif
mempool_t compression_bounce[2];
mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR];
size_t zstd_workspace_size;
@ -1073,6 +1121,7 @@ struct bch_fs {
/* DEBUG JUNK */
struct dentry *fs_debug_dir;
struct dentry *btree_debug_dir;
struct dentry *async_obj_dir;
struct btree_debug btree_debug[BTREE_ID_NR];
struct btree *verify_data;
struct btree_node *verify_ondisk;
@ -1114,54 +1163,6 @@ struct bch_fs {
extern struct wait_queue_head bch2_read_only_wait;
static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
atomic_long_inc(&c->writes[ref]);
#else
percpu_ref_get(&c->writes);
#endif
}
static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
return !test_bit(BCH_FS_going_ro, &c->flags) &&
atomic_long_inc_not_zero(&c->writes[ref]);
#else
return percpu_ref_tryget(&c->writes);
#endif
}
static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
return !test_bit(BCH_FS_going_ro, &c->flags) &&
atomic_long_inc_not_zero(&c->writes[ref]);
#else
return percpu_ref_tryget_live(&c->writes);
#endif
}
static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
long v = atomic_long_dec_return(&c->writes[ref]);
BUG_ON(v < 0);
if (v)
return;
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
if (atomic_long_read(&c->writes[i]))
return;
set_bit(BCH_FS_write_disable_complete, &c->flags);
wake_up(&bch2_read_only_wait);
#else
percpu_ref_put(&c->writes);
#endif
}
static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
{
if (test_bit(BCH_FS_stopping, &c->flags))


@ -868,6 +868,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22);
LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{


@ -22,6 +22,7 @@
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@ -1233,14 +1234,14 @@ static void bch2_gc_gens_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, gc_gens_work);
bch2_gc_gens(c);
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
void bch2_gc_gens_async(struct bch_fs *c)
{
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_gc_gens) &&
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_gc_gens) &&
!queue_work(c->write_ref_wq, &c->gc_gens_work))
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_gc_gens);
}
void bch2_fs_btree_gc_init_early(struct bch_fs *c)


@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "async_objs.h"
#include "bkey_buf.h"
#include "bkey_methods.h"
#include "bkey_sort.h"
@ -13,6 +14,7 @@
#include "buckets.h"
#include "checksum.h"
#include "debug.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@ -1324,7 +1326,7 @@ static void btree_node_read_work(struct work_struct *work)
while (1) {
retry = true;
bch_info(c, "retrying read");
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ);
ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = ca != NULL;
rb->start_time = local_clock();
bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
@ -1349,7 +1351,7 @@ start:
"btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read);
rb->have_ioref = false;
bch2_mark_io_failure(&failed, &rb->pick, false);
@ -1374,6 +1376,7 @@ start:
}
}
async_object_list_del(c, btree_read_bio, rb->list_idx);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
rb->start_time);
bio_put(&rb->bio);
@ -1413,6 +1416,11 @@ static void btree_node_read_endio(struct bio *bio)
queue_work(c->btree_read_complete_wq, &rb->work);
}
void bch2_btree_read_bio_to_text(struct printbuf *out, struct btree_read_bio *rbio)
{
bch2_bio_to_text(out, &rbio->bio);
}
struct btree_node_read_all {
struct closure cl;
struct bch_fs *c;
@ -1605,7 +1613,8 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
bch2_latency_acct(ca, rb->start_time, READ);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_btree_node_read_all_replicas);
}
ra->err[rb->idx] = bio->bi_status;
@ -1645,7 +1654,8 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, pick, entry) {
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_node_read_all_replicas);
struct btree_read_bio *rb =
container_of(ra->bio[i], struct btree_read_bio, bio);
rb->c = c;
@ -1722,7 +1732,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
return;
}
ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read);
bio = bio_alloc_bioset(NULL,
buf_pages(b->data, btree_buf_bytes(b)),
@ -1741,6 +1751,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bio->bi_end_io = btree_node_read_endio;
bch2_bio_map(bio, b->data, btree_buf_bytes(b));
async_object_list_add(c, btree_read_bio, rb, &rb->list_idx);
if (rb->have_ioref) {
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
bio_sectors(bio));
@ -1925,9 +1937,9 @@ err:
printbuf_exit(&err);
bch2_bkey_buf_exit(&scrub->key, c);;
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
percpu_ref_put(&scrub->ca->io_ref[READ]);
enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
kfree(scrub);
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
}
static void btree_node_scrub_endio(struct bio *bio)
@ -1946,7 +1958,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
struct bch_fs *c = trans->c;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_node_scrub))
return -BCH_ERR_erofs_no_writes;
struct extent_ptr_decoded pick;
@ -1954,7 +1966,8 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
if (ret <= 0)
goto err;
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_node_scrub);
if (!ca) {
ret = -BCH_ERR_device_offline;
goto err;
@ -1994,9 +2007,9 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
return 0;
err_free:
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub);
err:
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_node_scrub);
return ret;
}
@ -2111,6 +2124,7 @@ static void btree_node_write_work(struct work_struct *work)
goto err;
}
out:
async_object_list_del(c, btree_write_bio, wbio->list_idx);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b, start_time);
return;
@ -2162,7 +2176,8 @@ static void btree_node_write_endio(struct bio *bio)
* btree writes yet (due to device removal/ro):
*/
if (wbio->have_ioref)
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_btree_node_write);
if (parent) {
bio_put(bio);
@ -2461,6 +2476,8 @@ do_write:
atomic64_inc(&c->btree_write_stats[type].nr);
atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
async_object_list_add(c, btree_write_bio, wbio, &wbio->list_idx);
INIT_WORK(&wbio->work, btree_write_submit);
queue_work(c->btree_write_submit_wq, &wbio->work);
return;


@ -41,6 +41,9 @@ struct btree_read_bio {
u64 start_time;
unsigned have_ioref:1;
unsigned idx:7;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
unsigned list_idx;
#endif
struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;
@ -53,6 +56,9 @@ struct btree_write_bio {
unsigned data_bytes;
unsigned sector_offset;
u64 start_time;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
unsigned list_idx;
#endif
struct bch_write_bio wbio;
};
@ -133,6 +139,8 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
const struct bkey_i *, unsigned);
void bch2_btree_read_bio_to_text(struct printbuf *, struct btree_read_bio *);
int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, unsigned);


@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
bio_put(bio);
free_page((unsigned long) buf);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
return 0;
@ -285,13 +285,13 @@ static int read_btree_nodes(struct find_btree_nodes *f)
closure_init_stack(&cl);
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) {
if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
continue;
struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
if (!w) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
ret = -ENOMEM;
goto err;
}
@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f)
struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
kfree(w);
bch_err_msg(c, ret, "starting kthread");
break;
}
closure_get(&cl);
percpu_ref_get(&ca->io_ref[READ]);
enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
wake_up_process(t);
}
err:


@ -11,6 +11,7 @@
#include "btree_write_buffer.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
@ -994,7 +995,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
if (!(flags & BCH_TRANS_COMMIT_no_check_rw) &&
unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) {
unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) {
if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags)))
ret = do_bch2_trans_commit_to_journal_replay(trans);
else
@ -1060,7 +1061,7 @@ retry:
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans);
out_reset:
if (!ret)
bch2_trans_downgrade(trans);


@ -14,6 +14,7 @@
#include "btree_locking.h"
#include "buckets.h"
#include "clock.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
@ -2326,7 +2327,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
closure_wake_up(&c->btree_node_rewrites_wait);
bch2_bkey_buf_exit(&a->key, c);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
@ -2348,7 +2349,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
spin_lock(&c->btree_node_rewrites_lock);
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_journal_replay &&
bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) {
enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) {
list_add(&a->list, &c->btree_node_rewrites);
now = true;
} else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) {
@ -2387,7 +2388,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
if (!a)
break;
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
enumerated_ref_get(&c->writes, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_node_rewrite_worker, &a->work);
}
}


@ -7,6 +7,7 @@
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "disk_accounting.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@ -629,11 +630,11 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer))
return -BCH_ERR_erofs_no_writes;
int ret = bch2_btree_write_buffer_flush_nocheck_rw(trans);
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
return ret;
}
@ -692,7 +693,7 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
} while (!ret && bch2_btree_write_buffer_should_flush(c));
mutex_unlock(&wb->flushing.lock);
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
}
static void wb_accounting_sort(struct btree_write_buffer *wb)
@ -821,9 +822,9 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
if (bch2_btree_write_buffer_should_flush(c) &&
__bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_write_buffer) &&
__enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer) &&
!queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);
if (dst->wb == &wb->flushing)
mutex_unlock(&wb->flushing.lock);


@ -1139,10 +1139,10 @@ int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca,
int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
enum btree_iter_update_trigger_flags flags)
{
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_trans_mark_dev_sbs) {
int ret = bch2_trans_mark_dev_sb(c, ca, flags);
if (ret) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_trans_mark_dev_sbs);
return ret;
}
}


@ -613,11 +613,13 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
if (!dev)
return -EINVAL;
for_each_online_member(c, ca)
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->dev == dev) {
percpu_ref_put(&ca->io_ref[READ]);
rcu_read_unlock();
return ca->dev_idx;
}
rcu_read_unlock();
return -BCH_ERR_ENOENT_dev_idx_not_found;
}


@ -587,6 +587,10 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
prt_newline(out);
prt_str_indented(out, "scrub:\t");
prt_u64(out, data_opts->scrub);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
@ -607,9 +611,17 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
prt_newline(out);
printbuf_indent_add(out, 2);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
prt_printf(out, "read_done:\t%u\n", m->read_done);
bch2_write_op_to_text(out, &m->op);
printbuf_indent_sub(out, 2);
if (!m->read_done) {
prt_printf(out, "read:\n");
printbuf_indent_add(out, 2);
bch2_read_bio_to_text(out, &m->rbio);
} else {
prt_printf(out, "write:\n");
printbuf_indent_add(out, 2);
bch2_write_op_to_text(out, &m->op);
}
printbuf_indent_sub(out, 4);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,


@ -50,6 +50,21 @@ struct data_update {
struct bio_vec *bvecs;
};
struct promote_op {
struct rcu_head rcu;
u64 start_time;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
unsigned list_idx;
#endif
struct rhash_head hash;
struct bpos pos;
struct work_struct work;
struct data_update write;
struct bio_vec bi_inline_vecs[]; /* must be last */
};
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *);


@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "async_objs.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_io.h"
@ -16,6 +17,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "data_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
@ -42,7 +44,8 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
struct bio *bio;
bool failed = false, saw_error = false;
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_verify_replicas);
if (!ca)
return false;
@ -57,7 +60,8 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
submit_bio_wait(bio);
bio_put(bio);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_btree_verify_replicas);
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
@ -196,7 +200,8 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
return;
}
ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_btree_node_ondisk_to_text);
if (!ca) {
prt_printf(out, "error getting device to read from: not online\n");
return;
@ -297,28 +302,13 @@ out:
if (bio)
bio_put(bio);
kvfree(n_ondisk);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_btree_node_ondisk_to_text);
}
#ifdef CONFIG_DEBUG_FS
/* XXX: bch_fs refcounting */
struct dump_iter {
struct bch_fs *c;
enum btree_id id;
struct bpos from;
struct bpos prev_node;
u64 iter;
struct printbuf buf;
char __user *ubuf; /* destination user buffer */
size_t size; /* size of requested read */
ssize_t ret; /* bytes read so far */
};
static ssize_t flush_buf(struct dump_iter *i)
ssize_t bch2_debugfs_flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
@ -356,7 +346,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file)
return 0;
}
static int bch2_dump_release(struct inode *inode, struct file *file)
int bch2_dump_release(struct inode *inode, struct file *file)
{
struct dump_iter *i = file->private_data;
@ -374,7 +364,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
return flush_buf(i) ?:
return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@ -383,7 +373,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
prt_newline(&i->buf);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
flush_buf(i);
bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@ -404,7 +394,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
ssize_t ret = flush_buf(i);
ssize_t ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@ -418,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
? bpos_successor(b->key.k.p)
: b->key.k.p;
drop_locks_do(trans, flush_buf(i));
drop_locks_do(trans, bch2_debugfs_flush_buf(i));
}))) ?: i->ret;
}
@ -438,7 +428,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
return flush_buf(i) ?:
return bch2_debugfs_flush_buf(i) ?:
bch2_trans_run(i->c,
for_each_btree_key(trans, iter, i->id, i->from,
BTREE_ITER_prefetch|
@ -456,7 +446,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
bch2_bfloat_to_text(&i->buf, l->b, _k);
bch2_trans_unlock(trans);
i->from = bpos_successor(iter.pos);
flush_buf(i);
bch2_debugfs_flush_buf(i);
}))) ?:
i->ret;
}
@ -517,7 +507,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct rhash_head *pos;
struct btree *b;
ret = flush_buf(i);
ret = bch2_debugfs_flush_buf(i);
if (ret)
return ret;
@ -540,7 +530,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
ret = flush_buf(i);
ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@ -614,7 +604,7 @@ restart:
closure_put(&trans->ref);
ret = flush_buf(i);
ret = bch2_debugfs_flush_buf(i);
if (ret)
goto unlocked;
@ -627,7 +617,7 @@ unlocked:
ret = -ENOMEM;
if (!ret)
ret = flush_buf(i);
ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@ -652,7 +642,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->ret = 0;
while (1) {
err = flush_buf(i);
err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@ -695,7 +685,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
i->iter++;
}
err = flush_buf(i);
err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@ -753,7 +743,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
while (1) {
struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
err = flush_buf(i);
err = bch2_debugfs_flush_buf(i);
if (err)
return err;
@ -874,7 +864,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf,
ret = -ENOMEM;
if (!ret)
ret = flush_buf(i);
ret = bch2_debugfs_flush_buf(i);
return ret ?: i->ret;
}
@ -963,6 +953,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("write_points", 0400, c->fs_debug_dir,
c->btree_debug, &write_points_ops);
bch2_fs_async_obj_debugfs_init(c);
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;


@ -19,6 +19,24 @@ static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b)
}
#ifdef CONFIG_DEBUG_FS
struct dump_iter {
struct bch_fs *c;
struct async_obj_list *list;
enum btree_id id;
struct bpos from;
struct bpos prev_node;
u64 iter;
struct printbuf buf;
char __user *ubuf; /* destination user buffer */
size_t size; /* size of requested read */
ssize_t ret; /* bytes read so far */
};
ssize_t bch2_debugfs_flush_buf(struct dump_iter *);
int bch2_dump_release(struct inode *, struct file *);
void bch2_fs_debug_exit(struct bch_fs *);
void bch2_fs_debug_init(struct bch_fs *);
#else


@ -555,14 +555,12 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
? rcu_dereference(c->devs[t.dev])
: NULL;
if (ca && percpu_ref_tryget(&ca->io_ref[READ])) {
if (ca && ca->disk_sb.bdev)
prt_printf(out, "/dev/%s", ca->name);
percpu_ref_put(&ca->io_ref[READ]);
} else if (ca) {
else if (ca)
prt_printf(out, "offline device %u", t.dev);
} else {
else
prt_printf(out, "invalid device %u", t.dev);
}
rcu_read_unlock();
out->atomic--;


@ -16,6 +16,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_write.h"
@ -706,6 +707,9 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
unsigned ref = rw == READ
? BCH_DEV_READ_REF_ec_block
: BCH_DEV_WRITE_REF_ec_block;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
@ -727,7 +731,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
percpu_ref_put(&ca->io_ref[rw]);
enumerated_ref_put(&ca->io_ref[rw], ref);
closure_put(cl);
}
@ -741,8 +745,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
? BCH_DATA_user
: BCH_DATA_parity;
int rw = op_is_write(opf);
unsigned ref = rw == READ
? BCH_DEV_READ_REF_ec_block
: BCH_DEV_WRITE_REF_ec_block;
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw);
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref);
if (!ca) {
clear_bit(idx, buf->valid);
return;
@ -788,14 +795,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
percpu_ref_get(&ca->io_ref[rw]);
enumerated_ref_get(&ca->io_ref[rw], ref);
submit_bio(&ec_bio->bio);
offset += b;
}
percpu_ref_put(&ca->io_ref[rw]);
enumerated_ref_put(&ca->io_ref[rw], ref);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@ -1017,14 +1024,14 @@ static void ec_stripe_delete_work(struct work_struct *work)
BCH_TRANS_COMMIT_no_enospc, ({
ec_stripe_delete(trans, lru_k.k->p.offset);
})));
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
void bch2_do_stripe_deletes(struct bch_fs *c)
{
if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
/* stripe creation: */
@ -1252,7 +1259,8 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
unsigned block,
struct open_bucket *ob)
{
struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE);
struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE,
BCH_DEV_WRITE_REF_ec_bucket_zero);
if (!ca) {
s->err = -BCH_ERR_erofs_no_writes;
return;
@ -1268,7 +1276,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_ec_bucket_zero);
if (ret)
s->err = ret;
@ -1418,15 +1426,15 @@ static void ec_stripe_create_work(struct work_struct *work)
while ((s = get_pending_stripe(c)))
ec_stripe_create(s);
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
void bch2_ec_do_stripe_creates(struct bch_fs *c)
{
bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)


@ -0,0 +1,144 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "enumerated_ref.h"
#include "util.h"
#include <linux/completion.h>
#ifdef ENUMERATED_REF_DEBUG
void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
{
BUG_ON(idx >= ref->nr);
atomic_long_inc(&ref->refs[idx]);
}
bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
{
BUG_ON(idx >= ref->nr);
return atomic_long_inc_not_zero(&ref->refs[idx]);
}
bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
{
BUG_ON(idx >= ref->nr);
return !ref->dying &&
atomic_long_inc_not_zero(&ref->refs[idx]);
}
void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
{
BUG_ON(idx >= ref->nr);
long v = atomic_long_dec_return(&ref->refs[idx]);
BUG_ON(v < 0);
if (v)
return;
for (unsigned i = 0; i < ref->nr; i++)
if (atomic_long_read(&ref->refs[i]))
return;
if (ref->stop_fn)
ref->stop_fn(ref);
complete(&ref->stop_complete);
}
#endif
#ifndef ENUMERATED_REF_DEBUG
static void enumerated_ref_kill_cb(struct percpu_ref *percpu_ref)
{
struct enumerated_ref *ref =
container_of(percpu_ref, struct enumerated_ref, ref);
if (ref->stop_fn)
ref->stop_fn(ref);
complete(&ref->stop_complete);
}
#endif
void enumerated_ref_stop_async(struct enumerated_ref *ref)
{
reinit_completion(&ref->stop_complete);
#ifndef ENUMERATED_REF_DEBUG
percpu_ref_kill(&ref->ref);
#else
ref->dying = true;
for (unsigned i = 0; i < ref->nr; i++)
enumerated_ref_put(ref, i);
#endif
}
void enumerated_ref_stop(struct enumerated_ref *ref,
const char * const names[])
{
enumerated_ref_stop_async(ref);
while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) {
struct printbuf buf = PRINTBUF;
prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n");
prt_str(&buf, "Outstanding refs:\n");
enumerated_ref_to_text(&buf, ref, names);
printk(KERN_ERR "%s", buf.buf);
printbuf_exit(&buf);
}
}
void enumerated_ref_start(struct enumerated_ref *ref)
{
#ifndef ENUMERATED_REF_DEBUG
percpu_ref_reinit(&ref->ref);
#else
ref->dying = false;
for (unsigned i = 0; i < ref->nr; i++) {
BUG_ON(atomic_long_read(&ref->refs[i]));
atomic_long_inc(&ref->refs[i]);
}
#endif
}
void enumerated_ref_exit(struct enumerated_ref *ref)
{
#ifndef ENUMERATED_REF_DEBUG
percpu_ref_exit(&ref->ref);
#else
kfree(ref->refs);
ref->refs = NULL;
ref->nr = 0;
#endif
}
int enumerated_ref_init(struct enumerated_ref *ref, unsigned nr,
void (*stop_fn)(struct enumerated_ref *))
{
init_completion(&ref->stop_complete);
ref->stop_fn = stop_fn;
#ifndef ENUMERATED_REF_DEBUG
return percpu_ref_init(&ref->ref, enumerated_ref_kill_cb,
PERCPU_REF_INIT_DEAD, GFP_KERNEL);
#else
ref->refs = kzalloc(sizeof(ref->refs[0]) * nr, GFP_KERNEL);
if (!ref->refs)
return -ENOMEM;
ref->nr = nr;
return 0;
#endif
}
void enumerated_ref_to_text(struct printbuf *out,
struct enumerated_ref *ref,
const char * const names[])
{
#ifdef ENUMERATED_REF_DEBUG
bch2_printbuf_tabstop_push(out, 32);
for (unsigned i = 0; i < ref->nr; i++)
prt_printf(out, "%s\t%li\n", names[i],
atomic_long_read(&ref->refs[i]));
#else
prt_str(out, "(not in debug mode)\n");
#endif
}


@ -0,0 +1,66 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ENUMERATED_REF_H
#define _BCACHEFS_ENUMERATED_REF_H
#include "enumerated_ref_types.h"
/*
* A refcount where the users are enumerated: in debug mode, we create sepate
* refcounts for each user, to make leaks and refcount errors easy to track
* down:
*/
#ifdef ENUMERATED_REF_DEBUG
void enumerated_ref_get(struct enumerated_ref *, unsigned);
bool __enumerated_ref_tryget(struct enumerated_ref *, unsigned);
bool enumerated_ref_tryget(struct enumerated_ref *, unsigned);
void enumerated_ref_put(struct enumerated_ref *, unsigned);
#else
static inline void enumerated_ref_get(struct enumerated_ref *ref, unsigned idx)
{
percpu_ref_get(&ref->ref);
}
static inline bool __enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
{
return percpu_ref_tryget(&ref->ref);
}
static inline bool enumerated_ref_tryget(struct enumerated_ref *ref, unsigned idx)
{
return percpu_ref_tryget_live(&ref->ref);
}
static inline void enumerated_ref_put(struct enumerated_ref *ref, unsigned idx)
{
percpu_ref_put(&ref->ref);
}
#endif
static inline bool enumerated_ref_is_zero(struct enumerated_ref *ref)
{
#ifndef ENUMERATED_REF_DEBUG
return percpu_ref_is_zero(&ref->ref);
#else
for (unsigned i = 0; i < ref->nr; i++)
if (atomic_long_read(&ref->refs[i]))
return false;
return true;
#endif
}
void enumerated_ref_stop_async(struct enumerated_ref *);
void enumerated_ref_stop(struct enumerated_ref *, const char * const[]);
void enumerated_ref_start(struct enumerated_ref *);
void enumerated_ref_exit(struct enumerated_ref *);
int enumerated_ref_init(struct enumerated_ref *, unsigned,
void (*stop_fn)(struct enumerated_ref *));
struct printbuf;
void enumerated_ref_to_text(struct printbuf *,
struct enumerated_ref *,
const char * const[]);
#endif /* _BCACHEFS_ENUMERATED_REF_H */
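The two tryget variants differ only during shutdown: enumerated_ref_tryget() fails as soon as enumerated_ref_stop_async() has run (percpu_ref_tryget_live, or the dying flag in debug builds), while __enumerated_ref_tryget() keeps succeeding until the count truly reaches zero — which is why the write buffer flush earlier in this diff uses the double-underscore form, so an in-progress flush can requeue itself while the fs drains. A sketch, with both patterns taken from hunks above:

	/* ordinary writers back off once shutdown has begun: */
	if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))
		return -BCH_ERR_erofs_no_writes;

	/* drain-path work may still take a ref until the count hits zero: */
	if (__enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_btree_write_buffer) &&
	    !queue_work(system_unbound_wq, &c->btree_write_buffer.flush_work))
		enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer);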


@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ENUMERATED_REF_TYPES_H
#define _BCACHEFS_ENUMERATED_REF_TYPES_H
#include <linux/percpu-refcount.h>
struct enumerated_ref {
#ifdef ENUMERATED_REF_DEBUG
unsigned nr;
bool dying;
atomic_long_t *refs;
#else
struct percpu_ref ref;
#endif
void (*stop_fn)(struct enumerated_ref *);
struct completion stop_complete;
};
#endif /* _BCACHEFS_ENUMERATED_REF_TYPES_H */


@ -53,6 +53,7 @@
x(ENOMEM, ENOMEM_dio_write_bioset_init) \
x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \
x(ENOMEM, ENOMEM_promote_table_init) \
x(ENOMEM, ENOMEM_async_obj_init) \
x(ENOMEM, ENOMEM_compression_bounce_read_init) \
x(ENOMEM, ENOMEM_compression_bounce_write_init) \
x(ENOMEM, ENOMEM_compression_workspace_init) \

libbcachefs/fast_list.h Normal file

@ -0,0 +1,41 @@
#ifndef _LINUX_FAST_LIST_H
#define _LINUX_FAST_LIST_H
#include <linux/generic-radix-tree.h>
#include <linux/idr.h>
#include <linux/percpu.h>
struct fast_list_pcpu;
struct fast_list {
GENRADIX(void *) items;
struct ida slots_allocated;;
struct fast_list_pcpu __percpu
*buffer;
};
static inline void *fast_list_iter_peek(struct genradix_iter *iter,
struct fast_list *list)
{
void **p;
while ((p = genradix_iter_peek(iter, &list->items)) && !*p)
genradix_iter_advance(iter, &list->items);
return p ? *p : NULL;
}
#define fast_list_for_each_from(_list, _iter, _i, _start) \
for (_iter = genradix_iter_init(&(_list)->items, _start); \
(_i = fast_list_iter_peek(&(_iter), _list)) != NULL; \
genradix_iter_advance(&(_iter), &(_list)->items))
#define fast_list_for_each(_list, _iter, _i) \
fast_list_for_each_from(_list, _iter, _i, 0)
int fast_list_get_idx(struct fast_list *l);
int fast_list_add(struct fast_list *l, void *item);
void fast_list_remove(struct fast_list *l, unsigned idx);
void fast_list_exit(struct fast_list *l);
int fast_list_init(struct fast_list *l);
#endif /* _LINUX_FAST_LIST_H */
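A hedged usage sketch of the API above (struct my_obj is illustrative): fast_list_add() returns the allocated slot — positive on success, negative on error — which is exactly how __async_object_list_add() in async_objs.h interprets it, and that slot is what fast_list_remove() takes back.

	struct my_obj { int x; };

	static int fast_list_demo(void)
	{
		struct fast_list list;
		struct my_obj obj = { .x = 1 };
		struct genradix_iter iter;
		void *i;

		int ret = fast_list_init(&list);
		if (ret)
			return ret;

		int slot = fast_list_add(&list, &obj);	/* > 0 on success */
		if (slot < 0) {
			fast_list_exit(&list);
			return slot;
		}

		fast_list_for_each(&list, iter, i)	/* skips empty slots */
			((struct my_obj *) i)->x++;

		fast_list_remove(&list, slot);
		fast_list_exit(&list);
		return 0;
	}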


@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "enumerated_ref.h"
#include "fs.h"
#include "fs-io.h"
#include "fs-io-direct.h"
@ -401,7 +402,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@ -606,7 +607,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_dio_write))
return -EROFS;
inode_lock(&inode->v);
@ -675,7 +676,7 @@ err_put_bio:
bio_put(bio);
inode_dio_end(&inode->v);
err_put_write_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_dio_write);
goto out;
}


@ -7,6 +7,7 @@
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "extent_update.h"
@ -48,7 +49,8 @@ static void nocow_flush_endio(struct bio *_bio)
struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
closure_put(bio->cl);
percpu_ref_put(&bio->ca->io_ref[WRITE]);
enumerated_ref_put(&bio->ca->io_ref[WRITE],
BCH_DEV_WRITE_REF_nocow_flush);
bio_put(&bio->bio);
}
@ -71,7 +73,8 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
rcu_read_lock();
ca = rcu_dereference(c->devs[dev]);
if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE]))
if (ca && !enumerated_ref_tryget(&ca->io_ref[WRITE],
BCH_DEV_WRITE_REF_nocow_flush))
ca = NULL;
rcu_read_unlock();
@ -205,7 +208,7 @@ static int bch2_flush_inode(struct bch_fs *c,
if (c->opts.journal_flush_disabled)
return 0;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fsync))
return -EROFS;
u64 seq;
@ -213,7 +216,7 @@ static int bch2_flush_inode(struct bch_fs *c,
bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync);
return ret;
}
@ -796,7 +799,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
long ret;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_fallocate))
return -EROFS;
inode_lock(&inode->v);
@ -820,7 +823,7 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
err:
bch2_pagecache_block_put(inode);
inode_unlock(&inode->v);
bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_fallocate);
return bch2_err_class(ret);
}


@ -21,206 +21,6 @@
#define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
struct flags_set {
unsigned mask;
unsigned flags;
unsigned projid;
bool set_projinherit;
bool projinherit;
};
static int bch2_inode_flags_set(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
/*
* We're relying on btree locking here for exclusion with other ioctl
* calls - use the flags in the btree (@bi), not inode->i_flags:
*/
struct flags_set *s = p;
unsigned newflags = s->flags;
unsigned oldflags = bi->bi_flags & s->mask;
if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
if (!S_ISREG(bi->bi_mode) &&
!S_ISDIR(bi->bi_mode) &&
(newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
return -EINVAL;
if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return -EOPNOTSUPP;
/*
* Make sure the dir is empty, as otherwise we'd need to
* rehash everything and update the dirent keys.
*/
ret = bch2_empty_dir_trans(trans, inode_inum(inode));
if (ret < 0)
return ret;
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
if (ret)
return ret;
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
return -EOPNOTSUPP;
#endif
}
if (s->set_projinherit) {
bi->bi_fields_set &= ~(1 << Inode_opt_project);
bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
}
bi->bi_flags &= ~s->mask;
bi->bi_flags |= newflags;
bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
return 0;
}
static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg)
{
unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags);
return put_user(flags, arg);
}
static int bch2_ioc_setflags(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
void __user *arg)
{
struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) };
unsigned uflags;
int ret;
if (get_user(uflags, (int __user *) arg))
return -EFAULT;
s.flags = map_flags_rev(bch_flags_to_uflags, uflags);
if (uflags)
return -EOPNOTSUPP;
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(&inode->v);
if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto setflags_out;
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
setflags_out:
inode_unlock(&inode->v);
mnt_drop_write_file(file);
return ret;
}
static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
struct fsxattr __user *arg)
{
struct fsxattr fa = { 0 };
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
fa.fsx_xflags |= FS_XFLAG_PROJINHERIT;
fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;
return 0;
}
static int fssetxattr_inode_update_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct flags_set *s = p;
if (s->projid != bi->bi_project) {
bi->bi_fields_set |= 1U << Inode_opt_project;
bi->bi_project = s->projid;
}
return bch2_inode_flags_set(trans, inode, bi, p);
}
static int bch2_ioc_fssetxattr(struct bch_fs *c,
struct file *file,
struct bch_inode_info *inode,
struct fsxattr __user *arg)
{
struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) };
struct fsxattr fa;
int ret;
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
s.set_projinherit = true;
s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0;
fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags);
if (fa.fsx_xflags)
return -EOPNOTSUPP;
if (fa.fsx_projid >= U32_MAX)
return -EINVAL;
/*
* inode fields accessible via the xattr interface are stored with a +1
* bias, so that 0 means unset:
*/
s.projid = fa.fsx_projid + 1;
ret = mnt_want_write_file(file);
if (ret)
return ret;
inode_lock(&inode->v);
if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) {
ret = -EACCES;
goto err;
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_set_projid(c, inode, fa.fsx_projid) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);
mnt_drop_write_file(file);
return ret;
}
static int bch2_reinherit_attrs_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
long ret;
switch (cmd) {
case FS_IOC_GETFLAGS:
ret = bch2_ioc_getflags(inode, (int __user *) arg);
break;
case FS_IOC_SETFLAGS:
ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg);
break;
case FS_IOC_FSGETXATTR:
ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg);
break;
case FS_IOC_FSSETXATTR:
ret = bch2_ioc_fssetxattr(c, file, inode,
(void __user *) arg);
break;
case BCHFS_IOC_REINHERIT_ATTRS:
ret = bch2_ioc_reinherit_attrs(c, file, inode,
(void __user *) arg);


@ -2,81 +2,6 @@
#ifndef _BCACHEFS_FS_IOCTL_H
#define _BCACHEFS_FS_IOCTL_H
/* Inode flags: */
/* bcachefs inode flags -> vfs inode flags: */
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_sync] = S_SYNC,
[__BCH_INODE_immutable] = S_IMMUTABLE,
[__BCH_INODE_append] = S_APPEND,
[__BCH_INODE_noatime] = S_NOATIME,
[__BCH_INODE_casefolded] = S_CASEFOLD,
};
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
[__BCH_INODE_sync] = FS_SYNC_FL,
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
[__BCH_INODE_append] = FS_APPEND_FL,
[__BCH_INODE_nodump] = FS_NODUMP_FL,
[__BCH_INODE_noatime] = FS_NOATIME_FL,
[__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_sync] = FS_XFLAG_SYNC,
[__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
[__BCH_INODE_append] = FS_XFLAG_APPEND,
[__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
[__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
//[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
};
#define set_flags(_map, _in, _out) \
do { \
unsigned _i; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & (1 << _i)) \
(_out) |= _map[_i]; \
else \
(_out) &= ~_map[_i]; \
} while (0)
#define map_flags(_map, _in) \
({ \
unsigned _out = 0; \
\
set_flags(_map, _in, _out); \
_out; \
})
#define map_flags_rev(_map, _in) \
({ \
unsigned _i, _out = 0; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & _map[_i]) { \
(_out) |= 1 << _i; \
(_in) &= ~_map[_i]; \
} \
(_out); \
})
#define map_defined(_map) \
({ \
unsigned _in = ~0; \
\
map_flags_rev(_map, _in); \
})
/* Set VFS inode flags from bcachefs inode: */
static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
{
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
}
long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long);
long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long);


@ -33,6 +33,7 @@
#include <linux/backing-dev.h>
#include <linux/exportfs.h>
#include <linux/fiemap.h>
#include <linux/fileattr.h>
#include <linux/fs_context.h>
#include <linux/module.h>
#include <linux/pagemap.h>
@ -51,6 +52,22 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,
struct bch_subvolume *);
/* Set VFS inode flags from bcachefs inode: */
static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode)
{
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
[__BCH_INODE_sync] = S_SYNC,
[__BCH_INODE_immutable] = S_IMMUTABLE,
[__BCH_INODE_append] = S_APPEND,
[__BCH_INODE_noatime] = S_NOATIME,
};
set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags);
if (inode->ei_inode.bi_casefold)
inode->v.i_flags |= S_CASEFOLD;
}
void bch2_inode_update_after_write(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@ -1448,6 +1465,165 @@ static int bch2_open(struct inode *vinode, struct file *file)
return generic_file_open(vinode, file);
}
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
[__BCH_INODE_sync] = FS_SYNC_FL,
[__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
[__BCH_INODE_append] = FS_APPEND_FL,
[__BCH_INODE_nodump] = FS_NODUMP_FL,
[__BCH_INODE_noatime] = FS_NOATIME_FL,
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_sync] = FS_XFLAG_SYNC,
[__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
[__BCH_INODE_append] = FS_XFLAG_APPEND,
[__BCH_INODE_nodump] = FS_XFLAG_NODUMP,
[__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
};
static int bch2_fileattr_get(struct dentry *dentry,
struct fileattr *fa)
{
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags));
if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project))
fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
if (inode->ei_inode.bi_casefold)
fa->flags |= FS_CASEFOLD_FL;
fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ];
return 0;
}
struct flags_set {
unsigned mask;
unsigned flags;
unsigned projid;
bool set_project;
bool set_casefold;
bool casefold;
};
static int fssetxattr_inode_update_fn(struct btree_trans *trans,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
void *p)
{
struct bch_fs *c = trans->c;
struct flags_set *s = p;
/*
* We're relying on btree locking here for exclusion with other ioctl
* calls - use the flags in the btree (@bi), not inode->i_flags:
*/
if (!S_ISREG(bi->bi_mode) &&
!S_ISDIR(bi->bi_mode) &&
(s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags)
return -EINVAL;
if (s->casefold != bi->bi_casefold) {
#ifdef CONFIG_UNICODE
int ret = 0;
/* Not supported on individual files. */
if (!S_ISDIR(bi->bi_mode))
return -EOPNOTSUPP;
/*
* Make sure the dir is empty, as otherwise we'd need to
* rehash everything and update the dirent keys.
*/
ret = bch2_empty_dir_trans(trans, inode_inum(inode));
if (ret < 0)
return ret;
ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding);
if (ret)
return ret;
bch2_check_set_feature(c, BCH_FEATURE_casefolding);
bi->bi_casefold = s->casefold;
bi->bi_fields_set &= ~BIT(Inode_opt_casefold);
bi->bi_fields_set |= s->casefold << Inode_opt_casefold;
#else
printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
return -EOPNOTSUPP;
#endif
}
if (s->set_project) {
bi->bi_project = s->projid;
bi->bi_fields_set |= BIT(Inode_opt_project);
}
bi->bi_flags &= ~s->mask;
bi->bi_flags |= s->flags;
bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v));
return 0;
}
static int bch2_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry,
struct fileattr *fa)
{
struct bch_inode_info *inode = to_bch_ei(d_inode(dentry));
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct flags_set s = {};
int ret;
if (fa->fsx_valid) {
fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT;
s.mask = map_defined(bch_flags_to_xflags);
s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags);
if (fa->fsx_xflags)
return -EOPNOTSUPP;
if (fa->fsx_projid >= U32_MAX)
return -EINVAL;
/*
* inode fields accessible via the xattr interface are stored with a +1
* bias, so that 0 means unset:
*/
if ((inode->ei_inode.bi_project ||
fa->fsx_projid) &&
inode->ei_inode.bi_project != fa->fsx_projid + 1) {
s.projid = fa->fsx_projid + 1;
s.set_project = true;
}
}
if (fa->flags_valid) {
s.mask = map_defined(bch_flags_to_uflags);
s.set_casefold = true;
s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0;
fa->flags &= ~FS_CASEFOLD_FL;
s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags);
if (fa->flags)
return -EOPNOTSUPP;
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
(s.set_project
? bch2_set_projid(c, inode, fa->fsx_projid)
: 0) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
return ret;
}
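With the per-ioctl handlers above replaced by generic fileattr hooks, userspace reaches this path through the stock FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctls (what chattr +F issues), which the VFS routes into bch2_fileattr_get()/bch2_fileattr_set(). A minimal userspace sketch, assuming an empty directory on a kernel built with CONFIG_UNICODE; enable_casefold is a hypothetical helper, not part of this commit:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

/* Hypothetical helper: enable casefolding on an (empty) directory.
 * FS_IOC_SETFLAGS lands in bch2_fileattr_set() above, which rejects
 * regular files and non-empty directories as shown. */
static int enable_casefold(const char *path)
{
	int fd = open(path, O_RDONLY | O_DIRECTORY);
	if (fd < 0)
		return -1;

	int flags = 0;
	int ret = ioctl(fd, FS_IOC_GETFLAGS, &flags);
	if (!ret) {
		flags |= FS_CASEFOLD_FL;
		ret = ioctl(fd, FS_IOC_SETFLAGS, &flags);
	}
	if (ret)
		perror("casefold");	/* e.g. EOPNOTSUPP without CONFIG_UNICODE */
	close(fd);
	return ret;
}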
static const struct file_operations bch_file_operations = {
.open = bch2_open,
.llseek = bch2_llseek,
@ -1475,6 +1651,8 @@ static const struct inode_operations bch_file_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
.fileattr_get = bch2_fileattr_get,
.fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_dir_inode_operations = {
@ -1495,6 +1673,8 @@ static const struct inode_operations bch_dir_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
.fileattr_get = bch2_fileattr_get,
.fileattr_set = bch2_fileattr_set,
};
static const struct file_operations bch_dir_file_operations = {
@ -1517,6 +1697,8 @@ static const struct inode_operations bch_symlink_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
.fileattr_get = bch2_fileattr_get,
.fileattr_set = bch2_fileattr_set,
};
static const struct inode_operations bch_special_inode_operations = {
@ -1527,6 +1709,8 @@ static const struct inode_operations bch_special_inode_operations = {
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
.fileattr_get = bch2_fileattr_get,
.fileattr_set = bch2_fileattr_set,
};
static const struct address_space_operations bch_address_space_operations = {
@ -2032,12 +2216,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
struct bch_fs *c = root->d_sb->s_fs_info;
bool first = true;
for_each_online_member(c, ca) {
rcu_read_lock();
for_each_online_member_rcu(c, ca) {
if (!first)
seq_putc(seq, ':');
first = false;
seq_puts(seq, ca->disk_sb.sb_name);
}
rcu_read_unlock();
return 0;
}
@ -2235,15 +2421,16 @@ got_sb:
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
for_each_online_member(c, ca) {
rcu_read_lock();
for_each_online_member_rcu(c, ca) {
struct block_device *bdev = ca->disk_sb.bdev;
/* XXX: create an anonymous device for multi device filesystems */
sb->s_bdev = bdev;
sb->s_dev = bdev->bd_dev;
percpu_ref_put(&ca->io_ref[READ]);
break;
}
rcu_read_unlock();
c->dev = sb->s_dev;


@ -103,7 +103,8 @@ struct bch_inode_generation {
x(bi_parent_subvol, 32) \
x(bi_nocow, 8) \
x(bi_depth, 32) \
x(bi_inodes_32bit, 8)
x(bi_inodes_32bit, 8) \
x(bi_casefold, 8)
/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
@ -117,7 +118,8 @@ struct bch_inode_generation {
x(background_target, 16) \
x(erasure_code, 16) \
x(nocow, 8) \
x(inodes_32bit, 8)
x(inodes_32bit, 8) \
x(casefold, 8)
enum inode_opt_id {
#define x(name, ...) \
@ -137,8 +139,7 @@ enum inode_opt_id {
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
x(backptr_untrusted, 8) \
x(has_child_snapshot, 9) \
x(casefolded, 10)
x(has_child_snapshot, 9)
/* bits 20+ reserved for packed fields below: */


@ -9,6 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "async_objs.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
@ -17,6 +18,7 @@
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_misc.h"
@ -81,18 +83,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
/* Cache promotion on read */
struct promote_op {
struct rcu_head rcu;
u64 start_time;
struct rhash_head hash;
struct bpos pos;
struct work_struct work;
struct data_update write;
struct bio_vec bi_inline_vecs[]; /* must be last */
};
static const struct rhashtable_params bch_promote_params = {
.head_offset = offsetof(struct promote_op, hash),
.key_offset = offsetof(struct promote_op, pos),
@ -170,9 +160,11 @@ static noinline void promote_free(struct bch_read_bio *rbio)
bch_promote_params);
BUG_ON(ret);
async_object_list_del(c, promote, op->list_idx);
bch2_data_update_exit(&op->write);
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
kfree_rcu(op, rcu);
}
@ -237,7 +229,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
return NULL;
}
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_promote))
return ERR_PTR(-BCH_ERR_nopromote_no_writes);
struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
@ -255,6 +247,10 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
goto err;
}
ret = async_object_list_add(c, promote, op, &op->list_idx);
if (ret < 0)
goto err_remove_hash;
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
writepoint_hashed((unsigned long) current),
&orig->opts,
@ -266,7 +262,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
* -BCH_ERR_ENOSPC_disk_reservation:
*/
if (ret)
goto err_remove_hash;
goto err_remove_list;
rbio_init_fragment(&op->write.rbio.bio, orig);
op->write.rbio.bounce = true;
@ -274,6 +270,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
op->write.op.end_io = promote_done;
return &op->write.rbio;
err_remove_list:
async_object_list_del(c, promote, op->list_idx);
err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params));
@ -282,7 +280,7 @@ err:
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
err_put:
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote);
return ERR_PTR(ret);
}
@ -346,6 +344,18 @@ nopromote:
return NULL;
}
void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op)
{
if (!op->write.read_done) {
prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
printbuf_indent_add(out, 2);
bch2_read_bio_to_text(out, op->write.rbio.parent);
printbuf_indent_sub(out, 2);
}
bch2_data_update_to_text(out, &op->write);
}
/* Read */
static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
@ -402,7 +412,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
if (rbio->have_ioref) {
struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
}
if (rbio->split) {
@ -414,6 +424,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
else
promote_free(rbio);
} else {
async_object_list_del(rbio->c, rbio, rbio->list_idx);
if (rbio->bounce)
bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
@ -1090,7 +1102,8 @@ retry_pick:
goto err;
}
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
BCH_DEV_READ_REF_io_read);
/*
* Stale dirty pointers are treated as IO errors, but @failed isn't
@ -1104,7 +1117,7 @@ retry_pick:
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
bch2_mark_io_failure(failed, &pick, false);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);
goto retry_pick;
}
@ -1137,7 +1150,8 @@ retry_pick:
*/
if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
if (ca)
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_io_read);
rbio->ret = -BCH_ERR_data_read_buffer_too_small;
goto out_read_done;
}
@ -1191,6 +1205,8 @@ retry_pick:
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
rbio->bounce = true;
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else if (flags & BCH_READ_must_clone) {
/*
* Have to clone if there were any splits, due to error
@ -1204,6 +1220,8 @@ retry_pick:
&c->bio_read_split),
orig);
rbio->bio.bi_iter = iter;
async_object_list_add(c, rbio, rbio, &rbio->list_idx);
} else {
rbio = orig;
rbio->bio.bi_iter = iter;
@ -1479,6 +1497,41 @@ err:
return ret;
}
static const char * const bch2_read_bio_flags[] = {
#define x(n) #n,
BCH_READ_FLAGS()
#undef x
NULL
};
void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
{
u64 now = local_clock();
prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0);
prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0);
if (!rbio->split)
prt_printf(out, "end_io:\t%ps\n", rbio->end_io);
else
prt_printf(out, "parent:\t%px\n", rbio->parent);
prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io);
prt_printf(out, "promote:\t%u\n", rbio->promote);
prt_printf(out, "bounce:\t%u\n", rbio->bounce);
prt_printf(out, "split:\t%u\n", rbio->split);
prt_printf(out, "have_ioref:\t%u\n", rbio->have_ioref);
prt_printf(out, "narrow_crcs:\t%u\n", rbio->narrow_crcs);
prt_printf(out, "context:\t%u\n", rbio->context);
prt_printf(out, "ret:\t%s\n", bch2_err_str(rbio->ret));
prt_printf(out, "flags:\t");
bch2_prt_bitflags(out, bch2_read_bio_flags, rbio->flags);
prt_newline(out);
bch2_bio_to_text(out, &rbio->bio);
}
void bch2_fs_io_read_exit(struct bch_fs *c)
{
if (c->promote_table.tbl)


@ -4,6 +4,7 @@
#include "bkey_buf.h"
#include "btree_iter.h"
#include "extents_types.h"
#include "reflink.h"
struct bch_read_bio {
@ -48,6 +49,9 @@ struct bch_read_bio {
u16 _state;
};
s16 ret;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
unsigned list_idx;
#endif
struct extent_ptr_decoded pick;
@ -173,6 +177,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
rbio->split = true;
rbio->parent = orig;
rbio->opts = orig->opts;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
rbio->list_idx = 0;
#endif
return rbio;
}
@ -190,9 +197,16 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio,
rbio->ret = 0;
rbio->opts = opts;
rbio->bio.bi_end_io = end_io;
#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
rbio->list_idx = 0;
#endif
return rbio;
}
struct promote_op;
void bch2_promote_op_to_text(struct printbuf *, struct promote_op *);
void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *);
void bch2_fs_io_read_exit(struct bch_fs *);
int bch2_fs_io_read_init(struct bch_fs *);


@ -15,6 +15,7 @@
#include "compress.h"
#include "debug.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extent_update.h"
#include "inode.h"
@ -441,6 +442,10 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k));
struct bch_write_bio *n;
unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE;
unsigned ref_idx = type == BCH_DATA_btree
? BCH_DEV_READ_REF_btree_node_write
: BCH_DEV_WRITE_REF_io_write;
BUG_ON(c->opts.nochanges);
@ -452,7 +457,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
*/
struct bch_dev *ca = nocow
? bch2_dev_have_ref(c, ptr->dev)
: bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE);
: bch2_dev_get_ioref(c, ptr->dev, ref_rw, ref_idx);
if (to_entry(ptr + 1) < ptrs.end) {
n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOFS, &c->replica_set));
@ -512,7 +517,7 @@ static void bch2_write_done(struct closure *cl)
bch2_disk_reservation_put(c, &op->res);
if (!(op->flags & BCH_WRITE_move))
bch2_write_ref_put(c, BCH_WRITE_REF_write);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
EBUG_ON(cl->parent);
@ -727,7 +732,8 @@ static void bch2_write_endio(struct bio *bio)
}
if (wbio->have_ioref)
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE],
BCH_DEV_WRITE_REF_io_write);
if (wbio->bounce)
bch2_bio_free_pages_pool(c, bio);
@ -1324,7 +1330,8 @@ retry:
/* Get iorefs before dropping btree locks: */
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
BCH_DEV_WRITE_REF_io_write);
if (unlikely(!ca))
goto err_get_ioref;
@ -1426,7 +1433,8 @@ err:
return;
err_get_ioref:
darray_for_each(buckets, i)
percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]);
enumerated_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE],
BCH_DEV_WRITE_REF_io_write);
/* Fall back to COW path: */
goto out;
@ -1660,7 +1668,7 @@ CLOSURE_CALLBACK(bch2_write)
}
if (!(op->flags & BCH_WRITE_move) &&
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
}


@ -12,6 +12,7 @@
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "enumerated_ref.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
@ -699,8 +700,10 @@ static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;
for_each_rw_member(c, ca)
rcu_read_lock();
for_each_rw_member_rcu(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
rcu_read_unlock();
return nsecs_to_jiffies(nsecs);
}
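The same split recurs throughout this commit: walks that only read per-device state move to the RCU iterators and skip io_ref traffic entirely, while walks that issue IO keep taking a reference, now through a named slot. A sketch of the two idioms, assuming the iterator macros added to sb-members.h further down; flush_one_device is a hypothetical placeholder:

/* Read-only walk: no IO and no sleeping, so RCU protection suffices
 * and no per-device refcounts are touched: */
u64 total_buckets = 0;
rcu_read_lock();
for_each_rw_member_rcu(c, ca)
	total_buckets += ca->mi.nbuckets;
rcu_read_unlock();

/* IO-issuing walk: the iterator holds ca->io_ref[WRITE] under a named
 * slot while the body runs, so the body may sleep and submit bios: */
for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_write)
	flush_one_device(ca);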
@ -987,11 +990,11 @@ int bch2_journal_meta(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_journal))
return -BCH_ERR_erofs_no_writes;
int ret = __bch2_journal_meta(j);
bch2_write_ref_put(c, BCH_WRITE_REF_journal);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_journal);
return ret;
}
@ -1333,13 +1336,14 @@ err:
int bch2_fs_journal_alloc(struct bch_fs *c)
{
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_journal_alloc) {
if (ca->journal.nr)
continue;
int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_fs_journal_alloc);
return ret;
}
}


@ -1218,7 +1218,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
out:
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
kvfree(buf.data);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_journal_read);
closure_return(cl);
return;
err:
@ -1253,7 +1253,8 @@ int bch2_journal_read(struct bch_fs *c,
if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
ca->mi.state == BCH_MEMBER_STATE_ro) &&
percpu_ref_tryget(&ca->io_ref[READ]))
enumerated_ref_tryget(&ca->io_ref[READ],
BCH_DEV_READ_REF_journal_read))
closure_call(&ca->journal.read,
bch2_journal_read_device,
system_unbound_wq,
@ -1768,7 +1769,7 @@ static void journal_write_endio(struct bio *bio)
}
closure_put(&w->io);
percpu_ref_put(&ca->io_ref[WRITE]);
enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
}
static CLOSURE_CALLBACK(journal_write_submit)
@ -1779,7 +1780,8 @@ static CLOSURE_CALLBACK(journal_write_submit)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
BCH_DEV_WRITE_REF_journal_write);
if (!ca) {
/* XXX: fix this */
bch_err(c, "missing device for journal write\n");
@ -1842,8 +1844,9 @@ static CLOSURE_CALLBACK(journal_write_preflush)
}
if (w->separate_flush) {
for_each_rw_member(c, ca) {
percpu_ref_get(&ca->io_ref[WRITE]);
for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_write) {
enumerated_ref_get(&ca->io_ref[WRITE],
BCH_DEV_WRITE_REF_journal_write);
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio[w->idx]->bio;
@ -2053,12 +2056,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
unsigned nr_rw_members = 0;
unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]);
int ret;
for_each_rw_member(c, ca)
nr_rw_members++;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
BUG_ON(!w->write_started);
BUG_ON(w->write_allocated);


@ -285,7 +285,7 @@ void bch2_journal_do_discards(struct journal *j)
mutex_lock(&j->discard_lock);
for_each_rw_member(c, ca) {
for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) {
struct journal_device *ja = &ca->journal;
while (should_discard_bucket(j, ja)) {
@ -617,7 +617,8 @@ static u64 journal_seq_to_flush(struct journal *j)
spin_lock(&j->lock);
for_each_rw_member(c, ca) {
rcu_read_lock();
for_each_rw_member_rcu(c, ca) {
struct journal_device *ja = &ca->journal;
unsigned nr_buckets, bucket_to_flush;
@ -631,6 +632,7 @@ static u64 journal_seq_to_flush(struct journal *j)
seq_to_flush = max(seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
}
rcu_read_unlock();
/* Also flush if the pin fifo is more than half full */
seq_to_flush = max_t(s64, seq_to_flush,


@ -279,7 +279,8 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
s64 wait = S64_MAX, fragmented_allowed, fragmented;
for_each_rw_member(c, ca) {
rcu_read_lock();
for_each_rw_member_rcu(c, ca) {
struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca);
struct bch_dev_usage usage;
@ -296,6 +297,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
}
rcu_read_unlock();
return wait;
}
@ -356,6 +358,13 @@ static int bch2_copygc_thread(void *arg)
set_freezable();
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_move_stats_init(&move_stats, "copygc");
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
writepoint_ptr(&c->copygc_write_point),


@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
/* Inherit casefold state from parent. */
if (S_ISDIR(mode))
new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
if (!(flags & BCH_CREATE_SNAPSHOT)) {
/* Normal create path - allocate a new inode: */
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);


@ -229,6 +229,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_ERASURE_CODE, false, \
NULL, "Enable erasure coding (DO NOT USE YET)") \
x(casefold, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT, \
OPT_BOOL(), \
BCH_SB_CASEFOLD, false, \
NULL, "Dirent lookups are casefolded") \
x(inodes_32bit, u8, \
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \


@ -587,6 +587,13 @@ static int bch2_rebalance_thread(void *arg)
set_freezable();
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*/
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
kthread_should_stop());
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
writepoint_ptr(&c->rebalance_write_point),
true);


@ -18,6 +18,7 @@
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
#include "movinggc.h"
#include "namei.h"
#include "quota.h"
#include "rebalance.h"
@ -1214,6 +1215,9 @@ int bch2_fs_initialize(struct bch_fs *c)
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
if (ret)


@ -12,6 +12,7 @@
#include "journal.h"
#include "lru.h"
#include "logged_ops.h"
#include "movinggc.h"
#include "rebalance.h"
#include "recovery.h"
#include "recovery_passes.h"
@ -311,49 +312,52 @@ int bch2_run_recovery_passes(struct bch_fs *c)
*/
c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
c->next_recovery_pass = c->curr_recovery_pass + 1;
spin_lock_irq(&c->recovery_pass_lock);
spin_lock_irq(&c->recovery_pass_lock);
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
unsigned prev_done = c->recovery_pass_done;
unsigned pass = c->curr_recovery_pass;
c->next_recovery_pass = pass + 1;
if (c->opts.recovery_pass_last &&
c->curr_recovery_pass > c->opts.recovery_pass_last) {
spin_unlock_irq(&c->recovery_pass_lock);
c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
}
if (!should_run_recovery_pass(c, pass)) {
c->curr_recovery_pass++;
c->recovery_pass_done = max(c->recovery_pass_done, pass);
if (should_run_recovery_pass(c, pass)) {
spin_unlock_irq(&c->recovery_pass_lock);
continue;
ret = bch2_run_recovery_pass(c, pass) ?:
bch2_journal_flush(&c->journal);
if (!ret && !test_bit(BCH_FS_error, &c->flags))
bch2_clear_recovery_pass_required(c, pass);
spin_lock_irq(&c->recovery_pass_lock);
if (c->next_recovery_pass < c->curr_recovery_pass) {
/*
* bch2_run_explicit_recovery_pass() was called: we
* can't always catch -BCH_ERR_restart_recovery because
* it may have been called from another thread (btree
* node read completion)
*/
ret = 0;
c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
} else {
c->recovery_passes_complete |= BIT_ULL(pass);
c->recovery_pass_done = max(c->recovery_pass_done, pass);
}
}
spin_unlock_irq(&c->recovery_pass_lock);
ret = bch2_run_recovery_pass(c, pass) ?:
bch2_journal_flush(&c->journal);
if (!ret && !test_bit(BCH_FS_error, &c->flags))
bch2_clear_recovery_pass_required(c, pass);
spin_lock_irq(&c->recovery_pass_lock);
if (c->next_recovery_pass < c->curr_recovery_pass) {
/*
* bch2_run_explicit_recovery_pass() was called: we
* can't always catch -BCH_ERR_restart_recovery because
* it may have been called from another thread (btree
* node read completion)
*/
ret = 0;
c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass);
} else {
c->recovery_passes_complete |= BIT_ULL(pass);
c->recovery_pass_done = max(c->recovery_pass_done, pass);
}
c->curr_recovery_pass = c->next_recovery_pass;
spin_unlock_irq(&c->recovery_pass_lock);
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
bch2_copygc_wakeup(c);
bch2_rebalance_wakeup(c);
}
}
spin_unlock_irq(&c->recovery_pass_lock);
return ret;
}
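For context, the restart check above works because a failing reader can ask for an earlier pass to be re-run while this loop is executing. A simplified sketch of the requesting side, under the assumption that bch2_run_explicit_recovery_pass() effectively does the following (this is not its actual body):

spin_lock_irq(&c->recovery_pass_lock);
if (pass < c->next_recovery_pass)
	c->next_recovery_pass = pass;	/* the loop above rewinds to this pass */
spin_unlock_irq(&c->recovery_pass_lock);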


@ -3,6 +3,7 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "enumerated_ref.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
@ -610,7 +611,7 @@ s64 bch2_remap_range(struct bch_fs *c,
!bch2_request_incompat_feature(c, bcachefs_metadata_version_reflink_p_may_update_opts);
int ret = 0, ret2 = 0;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_reflink))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_reflink))
return -BCH_ERR_erofs_no_writes;
bch2_check_set_feature(c, BCH_FEATURE_reflink);
@ -761,7 +762,7 @@ err:
bch2_bkey_buf_exit(&new_src, c);
bch2_bkey_buf_exit(&new_dst, c);
bch2_write_ref_put(c, BCH_WRITE_REF_reflink);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_reflink);
return dst_done ?: ret ?: ret2;
}


@ -4,6 +4,7 @@
#include "darray.h"
#include "bkey_types.h"
#include "enumerated_ref.h"
extern char * const bch2_member_error_strs[];
@ -20,7 +21,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
static inline bool bch2_dev_is_online(struct bch_dev *ca)
{
return !percpu_ref_is_zero(&ca->io_ref[READ]);
return !enumerated_ref_is_zero(&ca->io_ref[READ]);
}
static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
@ -104,6 +105,12 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, struct bch_dev *
for (struct bch_dev *_ca = NULL; \
(_ca = __bch2_next_dev((_c), _ca, (_mask)));)
#define for_each_online_member_rcu(_c, _ca) \
for_each_member_device_rcu(_c, _ca, &(_c)->online_devs)
#define for_each_rw_member_rcu(_c, _ca) \
for_each_member_device_rcu(_c, _ca, &(_c)->rw_devs[BCH_DATA_free])
static inline void bch2_dev_get(struct bch_dev *ca)
{
#ifdef CONFIG_BCACHEFS_DEBUG
@ -157,33 +164,33 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev
static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
struct bch_dev *ca,
unsigned state_mask,
int rw)
int rw, unsigned ref_idx)
{
rcu_read_lock();
if (ca)
percpu_ref_put(&ca->io_ref[rw]);
enumerated_ref_put(&ca->io_ref[rw], ref_idx);
while ((ca = __bch2_next_dev(c, ca, NULL)) &&
(!((1 << ca->mi.state) & state_mask) ||
!percpu_ref_tryget(&ca->io_ref[rw])))
!enumerated_ref_tryget(&ca->io_ref[rw], ref_idx)))
;
rcu_read_unlock();
return ca;
}
#define __for_each_online_member(_c, _ca, state_mask, rw) \
#define __for_each_online_member(_c, _ca, state_mask, rw, ref_idx) \
for (struct bch_dev *_ca = NULL; \
(_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));)
(_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw, ref_idx));)
#define for_each_online_member(c, ca) \
__for_each_online_member(c, ca, ~0, READ)
#define for_each_online_member(c, ca, ref_idx) \
__for_each_online_member(c, ca, ~0, READ, ref_idx)
#define for_each_rw_member(c, ca) \
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE)
#define for_each_rw_member(c, ca, ref_idx) \
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE, ref_idx)
#define for_each_readable_member(c, ca) \
__for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ)
#define for_each_readable_member(c, ca, ref_idx) \
__for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ, ref_idx)
static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev)
{
@ -284,13 +291,14 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev
return bch2_dev_tryget(c, dev_idx);
}
static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw)
static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
int rw, unsigned ref_idx)
{
might_sleep();
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
if (ca && !percpu_ref_tryget(&ca->io_ref[rw]))
if (ca && !enumerated_ref_tryget(&ca->io_ref[rw], ref_idx))
ca = NULL;
rcu_read_unlock();
@ -300,21 +308,10 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
return ca;
if (ca)
percpu_ref_put(&ca->io_ref[rw]);
enumerated_ref_put(&ca->io_ref[rw], ref_idx);
return NULL;
}
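Every acquisition through bch2_dev_get_ioref() now names an enumerated slot, so a leaked reference shows up by name in the new sysfs read_refs/write_refs files instead of as an anonymous percpu count. The caller discipline, sketched with the read path's slot (the error handling here is illustrative only):

struct bch_dev *ca = bch2_dev_get_ioref(c, dev_idx, READ,
					BCH_DEV_READ_REF_io_read);
if (!ca)
	return -EIO;	/* device gone or not readable; caller's policy */

/* ... issue and complete the read ... */

/* the put must name the same slot the get used: */
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_io_read);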
/* XXX kill, move to struct bch_fs */
static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
{
struct bch_devs_mask devs;
memset(&devs, 0, sizeof(devs));
for_each_online_member(c, ca)
__set_bit(ca->dev_idx, devs.d);
return devs;
}
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1;
extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;


@ -6,6 +6,7 @@
#include "btree_key_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@ -1661,18 +1662,18 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
void bch2_delete_dead_snapshots_async(struct bch_fs *c)
{
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_delete_dead_snapshots))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots))
return;
BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags));
if (!queue_work(c->write_ref_wq, &c->snapshot_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_delete_dead_snapshots);
}
int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans,


@ -48,7 +48,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.type = INODE_STR_HASH(bi),
#ifdef CONFIG_UNICODE
.cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
.cf_encoding = bi->bi_casefold ? c->cf_encoding : NULL,
#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};


@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@ -517,7 +518,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
darray_exit(&s);
}
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
}
struct subvolume_unlink_hook {
@ -540,11 +541,11 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans
if (ret)
return ret;
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache))
return -EROFS;
if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_snapshot_delete_pagecache);
return 0;
}


@ -260,11 +260,11 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
/* XXX: we're not checking that offline devices have enough space */
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_field_resize) {
struct bch_sb_handle *dev_sb = &ca->disk_sb;
if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_sb_field_resize);
return NULL;
}
}
@ -967,7 +967,7 @@ static void write_super_endio(struct bio *bio)
}
closure_put(&ca->fs->sb_write);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
@ -985,7 +985,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));
percpu_ref_get(&ca->io_ref[READ]);
enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@ -1011,7 +1011,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
bio_sectors(bio));
percpu_ref_get(&ca->io_ref[READ]);
enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
closure_bio_submit(bio, &c->sb_write);
}
@ -1043,13 +1043,13 @@ int bch2_write_super(struct bch_fs *c)
* For now, we expect to be able to call write_super() when we're not
* yet RW:
*/
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_write_super) {
ret = darray_push(&online_devices, ca);
if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
goto out;
}
percpu_ref_get(&ca->io_ref[READ]);
enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}
/* Make sure we're using the new magic numbers: */
@ -1108,7 +1108,8 @@ int bch2_write_super(struct bch_fs *c)
prt_str(&buf, ")");
bch2_fs_fatal_error(c, ": %s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_sb_not_downgraded;
ret = -BCH_ERR_sb_not_downgraded;
goto out;
}
darray_for_each(online_devices, ca) {
@ -1215,7 +1216,7 @@ out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
darray_for_each(online_devices, ca)
percpu_ref_put(&(*ca)->io_ref[READ]);
enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super);
darray_exit(&online_devices);
printbuf_exit(&err);
return ret;


@ -10,6 +10,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "async_objs.h"
#include "bkey_sort.h"
#include "btree_cache.h"
#include "btree_gc.h"
@ -28,6 +29,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "errcode.h"
#include "error.h"
#include "fs.h"
@ -77,13 +79,28 @@ MODULE_DESCRIPTION("bcachefs filesystem");
typedef DARRAY(struct bch_sb_handle) bch_sb_handles;
const char * const bch2_fs_flag_strs[] = {
#define x(n) #n,
const char * const bch2_fs_flag_strs[] = {
BCH_FS_FLAGS()
#undef x
NULL
};
const char * const bch2_write_refs[] = {
BCH_WRITE_REFS()
NULL
};
const char * const bch2_dev_read_refs[] = {
BCH_DEV_READ_REFS()
NULL
};
const char * const bch2_dev_write_refs[] = {
BCH_DEV_WRITE_REFS()
NULL
};
#undef x
static void __bch2_print_str(struct bch_fs *c, const char *prefix,
const char *str, bool nonblocking)
{
@ -311,15 +328,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
}
}
#ifndef BCH_WRITE_REF_DEBUG
static void bch2_writes_disabled(struct percpu_ref *writes)
static void bch2_writes_disabled(struct enumerated_ref *writes)
{
struct bch_fs *c = container_of(writes, struct bch_fs, writes);
set_bit(BCH_FS_write_disable_complete, &c->flags);
wake_up(&bch2_read_only_wait);
}
#endif
void bch2_fs_read_only(struct bch_fs *c)
{
@ -337,12 +352,7 @@ void bch2_fs_read_only(struct bch_fs *c)
* writes will return -EROFS:
*/
set_bit(BCH_FS_going_ro, &c->flags);
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_kill(&c->writes);
#else
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
bch2_write_ref_put(c, i);
#endif
enumerated_ref_stop_async(&c->writes);
/*
* If we're not doing an emergency shutdown, we want to wait on
@ -432,32 +442,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
return ret;
}
static int bch2_fs_read_write_late(struct bch_fs *c)
{
int ret;
/*
* Data move operations can't run until after check_snapshots has
* completed, and bch2_snapshot_is_ancestor() is available.
*
* Ideally we'd start copygc/rebalance earlier instead of waiting for
* all of recovery/fsck to complete:
*/
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
ret = bch2_rebalance_start(c);
if (ret) {
bch_err(c, "error starting rebalance thread");
return ret;
}
return 0;
}
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
{
int ret;
@ -492,10 +476,14 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
clear_bit(BCH_FS_clean_shutdown, &c->flags);
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
bch2_dev_allocator_add(c, ca);
percpu_ref_reinit(&ca->io_ref[WRITE]);
}
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->mi.state == BCH_MEMBER_STATE_rw) {
bch2_dev_allocator_add(c, ca);
enumerated_ref_start(&ca->io_ref[WRITE]);
}
rcu_read_unlock();
bch2_recalc_capacity(c);
/*
@ -521,18 +509,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_rw, &c->flags);
set_bit(BCH_FS_was_rw, &c->flags);
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_reinit(&c->writes);
#else
for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++) {
BUG_ON(atomic_long_read(&c->writes[i]));
atomic_long_inc(&c->writes[i]);
enumerated_ref_start(&c->writes);
ret = bch2_copygc_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting copygc thread");
goto err;
}
#endif
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
goto err;
ret = bch2_rebalance_start(c);
if (ret) {
bch_err_msg(c, ret, "error starting rebalance thread");
goto err;
}
bch2_do_discards(c);
@ -583,6 +571,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_free_pending_node_rewrites(c);
bch2_free_fsck_errs(c);
bch2_fs_accounting_exit(c);
bch2_fs_async_obj_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@ -625,9 +614,7 @@ static void __bch2_fs_free(struct bch_fs *c)
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->fill_iter);
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_exit(&c->writes);
#endif
enumerated_ref_exit(&c->writes);
kfree(rcu_dereference_protected(c->disk_groups, 1));
kfree(c->journal_seq_blacklist_table);
@ -666,6 +653,12 @@ void __bch2_fs_stop(struct bch_fs *c)
bch2_fs_read_only(c);
up_write(&c->state_lock);
for (unsigned i = 0; i < c->sb.nr_devices; i++) {
struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
if (ca)
bch2_dev_io_ref_stop(ca, READ);
}
for_each_member_device(c, ca)
bch2_dev_unlink(ca);
@ -694,8 +687,6 @@ void __bch2_fs_stop(struct bch_fs *c)
void bch2_fs_free(struct bch_fs *c)
{
unsigned i;
mutex_lock(&bch_fs_list_lock);
list_del(&c->list);
mutex_unlock(&bch_fs_list_lock);
@ -703,7 +694,7 @@ void bch2_fs_free(struct bch_fs *c)
closure_sync(&c->cl);
closure_debug_destroy(&c->cl);
for (i = 0; i < c->sb.nr_devices; i++) {
for (unsigned i = 0; i < c->sb.nr_devices; i++) {
struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true);
if (ca) {
@ -974,10 +965,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
#endif
enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR,
bch2_writes_disabled) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio),
@ -994,6 +983,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
}
ret =
bch2_fs_async_obj_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
@ -1107,14 +1097,20 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
}
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
rcu_read_lock();
for_each_online_member_rcu(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount =
cpu_to_le64(now);
rcu_read_unlock();
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->mi.state == BCH_MEMBER_STATE_rw)
bch2_dev_allocator_add(c, ca);
rcu_read_unlock();
bch2_recalc_capacity(c);
up_write(&c->state_lock);
@ -1140,13 +1136,10 @@ int bch2_fs_start(struct bch_fs *c)
wake_up(&c->ro_ref_wait);
down_write(&c->state_lock);
if (c->opts.read_only) {
if (c->opts.read_only)
bch2_fs_read_only(c);
} else {
ret = !test_bit(BCH_FS_rw, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c);
}
else if (!test_bit(BCH_FS_rw, &c->flags))
ret = bch2_fs_read_write(c);
up_write(&c->state_lock);
err:
@ -1264,11 +1257,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
{
if (!percpu_ref_is_zero(&ca->io_ref[rw])) {
reinit_completion(&ca->io_ref_completion[rw]);
percpu_ref_kill(&ca->io_ref[rw]);
wait_for_completion(&ca->io_ref_completion[rw]);
}
if (rw == READ)
clear_bit(ca->dev_idx, ca->fs->online_devs.d);
if (!enumerated_ref_is_zero(&ca->io_ref[rw]))
enumerated_ref_stop(&ca->io_ref[rw],
rw == READ
? bch2_dev_read_refs
: bch2_dev_write_refs);
}
static void bch2_dev_release(struct kobject *kobj)
@ -1280,8 +1276,8 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
WARN_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
cancel_work_sync(&ca->io_error_work);
@ -1301,8 +1297,8 @@ static void bch2_dev_free(struct bch_dev *ca)
bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
percpu_ref_exit(&ca->io_ref[WRITE]);
percpu_ref_exit(&ca->io_ref[READ]);
enumerated_ref_exit(&ca->io_ref[WRITE]);
enumerated_ref_exit(&ca->io_ref[READ]);
#ifndef CONFIG_BCACHEFS_DEBUG
percpu_ref_exit(&ca->ref);
#endif
@ -1314,7 +1310,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
if (percpu_ref_is_zero(&ca->io_ref[READ]))
if (enumerated_ref_is_zero(&ca->io_ref[READ]))
return;
__bch2_dev_read_only(c, ca);
@ -1336,20 +1332,6 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref)
}
#endif
static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref)
{
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]);
complete(&ca->io_ref_completion[READ]);
}
static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref)
{
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]);
complete(&ca->io_ref_completion[WRITE]);
}
static void bch2_dev_unlink(struct bch_dev *ca)
{
struct kobject *b;
@ -1411,8 +1393,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
kobject_init(&ca->kobj, &bch2_dev_ktype);
init_completion(&ca->ref_completion);
init_completion(&ca->io_ref_completion[READ]);
init_completion(&ca->io_ref_completion[WRITE]);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
@ -1438,10 +1418,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_dev_allocator_background_init(ca);
if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) ||
enumerated_ref_init(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_NR, NULL) ||
!(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
!(ca->io_done = alloc_percpu(*ca->io_done)))
@ -1503,8 +1481,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
return -BCH_ERR_device_size_too_small;
}
BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[READ]));
BUG_ON(!enumerated_ref_is_zero(&ca->io_ref[WRITE]));
ret = bch2_dev_journal_init(ca, sb->sb);
if (ret)
@ -1523,7 +1501,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
ca->dev = ca->disk_sb.bdev->bd_dev;
percpu_ref_reinit(&ca->io_ref[READ]);
enumerated_ref_start(&ca->io_ref[READ]);
return 0;
}
@ -1547,6 +1525,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
if (ret)
return ret;
set_bit(ca->dev_idx, c->online_devs.d);
bch2_dev_sysfs_online(c, ca);
struct printbuf name = PRINTBUF;
@ -1604,7 +1584,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
return true;
/* do we have enough devices to read from? */
new_online_devs = bch2_online_devs(c);
new_online_devs = c->online_devs;
__clear_bit(ca->dev_idx, new_online_devs.d);
return bch2_have_enough_devs(c, new_online_devs, flags, false);
@ -1644,7 +1624,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
break;
}
return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
return bch2_have_enough_devs(c, c->online_devs, flags, true);
}
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
@ -1668,8 +1648,8 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
if (percpu_ref_is_zero(&ca->io_ref[WRITE]))
percpu_ref_reinit(&ca->io_ref[WRITE]);
if (enumerated_ref_is_zero(&ca->io_ref[WRITE]))
enumerated_ref_start(&ca->io_ref[WRITE]);
bch2_dev_do_discards(ca);
}
@ -1819,7 +1799,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
err:
if (test_bit(BCH_FS_rw, &c->flags) &&
ca->mi.state == BCH_MEMBER_STATE_rw &&
!percpu_ref_is_zero(&ca->io_ref[READ]))
!enumerated_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
up_write(&c->state_lock);
return ret;
@ -2120,7 +2100,7 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
{
down_write(&c->state_lock);
for_each_online_member(c, ca) {
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
u64 old_nbuckets = ca->mi.nbuckets;
u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk),
ca->mi.bucket_size);
@ -2131,7 +2111,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets);
bch_err_fn(ca, ret);
if (ret) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_fs_resize_on_mount);
up_write(&c->state_lock);
return ret;
}
@ -2149,7 +2130,8 @@ int bch2_fs_resize_on_mount(struct bch_fs *c)
if (ca->mi.freespace_initialized) {
ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets);
if (ret) {
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ],
BCH_DEV_READ_REF_fs_resize_on_mount);
up_write(&c->state_lock);
return ret;
}


@ -9,6 +9,9 @@
#include <linux/math64.h>
extern const char * const bch2_fs_flag_strs[];
extern const char * const bch2_write_refs[];
extern const char * const bch2_dev_read_refs[];
extern const char * const bch2_dev_write_refs[];
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(__uuid_t);


@ -25,6 +25,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
#include "enumerated_ref.h"
#include "inode.h"
#include "journal.h"
#include "journal_reclaim.h"
@ -178,25 +179,9 @@ read_attribute(open_buckets);
read_attribute(open_buckets_partial);
read_attribute(nocow_lock_table);
#ifdef BCH_WRITE_REF_DEBUG
read_attribute(read_refs);
read_attribute(write_refs);
static const char * const bch2_write_refs[] = {
#define x(n) #n,
BCH_WRITE_REFS()
#undef x
NULL
};
static void bch2_write_refs_to_text(struct printbuf *out, struct bch_fs *c)
{
bch2_printbuf_tabstop_push(out, 24);
for (unsigned i = 0; i < ARRAY_SIZE(c->writes); i++)
prt_printf(out, "%s\t%li\n", bch2_write_refs[i], atomic_long_read(&c->writes[i]));
}
#endif
read_attribute(internal_uuid);
read_attribute(disk_groups);
@ -324,7 +309,7 @@ static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
bch2_time_stats_init_no_pcpu(&stats_fua);
bch2_time_stats_init_no_pcpu(&stats_random);
if (!bch2_dev_get_ioref(c, ca->dev_idx, READ)) {
if (!bch2_dev_get_ioref(c, ca->dev_idx, READ, BCH_DEV_READ_REF_read_fua_test)) {
prt_str(out, "offline\n");
return 0;
}
@ -415,7 +400,7 @@ static int bch2_read_fua_test(struct printbuf *out, struct bch_dev *ca)
err:
kfree(buf);
kfree(bio);
percpu_ref_put(&ca->io_ref[READ]);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_read_fua_test);
bch_err_fn(c, ret);
return ret;
}
@ -481,10 +466,8 @@ SHOW(bch2_fs)
if (attr == &sysfs_moving_ctxts)
bch2_fs_moving_ctxts_to_text(out, c);
#ifdef BCH_WRITE_REF_DEBUG
if (attr == &sysfs_write_refs)
bch2_write_refs_to_text(out, c);
#endif
enumerated_ref_to_text(out, &c->writes, bch2_write_refs);
if (attr == &sysfs_nocow_lock_table)
bch2_nocow_locks_to_text(out, &c->nocow_locks);
@ -517,7 +500,7 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_btree_updates)
queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work);
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_trigger_btree_cache_shrink) {
@ -577,7 +560,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@ -670,9 +653,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_open_buckets,
&sysfs_open_buckets_partial,
#ifdef BCH_WRITE_REF_DEBUG
&sysfs_write_refs,
#endif
&sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
@ -738,7 +719,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
* We don't need to take c->writes for correctness, but it eliminates an
* unsightly error message in the dmesg log when we're RO:
*/
if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
if (unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_sysfs)))
return -EROFS;
char *tmp = kstrdup(buf, GFP_KERNEL);
@ -765,7 +746,7 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
ret = size;
err:
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
enumerated_ref_put(&c->writes, BCH_WRITE_REF_sysfs);
return ret;
}
@ -919,6 +900,12 @@ SHOW(bch2_dev)
if (opt_id >= 0)
return sysfs_opt_show(c, ca, opt_id, out);
if (attr == &sysfs_read_refs)
enumerated_ref_to_text(out, &ca->io_ref[READ], bch2_dev_read_refs);
if (attr == &sysfs_write_refs)
enumerated_ref_to_text(out, &ca->io_ref[WRITE], bch2_dev_write_refs);
return 0;
}
@ -976,6 +963,9 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
&sysfs_read_refs,
&sysfs_write_refs,
NULL
};


@ -715,6 +715,16 @@ void bch2_corrupt_bio(struct bio *bio)
}
#endif
void bch2_bio_to_text(struct printbuf *out, struct bio *bio)
{
prt_printf(out, "bi_remaining:\t%u\n",
atomic_read(&bio->__bi_remaining));
prt_printf(out, "bi_end_io:\t%ps\n",
bio->bi_end_io);
prt_printf(out, "bi_status:\t%u\n",
bio->bi_status);
}
#if 0
void eytzinger1_test(void)
{


@ -419,6 +419,8 @@ static inline void bch2_maybe_corrupt_bio(struct bio *bio, unsigned ratio)
#define bch2_maybe_corrupt_bio(...) do {} while (0)
#endif
void bch2_bio_to_text(struct printbuf *, struct bio *);
static inline void memcpy_u64s_small(void *dst, const void *src,
unsigned u64s)
{
@ -739,4 +741,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len)
*--dst = *src++;
}
#define set_flags(_map, _in, _out) \
do { \
unsigned _i; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & (1 << _i)) \
(_out) |= _map[_i]; \
else \
(_out) &= ~_map[_i]; \
} while (0)
#define map_flags(_map, _in) \
({ \
unsigned _out = 0; \
\
set_flags(_map, _in, _out); \
_out; \
})
#define map_flags_rev(_map, _in) \
({ \
unsigned _i, _out = 0; \
\
for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \
if ((_in) & _map[_i]) { \
(_out) |= 1 << _i; \
(_in) &= ~_map[_i]; \
} \
(_out); \
})
#define map_defined(_map) \
({ \
unsigned _in = ~0; \
\
map_flags_rev(_map, _in); \
})
#endif /* _BCACHEFS_UTIL_H */
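These flag-mapping macros (moved here from fs-ioctl.h) deliberately consume _in as they translate it, which is what lets callers detect unsupported flags with a plain nonzero test afterwards. A worked example with hypothetical input, using the bch_flags_to_uflags table from fs.c:

/* Userspace passed FS_SYNC_FL | FS_NOATIME_FL plus an unknown bit: */
unsigned in  = FS_SYNC_FL | FS_NOATIME_FL | (1U << 30);
unsigned out = map_flags_rev(bch_flags_to_uflags, in);

/* out == BIT(__BCH_INODE_sync) | BIT(__BCH_INODE_noatime);
 * in has been stripped down to just (1U << 30), hence the idiom: */
if (in)
	return -EOPNOTSUPP;	/* some requested flag isn't supported */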


@ -248,3 +248,28 @@ void wait_for_completion(struct completion *x)
out:
spin_unlock_irq(&x->wait.lock);
}
unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout)
{
spin_lock_irq(&x->wait.lock);
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
__add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&x->wait.lock);
timeout = schedule_timeout(timeout);
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
if (!x->done)
goto out;
}
x->done--;
out:
spin_unlock_irq(&x->wait.lock);
return timeout;
}
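A usage sketch for the new shim; the completion and the one-second bound are hypothetical. The return value is the remaining jiffies, so zero means the wait timed out:

struct completion done;
init_completion(&done);

/* ... another thread eventually calls complete(&done) ... */

unsigned long remaining = wait_for_completion_timeout(&done, HZ);
if (!remaining)
	pr_warn("timed out waiting for completion\n");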