Update bcachefs sources to dfc7b2c9433e bcachefs: bch2_sb_nr_devices()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-08-08 11:42:15 -04:00
parent da1d5571f5
commit 95e8cddd3b
42 changed files with 794 additions and 652 deletions

View File

@ -1 +1 @@
1592eaa60418d460021ecf44bc405ec49ef14adf
dfc7b2c9433ed926cad881b8fbee55e36105989b

236
include/linux/min_heap.h Normal file
View File

@ -0,0 +1,236 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MIN_HEAP_H
#define _LINUX_MIN_HEAP_H
#include <linux/bug.h>
#include <linux/string.h>
#include <linux/types.h>
/**
* Data structure to hold a min-heap.
* @nr: Number of elements currently in the heap.
* @size: Maximum number of elements that can be held in current storage.
* @data: Pointer to the start of array holding the heap elements.
* @preallocated: Start of the static preallocated array holding the heap elements.
*/
#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \
struct _name { \
int nr; \
int size; \
_type *data; \
_type preallocated[_nr]; \
}
#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0)
typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char;
#define __minheap_cast(_heap) (typeof((_heap)->data[0]) *)
#define __minheap_obj_size(_heap) sizeof((_heap)->data[0])
/**
* struct min_heap_callbacks - Data/functions to customise the min_heap.
* @less: Partial order function for this heap.
* @swp: Swap elements function.
*/
struct min_heap_callbacks {
bool (*less)(const void *lhs, const void *rhs, void *args);
void (*swp)(void *lhs, void *rhs, void *args);
};
/* Initialize a min-heap. */
static __always_inline
void __min_heap_init(min_heap_char *heap, void *data, int size)
{
heap->nr = 0;
heap->size = size;
if (data)
heap->data = data;
else
heap->data = heap->preallocated;
}
#define min_heap_init(_heap, _data, _size) \
__min_heap_init((min_heap_char *)_heap, _data, _size)
/* Get the minimum element from the heap. */
static __always_inline
void *__min_heap_peek(struct min_heap_char *heap)
{
return heap->nr ? heap->data : NULL;
}
#define min_heap_peek(_heap) \
(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
/* Check if the heap is full. */
static __always_inline
bool __min_heap_full(min_heap_char *heap)
{
return heap->nr == heap->size;
}
#define min_heap_full(_heap) \
__min_heap_full((min_heap_char *)_heap)
/* Sift the element at pos down the heap. */
static __always_inline
void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
void *left, *right;
void *data = heap->data;
void *root = data + pos * elem_size;
int i = pos, j;
/* Find the sift-down path all the way to the leaves. */
for (;;) {
if (i * 2 + 2 >= heap->nr)
break;
left = data + (i * 2 + 1) * elem_size;
right = data + (i * 2 + 2) * elem_size;
i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
}
/* Special case for the last leaf with no sibling. */
if (i * 2 + 2 == heap->nr)
i = i * 2 + 1;
/* Backtrack to the correct location. */
while (i != pos && func->less(root, data + i * elem_size, args))
i = (i - 1) / 2;
/* Shift the element into its correct place. */
j = i;
while (i != pos) {
i = (i - 1) / 2;
func->swp(data + i * elem_size, data + j * elem_size, args);
}
}
#define min_heap_sift_down(_heap, _pos, _func, _args) \
__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
/* Sift up ith element from the heap, O(log2(nr)). */
static __always_inline
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
size_t parent;
while (idx) {
parent = (idx - 1) / 2;
if (func->less(data + parent * elem_size, data + idx * elem_size, args))
break;
func->swp(data + parent * elem_size, data + idx * elem_size, args);
idx = parent;
}
}
#define min_heap_sift_up(_heap, _idx, _func, _args) \
__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
/* Floyd's approach to heapification that is O(nr). */
static __always_inline
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
int i;
for (i = heap->nr / 2 - 1; i >= 0; i--)
__min_heap_sift_down(heap, i, elem_size, func, args);
}
#define min_heapify_all(_heap, _func, _args) \
__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
/* Remove minimum element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
return false;
/* Place last element at the root (position 0) and then sift down. */
heap->nr--;
memcpy(data, data + (heap->nr * elem_size), elem_size);
__min_heap_sift_down(heap, 0, elem_size, func, args);
return true;
}
#define min_heap_pop(_heap, _func, _args) \
__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
/*
* Remove the minimum element and then push the given element. The
* implementation performs 1 sift (O(log2(nr))) and is therefore more
* efficient than a pop followed by a push that does 2.
*/
static __always_inline
void __min_heap_pop_push(min_heap_char *heap,
const void *element, size_t elem_size,
const struct min_heap_callbacks *func,
void *args)
{
memcpy(heap->data, element, elem_size);
__min_heap_sift_down(heap, 0, elem_size, func, args);
}
#define min_heap_pop_push(_heap, _element, _func, _args) \
__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
/* Push an element on to the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
int pos;
if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
return false;
/* Place at the end of data. */
pos = heap->nr;
memcpy(data + (pos * elem_size), element, elem_size);
heap->nr++;
/* Sift child at pos up. */
__min_heap_sift_up(heap, elem_size, pos, func, args);
return true;
}
#define min_heap_push(_heap, _element, _func, _args) \
__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
/* Remove ith element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args)
{
void *data = heap->data;
if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
return false;
/* Place last element at the root (position 0) and then sift down. */
heap->nr--;
if (idx == heap->nr)
return true;
func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
__min_heap_sift_up(heap, elem_size, idx, func, args);
__min_heap_sift_down(heap, idx, elem_size, func, args);
return true;
}
#define min_heap_del(_heap, _idx, _func, _args) \
__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
#endif /* _LINUX_MIN_HEAP_H */

View File

@ -38,11 +38,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
@ -52,12 +49,6 @@
/********** arch/$ARCH/mm/init.c **********/
#define POISON_FREE_INITMEM 0xcc
/********** arch/ia64/hp/common/sba_iommu.c **********/
/*
* arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
* value of "SBAIOMMU POISON\0" for spill-over poisoning.
*/
/********** fs/jbd/journal.c **********/
#define JBD_POISON_FREE 0x5b
#define JBD2_POISON_FREE 0x5c

View File

@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans,
return xattr;
}
struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct dentry *dentry, int type)
struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct posix_acl *acl = NULL;
if (rcu)
return ERR_PTR(-ECHILD);
struct btree_trans *trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
@ -358,7 +361,7 @@ retry:
bch2_trans_begin(trans);
acl = _acl;
ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
ret = bch2_subvol_is_ro_trans(trans, inode->ei_inum.subvol) ?:
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_intent);
if (ret)

View File

@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t);
#ifdef CONFIG_BCACHEFS_POSIX_ACL
struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int);
struct posix_acl *bch2_get_acl(struct inode *, int, bool);
int bch2_set_acl_trans(struct btree_trans *, subvol_inum,
struct bch_inode_unpacked *,

View File

@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket,
bucket_data_type(bucket) != bucket_data_type(ptr);
}
/*
* It is my general preference to use unsigned types for unsigned quantities -
* however, these helpers are used in disk accounting calculations run by
* triggers where the output will be negated and added to an s64. unsigned is
* right out even though all these quantities will fit in 32 bits, since it
* won't be sign extended correctly; u64 will negate "correctly", but s64 is the
* simpler option here.
*/
static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a)
{
return a.stripe_sectors + a.dirty_sectors + a.cached_sectors;
@ -166,7 +174,7 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
* avoid overflowing LRU_TIME_BITS on a corrupted fs, when
* bucket_sectors_dirty is (much) bigger than bucket_size
*/
u64 d = min(bch2_bucket_sectors_dirty(a),
u64 d = min_t(s64, bch2_bucket_sectors_dirty(a),
ca->mi.bucket_size);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);

View File

@ -496,12 +496,6 @@ again:
for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
alloc_cursor < k.k->p.offset;
alloc_cursor++) {
ret = btree_trans_too_many_iters(trans);
if (ret) {
ob = ERR_PTR(ret);
break;
}
s->buckets_seen++;
u64 bucket = alloc_cursor & ~(~0ULL << 56);
@ -1028,9 +1022,6 @@ static int __open_bucket_add_buckets(struct btree_trans *trans,
open_bucket_for_each(c, ptrs, ob, i)
__clear_bit(ob->dev, devs.d);
if (erasure_code && ec_open_bucket(c, ptrs))
return 0;
ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
nr_replicas, nr_effective,
have_cache, erasure_code, flags);
@ -1085,7 +1076,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
{
int ret;
if (erasure_code) {
if (erasure_code && !ec_open_bucket(trans->c, ptrs)) {
ret = __open_bucket_add_buckets(trans, ptrs, wp,
devs_have, target, erasure_code,
nr_replicas, nr_effective, have_cache,
@ -1609,7 +1600,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
prt_newline(out);
}
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca)
{
struct open_bucket *ob;
@ -1619,7 +1611,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
if (ob->valid && !ob->on_partial_list)
if (ob->valid && !ob->on_partial_list &&
(!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
@ -1762,11 +1755,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats));
}
void bch2_print_allocator_stuck(struct bch_fs *c)
static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
c->opts.allocator_stuck_timeout);
prt_printf(&buf, "Allocator debug:\n");
printbuf_indent_add(&buf, 2);
@ -1796,3 +1790,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
static inline unsigned allocator_wait_timeout(struct bch_fs *c)
{
if (c->allocator_last_stuck &&
time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
return 0;
return c->opts.allocator_stuck_timeout * HZ;
}
void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
unsigned t = allocator_wait_timeout(c);
if (t && closure_sync_timeout(cl, t)) {
c->allocator_last_stuck = jiffies;
bch2_print_allocator_stuck(c);
}
closure_sync(cl);
}

View File

@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp
void bch2_fs_allocator_foreground_init(struct bch_fs *);
void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *);
void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *);
void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
void bch2_print_allocator_stuck(struct bch_fs *);
void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
{
if (cl->closure_get_happened)
__bch2_wait_on_allocator(c, cl);
}
#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */

View File

@ -893,6 +893,8 @@ struct bch_fs {
struct bch_fs_usage_base __percpu *usage;
u64 __percpu *online_reserved;
unsigned long allocator_last_stuck;
struct io_clock io_clock[2];
/* JOURNAL SEQ BLACKLIST */
@ -1020,6 +1022,7 @@ struct bch_fs {
/* fs.c */
struct list_head vfs_inodes_list;
struct mutex vfs_inodes_lock;
struct rhashtable vfs_inodes_table;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
@ -1082,7 +1085,6 @@ struct bch_fs {
u64 __percpu *counters;
unsigned copy_gc_enabled:1;
bool promote_whole_extents;
struct bch2_time_stats times[BCH_TIME_STAT_NR];

View File

@ -792,6 +792,8 @@ LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61);
LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63);
LE64_BITMASK(BCH_SB_PROMOTE_WHOLE_EXTENTS,
struct bch_sb, flags[0], 63, 64);
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1], 4, 8);
@ -836,6 +838,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
struct bch_sb, flags[5], 0, 16);
LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
struct bch_sb, flags[5], 16, 32);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{

View File

@ -1934,6 +1934,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
bch2_trans_verify_not_in_restart(trans);
bch2_btree_iter_verify(iter);
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
goto err;
struct btree_path *path = btree_iter_path(trans, iter);
/* already at end? */
@ -2720,6 +2725,7 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
return bch2_btree_iter_peek_slot(iter);
}
/* Obsolete, but still used by rust wrapper in -tools */
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
{
struct bkey_s_c k;

View File

@ -811,20 +811,6 @@ transaction_restart: \
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
static inline struct bkey_s_c
__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
struct btree_iter *iter, unsigned flags)
{
struct bkey_s_c k;
while (btree_trans_too_many_iters(trans) ||
(k = bch2_btree_iter_peek_type(iter, flags),
bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
bch2_trans_begin(trans);
return k;
}
#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
@ -865,7 +851,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
\
if (bch2_err_matches(_ret, ENOMEM)) { \
_gfp = GFP_KERNEL; \
_ret = drop_locks_do(trans, _do); \
_ret = drop_locks_do(_trans, _do); \
} \
_ret; \
})
@ -878,7 +864,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
_ret = 0; \
if (unlikely(!_p)) { \
_gfp = GFP_KERNEL; \
_ret = drop_locks_do(trans, ((_p = _do), 0)); \
_ret = drop_locks_do(_trans, ((_p = _do), 0)); \
} \
_p; \
})

View File

@ -1264,7 +1264,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
bch2_trans_unlock(trans);
closure_sync(&cl);
bch2_wait_on_allocator(c, &cl);
} while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
}

View File

@ -6,15 +6,29 @@
#include <linux/kthread.h>
#include <linux/preempt.h>
static inline long io_timer_cmp(io_timer_heap *h,
struct io_timer *l,
struct io_timer *r)
static inline bool io_timer_cmp(const void *l, const void *r, void __always_unused *args)
{
return l->expire - r->expire;
struct io_timer **_l = (struct io_timer **)l;
struct io_timer **_r = (struct io_timer **)r;
return (*_l)->expire < (*_r)->expire;
}
static inline void io_timer_swp(void *l, void *r, void __always_unused *args)
{
struct io_timer **_l = (struct io_timer **)l;
struct io_timer **_r = (struct io_timer **)r;
swap(*_l, *_r);
}
void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
spin_lock(&clock->timer_lock);
if (time_after_eq64((u64) atomic64_read(&clock->now), timer->expire)) {
@ -23,22 +37,27 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
return;
}
for (size_t i = 0; i < clock->timers.used; i++)
for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer)
goto out;
BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
BUG_ON(!min_heap_push(&clock->timers, &timer, &callbacks, NULL));
out:
spin_unlock(&clock->timer_lock);
}
void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
{
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
spin_lock(&clock->timer_lock);
for (size_t i = 0; i < clock->timers.used; i++)
for (size_t i = 0; i < clock->timers.nr; i++)
if (clock->timers.data[i] == timer) {
heap_del(&clock->timers, i, io_timer_cmp, NULL);
min_heap_del(&clock->timers, i, &callbacks, NULL);
break;
}
@ -123,10 +142,17 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
static struct io_timer *get_expired_timer(struct io_clock *clock, u64 now)
{
struct io_timer *ret = NULL;
const struct min_heap_callbacks callbacks = {
.less = io_timer_cmp,
.swp = io_timer_swp,
};
if (clock->timers.nr &&
time_after_eq64(now, clock->timers.data[0]->expire)) {
ret = *min_heap_peek(&clock->timers);
min_heap_pop(&clock->timers, &callbacks, NULL);
}
if (clock->timers.used &&
time_after_eq64(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
return ret;
}
@ -150,7 +176,7 @@ void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
printbuf_tabstop_push(out, 40);
prt_printf(out, "current time:\t%llu\n", now);
for (unsigned i = 0; i < clock->timers.used; i++)
for (unsigned i = 0; i < clock->timers.nr; i++)
prt_printf(out, "%ps %ps:\t%llu\n",
clock->timers.data[i]->fn,
clock->timers.data[i]->fn2,

View File

@ -20,15 +20,6 @@ static inline void bch2_increment_clock(struct bch_fs *c, u64 sectors,
void bch2_io_clock_schedule_timeout(struct io_clock *, u64);
#define bch2_kthread_wait_event_ioclock_timeout(condition, clock, timeout)\
({ \
long __ret = timeout; \
might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_timeout(wq, condition, timeout); \
__ret; \
})
void bch2_io_timers_to_text(struct printbuf *, struct io_clock *);
void bch2_io_clock_exit(struct io_clock *);

View File

@ -24,7 +24,7 @@ struct io_timer {
/* Amount to buffer up on a percpu counter */
#define IO_CLOCK_PCPU_SECTORS 128
typedef HEAP(struct io_timer *) io_timer_heap;
typedef DEFINE_MIN_HEAP(struct io_timer *, io_timer_heap) io_timer_heap;
struct io_clock {
atomic64_t now;

View File

@ -553,25 +553,14 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
{
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum target;
u32 snapshot;
struct bkey_buf sk;
int ret;
bch2_bkey_buf_init(&sk);
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
SPOS(inum.inum, ctx->pos, snapshot),
POS(inum.inum, U64_MAX), 0, k, ret) {
int ret = bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_dirents,
POS(inum.inum, ctx->pos),
POS(inum.inum, U64_MAX),
inum.subvol, 0, k, ({
if (k.k->type != KEY_TYPE_dirent)
continue;
@ -579,36 +568,15 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
struct bkey_s_c_dirent dirent = bkey_i_to_s_c_dirent(sk.k);
ret = bch2_dirent_read_target(trans, inum, dirent, &target);
if (ret < 0)
break;
if (ret)
subvol_inum target;
int ret2 = bch2_dirent_read_target(trans, inum, dirent, &target);
if (ret2 > 0)
continue;
/*
* read_target looks up subvolumes, we can overflow paths if the
* directory has many subvolumes in it
*
* XXX: btree_trans_too_many_iters() is something we'd like to
* get rid of, and there's no good reason to be using it here
* except that we don't yet have a for_each_btree_key() helper
* that does subvolume_get_snapshot().
*/
ret = drop_locks_do(trans,
bch2_dir_emit(ctx, dirent, target)) ?:
btree_trans_too_many_iters(trans);
if (ret) {
ret = ret < 0 ? ret : 0;
break;
}
}
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target));
})));
bch2_trans_put(trans);
bch2_bkey_buf_exit(&sk, c);
return ret;
return ret < 0 ? ret : 0;
}

View File

@ -901,8 +901,8 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
mutex_lock(&c->ec_stripes_heap_lock);
if (n.size > h->size) {
memcpy(n.data, h->data, h->used * sizeof(h->data[0]));
n.used = h->used;
memcpy(n.data, h->data, h->nr * sizeof(h->data[0]));
n.nr = h->nr;
swap(*h, n);
}
mutex_unlock(&c->ec_stripes_heap_lock);
@ -993,7 +993,7 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
lockdep_assert_held(&c->ec_stripes_heap_lock);
if (h->used &&
if (h->nr &&
h->data[0].blocks_nonempty == 0 &&
!bch2_stripe_is_open(c, h->data[0].idx))
return h->data[0].idx;
@ -1001,14 +1001,6 @@ static u64 stripe_idx_to_delete(struct bch_fs *c)
return 0;
}
static inline int ec_stripes_heap_cmp(ec_stripes_heap *h,
struct ec_stripe_heap_entry l,
struct ec_stripe_heap_entry r)
{
return ((l.blocks_nonempty > r.blocks_nonempty) -
(l.blocks_nonempty < r.blocks_nonempty));
}
static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
size_t i)
{
@ -1017,39 +1009,71 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
}
static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
{
struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
return ((_l->blocks_nonempty > _r->blocks_nonempty) <
(_l->blocks_nonempty < _r->blocks_nonempty));
}
static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
{
struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
ec_stripes_heap *_h = (ec_stripes_heap *)h;
size_t i = _l - _h->data;
size_t j = _r - _h->data;
swap(*_l, *_r);
ec_stripes_heap_set_backpointer(_h, i);
ec_stripes_heap_set_backpointer(_h, j);
}
static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
{
ec_stripes_heap *h = &c->ec_stripes_heap;
struct stripe *m = genradix_ptr(&c->stripes, idx);
BUG_ON(m->heap_idx >= h->used);
BUG_ON(m->heap_idx >= h->nr);
BUG_ON(h->data[m->heap_idx].idx != idx);
}
void bch2_stripes_heap_del(struct bch_fs *c,
struct stripe *m, size_t idx)
{
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
mutex_lock(&c->ec_stripes_heap_lock);
heap_verify_backpointer(c, idx);
heap_del(&c->ec_stripes_heap, m->heap_idx,
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap);
mutex_unlock(&c->ec_stripes_heap_lock);
}
void bch2_stripes_heap_insert(struct bch_fs *c,
struct stripe *m, size_t idx)
{
mutex_lock(&c->ec_stripes_heap_lock);
BUG_ON(heap_full(&c->ec_stripes_heap));
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
mutex_lock(&c->ec_stripes_heap_lock);
BUG_ON(min_heap_full(&c->ec_stripes_heap));
genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr;
min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) {
.idx = idx,
.blocks_nonempty = m->blocks_nonempty,
}),
ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
&callbacks,
&c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
mutex_unlock(&c->ec_stripes_heap_lock);
@ -1058,6 +1082,10 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
void bch2_stripes_heap_update(struct bch_fs *c,
struct stripe *m, size_t idx)
{
const struct min_heap_callbacks callbacks = {
.less = ec_stripes_heap_cmp,
.swp = ec_stripes_heap_swap,
};
ec_stripes_heap *h = &c->ec_stripes_heap;
bool do_deletes;
size_t i;
@ -1068,10 +1096,8 @@ void bch2_stripes_heap_update(struct bch_fs *c,
h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
i = m->heap_idx;
heap_sift_up(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
heap_sift_down(h, i, ec_stripes_heap_cmp,
ec_stripes_heap_set_backpointer);
min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap);
min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap);
heap_verify_backpointer(c, idx);
@ -1783,6 +1809,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
BUG_ON(v->nr_redundant != h->s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
__clear_bit(v->ptrs[i].dev, devs.d);
if (i < h->s->nr_data)
@ -1864,7 +1893,7 @@ static s64 get_existing_stripe(struct bch_fs *c,
return -1;
mutex_lock(&c->ec_stripes_heap_lock);
for (heap_idx = 0; heap_idx < h->used; heap_idx++) {
for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
/* No blocks worth reusing, stripe will just be deleted: */
if (!h->data[heap_idx].blocks_nonempty)
continue;
@ -2195,7 +2224,7 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
size_t i;
mutex_lock(&c->ec_stripes_heap_lock);
for (i = 0; i < min_t(size_t, h->used, 50); i++) {
for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
m = genradix_ptr(&c->stripes, h->data[i].idx);
prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
@ -2209,6 +2238,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
mutex_unlock(&c->ec_stripes_heap_lock);
}
static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct ec_stripe_new *s)
{
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs",
s->idx, s->nr_data, s->nr_parity,
bitmap_weight(s->blocks_allocated, s->nr_data),
atomic_read(&s->ref[STRIPE_REF_io]),
atomic_read(&s->ref[STRIPE_REF_stripe]),
bch2_watermarks[s->h->watermark]);
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i;
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks)
prt_printf(out, " %u", s->blocks[i]);
prt_newline(out);
}
void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
{
struct ec_stripe_head *h;
@ -2221,23 +2267,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
bch2_watermarks[h->watermark]);
if (h->s)
prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n",
h->s->idx, h->s->nr_data, h->s->nr_parity,
bitmap_weight(h->s->blocks_allocated,
h->s->nr_data));
bch2_new_stripe_to_text(out, c, h->s);
}
mutex_unlock(&c->ec_stripe_head_lock);
prt_printf(out, "in flight:\n");
mutex_lock(&c->ec_stripe_new_lock);
list_for_each_entry(s, &c->ec_stripe_new_list, list) {
prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n",
s->idx, s->nr_data, s->nr_parity,
atomic_read(&s->ref[STRIPE_REF_io]),
atomic_read(&s->ref[STRIPE_REF_stripe]),
bch2_watermarks[s->h->watermark]);
}
list_for_each_entry(s, &c->ec_stripe_new_list, list)
bch2_new_stripe_to_text(out, c, s);
mutex_unlock(&c->ec_stripe_new_lock);
}

View File

@ -36,6 +36,6 @@ struct ec_stripe_heap_entry {
unsigned blocks_nonempty;
};
typedef HEAP(struct ec_stripe_heap_entry) ec_stripes_heap;
typedef DEFINE_MIN_HEAP(struct ec_stripe_heap_entry, ec_stripes_heap) ec_stripes_heap;
#endif /* _BCACHEFS_EC_TYPES_H */

View File

@ -357,7 +357,7 @@ out: \
__bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \
_ptr, _entry)
#define bkey_crc_next(_k, _start, _end, _crc, _iter) \
#define bkey_crc_next(_k, _end, _crc, _iter) \
({ \
__bkey_extent_entry_for_each_from(_iter, _end, _iter) \
if (extent_entry_is_crc(_iter)) { \
@ -372,7 +372,7 @@ out: \
#define __bkey_for_each_crc(_k, _start, _end, _crc, _iter) \
for ((_crc) = bch2_extent_crc_unpack(_k, NULL), \
(_iter) = (_start); \
bkey_crc_next(_k, _start, _end, _crc, _iter); \
bkey_crc_next(_k, _end, _crc, _iter); \
(_iter) = extent_entry_next(_iter))
#define bkey_for_each_crc(_k, _p, _crc, _iter) \

View File

@ -151,7 +151,6 @@ static void bchfs_read(struct btree_trans *trans,
struct bkey_buf sk;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
u32 snapshot;
int ret = 0;
rbio->c = c;
@ -159,29 +158,23 @@ static void bchfs_read(struct btree_trans *trans,
rbio->subvol = inum.subvol;
bch2_bkey_buf_init(&sk);
retry:
bch2_trans_begin(trans);
iter = (struct btree_iter) { NULL };
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(inum.inum, rbio->bio.bi_iter.bi_sector, snapshot),
POS(inum.inum, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_slots);
while (1) {
struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
/*
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
ret = bch2_trans_relock(trans);
bch2_trans_begin(trans);
u32 snapshot;
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
break;
goto err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_pos(&iter,
POS(inum.inum, rbio->bio.bi_iter.bi_sector));
@ -189,7 +182,7 @@ retry:
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)
break;
goto err;
offset_into_extent = iter.pos.offset -
bkey_start_offset(k.k);
@ -200,7 +193,7 @@ retry:
ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &sk);
if (ret)
break;
goto err;
k = bkey_i_to_s_c(sk.k);
@ -210,7 +203,7 @@ retry:
ret = readpage_bio_extend(trans, readpages_iter, &rbio->bio, sectors,
extent_partial_reads_expensive(k));
if (ret)
break;
goto err;
}
bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
@ -229,17 +222,13 @@ retry:
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
ret = btree_trans_too_many_iters(trans);
if (ret)
err:
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
err:
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (ret) {
bch_err_inum_offset_ratelimited(c,
iter.pos.inode,
@ -486,7 +475,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
op->nr_replicas = nr_replicas;
op->res.nr_replicas = nr_replicas;
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
op->subvol = inode->ei_subvol;
op->subvol = inode->ei_inum.subvol;
op->pos = POS(inode->v.i_ino, sector);
op->end_io = bch2_writepage_io_done;
op->devs_need_flush = &inode->ei_devs_need_flush;

View File

@ -500,7 +500,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
dio->op.target = dio->op.opts.foreground_target;
dio->op.write_point = writepoint_hashed((unsigned long) current);
dio->op.nr_replicas = dio->op.opts.data_replicas;
dio->op.subvol = inode->ei_subvol;
dio->op.subvol = inode->ei_inum.subvol;
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
dio->op.devs_need_flush = &inode->ei_devs_need_flush;

View File

@ -182,18 +182,11 @@ static void __bch2_folio_set(struct folio *folio,
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
struct folio **fs, unsigned nr_folios)
{
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_folio *s;
u64 offset = folio_sector(fs[0]);
unsigned folio_idx;
u32 snapshot;
bool need_set = false;
int ret;
for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
for (unsigned folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
struct bch_folio *s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
if (!s)
return -ENOMEM;
@ -203,18 +196,13 @@ int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
if (!need_set)
return 0;
folio_idx = 0;
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
unsigned folio_idx = 0;
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inum.inum, offset, snapshot),
BTREE_ITER_slots, k, ret) {
return bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents,
POS(inum.inum, offset),
POS(inum.inum, U64_MAX),
inum.subvol, BTREE_ITER_slots, k, ({
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
unsigned state = bkey_to_sector_state(k);
@ -240,16 +228,8 @@ retry:
if (folio_idx == nr_folios)
break;
}
offset = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
return ret;
0;
})));
}
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)

View File

@ -221,30 +221,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol,
struct bpos start,
struct bpos end)
{
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, subvol, &start.snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents, start, end, 0, k, ret)
if (bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k)) {
ret = 1;
break;
}
start = iter.pos;
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
return ret;
return bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents, start, end,
subvol, 0, k, ({
bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k);
})));
}
static int __bch2_truncate_folio(struct bch_inode_info *inode,
@ -267,7 +248,7 @@ static int __bch2_truncate_folio(struct bch_inode_info *inode,
* XXX: we're doing two index lookups when we end up reading the
* folio
*/
ret = range_has_data(c, inode->ei_subvol,
ret = range_has_data(c, inode->ei_inum.subvol,
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),
POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));
if (ret <= 0)
@ -618,7 +599,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans,
inode->ei_subvol, &snapshot);
inode->ei_inum.subvol, &snapshot);
if (ret)
goto bkey_err;
@ -813,41 +794,23 @@ static int quota_reserve_range(struct bch_inode_info *inode,
u64 start, u64 end)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
u32 snapshot;
u64 sectors = end - start;
u64 pos = start;
int ret;
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, pos, snapshot), 0);
while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, POS(inode->v.i_ino, end - 1))).k &&
!(ret = bkey_err(k))) {
int ret = bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter,
BTREE_ID_extents,
POS(inode->v.i_ino, start),
POS(inode->v.i_ino, end - 1),
inode->ei_inum.subvol, 0, k, ({
if (bkey_extent_is_allocation(k.k)) {
u64 s = min(end, k.k->p.offset) -
max(start, bkey_start_offset(k.k));
BUG_ON(s > sectors);
sectors -= s;
}
bch2_btree_iter_advance(&iter);
}
pos = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
0;
})));
return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true);
}
@ -942,42 +905,25 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
u64 isize, next_data = MAX_LFS_FILESIZE;
u32 snapshot;
int ret;
isize = i_size_read(&inode->v);
if (offset >= isize)
return -ENXIO;
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
int ret = bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents,
POS(inode->v.i_ino, offset >> 9),
POS(inode->v.i_ino, U64_MAX),
0, k, ret) {
inum.subvol, 0, k, ({
if (bkey_extent_is_data(k.k)) {
next_data = max(offset, bkey_start_offset(k.k) << 9);
break;
} else if (k.k->p.offset >> 9 > isize)
break;
}
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
0;
})));
if (ret)
return ret;
@ -995,29 +941,18 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_trans *trans;
struct btree_iter iter;
struct bkey_s_c k;
subvol_inum inum = inode_inum(inode);
u64 isize, next_hole = MAX_LFS_FILESIZE;
u32 snapshot;
int ret;
isize = i_size_read(&inode->v);
if (offset >= isize)
return -ENXIO;
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
SPOS(inode->v.i_ino, offset >> 9, snapshot),
BTREE_ITER_slots, k, ret) {
int ret = bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_extents,
POS(inode->v.i_ino, offset >> 9),
POS(inode->v.i_ino, U64_MAX),
inum.subvol, BTREE_ITER_slots, k, ({
if (k.k->p.inode != inode->v.i_ino) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
offset, MAX_LFS_FILESIZE, 0, false);
@ -1032,13 +967,8 @@ retry:
} else {
offset = max(offset, bkey_start_offset(k.k) << 9);
}
}
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
0;
})));
if (ret)
return ret;

View File

@ -100,7 +100,7 @@ static int bch2_ioc_setflags(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
@ -184,7 +184,7 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
bch2_set_projid(c, inode, fa.fsx_projid) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);

View File

@ -152,42 +152,80 @@ int bch2_fs_quota_transfer(struct bch_fs *c,
return ret;
}
static int bch2_iget5_test(struct inode *vinode, void *p)
static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
subvol_inum *inum = p;
return inode->ei_subvol == inum->subvol &&
inode->ei_inode.bi_inum == inum->inum;
return a.subvol == b.subvol && a.inum == b.inum;
}
static int bch2_iget5_set(struct inode *vinode, void *p)
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
const void *obj)
{
struct bch_inode_info *inode = to_bch_ei(vinode);
subvol_inum *inum = p;
const struct bch_inode_info *inode = obj;
const subvol_inum *v = arg->key;
inode->v.i_ino = inum->inum;
inode->ei_subvol = inum->subvol;
inode->ei_inode.bi_inum = inum->inum;
return 0;
return !subvol_inum_eq(inode->ei_inum, *v);
}
static unsigned bch2_inode_hash(subvol_inum inum)
static const struct rhashtable_params bch2_vfs_inodes_params = {
.head_offset = offsetof(struct bch_inode_info, hash),
.key_offset = offsetof(struct bch_inode_info, ei_inum),
.key_len = sizeof(subvol_inum),
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
.automatic_shrinking = true,
};
static void __wait_on_freeing_inode(struct inode *inode)
{
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
schedule();
finish_wait(wq, &wait.wq_entry);
}
static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
{
subvol_inum inum = inode_inum(inode);
struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,
bch2_inode_hash(inum),
bch2_iget5_test,
bch2_iget5_set,
&inum));
BUG_ON(!old);
struct bch_inode_info *inode;
repeat:
inode = rhashtable_lookup_fast(&c->vfs_inodes_table, &inum,
bch2_vfs_inodes_params);
if (inode) {
spin_lock(&inode->v.i_lock);
if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
__wait_on_freeing_inode(&inode->v);
goto repeat;
}
__iget(&inode->v);
spin_unlock(&inode->v.i_lock);
}
return inode;
}
static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)
{
if (test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
int ret = rhashtable_remove_fast(&c->vfs_inodes_table,
&inode->hash, bch2_vfs_inodes_params);
BUG_ON(ret);
clear_bit(EI_INODE_HASHED, &inode->ei_flags);
inode->v.i_hash.pprev = NULL;
}
}
static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode)
{
struct bch_inode_info *old = inode;
retry:
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
&inode->hash,
bch2_vfs_inodes_params))) {
old = bch2_inode_hash_find(c, inode->ei_inum);
if (!old)
goto retry;
if (unlikely(old != inode)) {
/*
* bcachefs doesn't use I_NEW; we have no use for it since we
* only insert fully created inodes in the inode hash table. But
@ -203,16 +241,14 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
discard_new_inode(&inode->v);
inode = old;
} else {
set_bit(EI_INODE_HASHED, &inode->ei_flags);
inode_fake_hash(&inode->v);
inode_sb_list_add(&inode->v);
mutex_lock(&c->vfs_inodes_lock);
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* Again, I_NEW makes no sense for bcachefs. This is only needed
* for clearing I_NEW, but since the inode was already fully
* created and initialized we didn't actually want
* inode_insert5() to set it for us.
*/
unlock_new_inode(&inode->v);
}
return inode;
@ -245,7 +281,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
inode->ei_flags = 0;
mutex_init(&inode->ei_quota_lock);
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
inode->v.i_state = 0;
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
kmem_cache_free(bch2_inode_cache, inode);
@ -279,11 +314,7 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
{
struct bch_inode_info *inode =
to_bch_ei(ilookup5_nowait(c->vfs_sb,
bch2_inode_hash(inum),
bch2_iget5_test,
&inum));
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
if (inode)
return &inode->v;
@ -297,7 +328,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
if (!ret) {
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_insert(c, inode);
inode = bch2_inode_hash_insert(c, inode);
}
bch2_trans_put(trans);
@ -345,7 +376,7 @@ __bch2_create(struct mnt_idmap *idmap,
retry:
bch2_trans_begin(trans);
ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
ret = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:
bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
@ -359,7 +390,7 @@ retry:
if (unlikely(ret))
goto err_before_quota;
inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;
inum.inum = inode_u.bi_inum;
ret = bch2_subvolume_get(trans, inum.subvol, true,
@ -390,7 +421,7 @@ err_before_quota:
* bch2_trans_exit() and dropping locks, else we could race with another
* thread pulling the inode in and modifying it:
*/
inode = bch2_inode_insert(c, inode);
inode = bch2_inode_hash_insert(c, inode);
bch2_trans_put(trans);
err:
posix_acl_release(default_acl);
@ -430,11 +461,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
if (ret)
goto err;
struct bch_inode_info *inode =
to_bch_ei(ilookup5_nowait(c->vfs_sb,
bch2_inode_hash(inum),
bch2_iget5_test,
&inum));
struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);
if (inode)
goto out;
@ -464,7 +491,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
}
bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
inode = bch2_inode_insert(c, inode);
inode = bch2_inode_hash_insert(c, inode);
out:
bch2_trans_iter_exit(trans, &dirent_iter);
printbuf_exit(&buf);
@ -551,8 +578,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
lockdep_assert_held(&inode->v.i_rwsem);
ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
__bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
return bch2_err_class(ret);
@ -608,7 +635,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info;
int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:
__bch2_unlink(vdir, dentry, false);
return bch2_err_class(ret);
}
@ -691,8 +718,8 @@ static int bch2_rename2(struct mnt_idmap *idmap,
trans = bch2_trans_get(c);
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
if (ret)
goto err;
@ -893,7 +920,7 @@ static int bch2_getattr(struct mnt_idmap *idmap,
stat->blksize = block_bytes(c);
stat->blocks = inode->v.i_blocks;
stat->subvol = inode->ei_subvol;
stat->subvol = inode->ei_inum.subvol;
stat->result_mask |= STATX_SUBVOL;
if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
@ -935,7 +962,7 @@ static int bch2_setattr(struct mnt_idmap *idmap,
lockdep_assert_held(&inode->v.i_rwsem);
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:
setattr_prepare(idmap, dentry, iattr);
if (ret)
return ret;
@ -1028,7 +1055,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct bkey_buf cur, prev;
unsigned offset_into_extent, sectors;
bool have_extent = false;
u32 snapshot;
int ret = 0;
ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
@ -1044,20 +1070,29 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_bkey_buf_init(&cur);
bch2_bkey_buf_init(&prev);
trans = bch2_trans_get(c);
retry:
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(ei->v.i_ino, start), 0);
while (true) {
enum btree_id data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
u32 snapshot;
ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
if (ret)
goto err;
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(ei->v.i_ino, start, snapshot), 0);
bch2_btree_iter_set_snapshot(&iter, snapshot);
while (!(ret = btree_trans_too_many_iters(trans)) &&
(k = bch2_btree_iter_peek_upto(&iter, end)).k &&
!(ret = bkey_err(k))) {
enum btree_id data_btree = BTREE_ID_extents;
k = bch2_btree_iter_peek_upto(&iter, end);
ret = bkey_err(k);
if (ret)
goto err;
if (!k.k)
break;
if (!bkey_extent_is_data(k.k) &&
k.k->type != KEY_TYPE_reservation) {
@ -1102,16 +1137,12 @@ retry:
bch2_btree_iter_set_pos(&iter,
POS(iter.pos.inode, iter.pos.offset + sectors));
ret = bch2_trans_relock(trans);
if (ret)
err:
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
start = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret && have_extent) {
bch2_trans_unlock(trans);
@ -1167,7 +1198,7 @@ static int bch2_open(struct inode *vinode, struct file *file)
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol);
if (ret)
return ret;
}
@ -1201,7 +1232,7 @@ static const struct inode_operations bch_file_inode_operations = {
.fiemap = bch2_fiemap,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
.get_acl = bch2_get_acl,
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@ -1221,7 +1252,7 @@ static const struct inode_operations bch_dir_inode_operations = {
.tmpfile = bch2_tmpfile,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
.get_acl = bch2_get_acl,
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@ -1243,7 +1274,7 @@ static const struct inode_operations bch_symlink_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
.get_acl = bch2_get_acl,
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@ -1253,7 +1284,7 @@ static const struct inode_operations bch_special_inode_operations = {
.setattr = bch2_setattr,
.listxattr = bch2_xattr_list,
#ifdef CONFIG_BCACHEFS_POSIX_ACL
.get_acl = bch2_get_acl,
.get_inode_acl = bch2_get_acl,
.set_acl = bch2_set_acl,
#endif
};
@ -1299,8 +1330,8 @@ static int bcachefs_fid_valid(int fh_len, int fh_type)
static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
{
return (struct bcachefs_fid) {
.inum = inode->ei_inode.bi_inum,
.subvol = inode->ei_subvol,
.inum = inode->ei_inum.inum,
.subvol = inode->ei_inum.subvol,
.gen = inode->ei_inode.bi_generation,
};
}
@ -1385,7 +1416,7 @@ static struct dentry *bch2_get_parent(struct dentry *child)
struct bch_fs *c = inode->v.i_sb->s_fs_info;
subvol_inum parent_inum = {
.subvol = inode->ei_inode.bi_parent_subvol ?:
inode->ei_subvol,
inode->ei_inum.subvol,
.inum = inode->ei_inode.bi_dir,
};
@ -1421,7 +1452,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child
retry:
bch2_trans_begin(trans);
ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot);
if (ret)
goto err;
@ -1452,8 +1483,7 @@ retry:
if (ret)
goto err;
if (target.subvol == inode->ei_subvol &&
target.inum == inode->ei_inode.bi_inum)
if (subvol_inum_eq(target, inode->ei_inum))
goto found;
} else {
/*
@ -1474,8 +1504,7 @@ retry:
if (ret)
continue;
if (target.subvol == inode->ei_subvol &&
target.inum == inode->ei_inode.bi_inum)
if (subvol_inum_eq(target, inode->ei_inum))
goto found;
}
}
@ -1512,7 +1541,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
struct bch_inode_unpacked *bi,
struct bch_subvolume *subvol)
{
bch2_iget5_set(&inode->v, &inum);
inode->v.i_ino = inum.inum;
inode->ei_inum = inum;
inode->ei_inode.bi_inum = inum.inum;
bch2_inode_update_after_write(trans, inode, bi, ~0);
inode->v.i_blocks = bi->bi_sectors;
@ -1524,7 +1555,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->ei_flags = 0;
inode->ei_quota_reserved = 0;
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
if (BCH_SUBVOLUME_SNAP(subvol))
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
@ -1592,6 +1622,8 @@ static void bch2_evict_inode(struct inode *vinode)
struct bch_fs *c = vinode->i_sb->s_fs_info;
struct bch_inode_info *inode = to_bch_ei(vinode);
bch2_inode_hash_remove(c, inode);
truncate_inode_pages_final(&inode->v.i_data);
clear_inode(&inode->v);
@ -1633,7 +1665,7 @@ again:
mutex_lock(&c->vfs_inodes_lock);
list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
if (!snapshot_list_has_id(s, inode->ei_subvol))
if (!snapshot_list_has_id(s, inode->ei_inum.subvol))
continue;
if (!(inode->v.i_state & I_DONTCACHE) &&
@ -2121,6 +2153,17 @@ static int bch2_init_fs_context(struct fs_context *fc)
return 0;
}
void bch2_fs_vfs_exit(struct bch_fs *c)
{
if (c->vfs_inodes_table.tbl)
rhashtable_destroy(&c->vfs_inodes_table);
}
int bch2_fs_vfs_init(struct bch_fs *c)
{
return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params);
}
static struct file_system_type bcache_fs_type = {
.owner = THIS_MODULE,
.name = "bcachefs",

View File

@ -13,6 +13,9 @@
struct bch_inode_info {
struct inode v;
struct rhash_head hash;
subvol_inum ei_inum;
struct list_head ei_vfs_inode_list;
unsigned long ei_flags;
@ -24,8 +27,6 @@ struct bch_inode_info {
struct mutex ei_quota_lock;
struct bch_qid ei_qid;
u32 ei_subvol;
/*
* When we've been doing nocow writes we'll need to issue flushes to the
* underlying block devices
@ -50,10 +51,7 @@ struct bch_inode_info {
static inline subvol_inum inode_inum(struct bch_inode_info *inode)
{
return (subvol_inum) {
.subvol = inode->ei_subvol,
.inum = inode->ei_inode.bi_inum,
};
return inode->ei_inum;
}
/*
@ -67,6 +65,7 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode)
* those:
*/
#define EI_INODE_SNAPSHOT 1
#define EI_INODE_HASHED 2
#define to_bch_ei(_inode) \
container_of_or_null(_inode, struct bch_inode_info, v)
@ -187,6 +186,9 @@ int __bch2_unlink(struct inode *, struct dentry *, bool);
void bch2_evict_subvolume_inodes(struct bch_fs *, snapshot_id_list *);
void bch2_fs_vfs_exit(struct bch_fs *);
int bch2_fs_vfs_init(struct bch_fs *);
void bch2_vfs_exit(void);
int bch2_vfs_init(void);
@ -196,6 +198,10 @@ int bch2_vfs_init(void);
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}
static inline void bch2_fs_vfs_exit(struct bch_fs *c) {}
static inline int bch2_fs_vfs_init(struct bch_fs *c) { return 0; }
static inline void bch2_vfs_exit(void) {}
static inline int bch2_vfs_init(void) { return 0; }

View File

@ -283,6 +283,7 @@ static int reattach_inode(struct btree_trans *trans,
struct bch_inode_unpacked *inode,
u32 inode_snapshot)
{
struct bch_fs *c = trans->c;
struct bch_hash_info dir_hash;
struct bch_inode_unpacked lostfound;
char name_buf[20];
@ -317,7 +318,7 @@ static int reattach_inode(struct btree_trans *trans,
return ret;
}
dir_hash = bch2_hash_info_init(trans->c, &lostfound);
dir_hash = bch2_hash_info_init(c, &lostfound);
name = (struct qstr) QSTR(name_buf);
@ -330,8 +331,10 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_subvol ?: inode->bi_inum,
&dir_offset,
STR_HASH_must_create);
if (ret)
if (ret) {
bch_err_msg(c, ret, "error creating dirent");
return ret;
}
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;

View File

@ -126,11 +126,7 @@ err_noprint:
if (closure_nr_remaining(&cl) != 1) {
bch2_trans_unlock_long(trans);
if (closure_sync_timeout(&cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&cl);
}
bch2_wait_on_allocator(c, &cl);
}
return ret;

View File

@ -286,7 +286,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
*/
bool promote_full = (failed ||
*read_full ||
READ_ONCE(c->promote_whole_extents));
READ_ONCE(c->opts.promote_whole_extents));
/* data might have to be decompressed in the write path: */
unsigned sectors = promote_full
? max(pick->crc.compressed_size, pick->crc.live_size)
@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
bch2_trans_iter_init(trans, &iter, rbio->data_btree,
rbio->read_pos, BTREE_ITER_slots);
retry:
bch2_trans_begin(trans);
rbio->bio.bi_status = 0;
k = bch2_btree_iter_peek_slot(&iter);
@ -1213,10 +1214,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
ret = btree_trans_too_many_iters(trans);
if (ret)
goto err;
err:
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&

View File

@ -1503,10 +1503,7 @@ err:
if ((op->flags & BCH_WRITE_SYNC) ||
(!(op->flags & BCH_WRITE_SUBMITTED) &&
!(op->flags & BCH_WRITE_IN_WORKER))) {
if (closure_sync_timeout(&op->cl, HZ * 10)) {
bch2_print_allocator_stuck(c);
closure_sync(&op->cl);
}
bch2_wait_on_allocator(c, &op->cl);
__bch2_write_index(op);

View File

@ -206,6 +206,7 @@ void bch2_journal_space_available(struct journal *j)
if (nr_online < metadata_replicas_required(c)) {
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n"
"rw journal devs:", nr_online, metadata_replicas_required(c));

View File

@ -265,6 +265,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable inline data extents") \
x(promote_whole_extents, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_PROMOTE_WHOLE_EXTENTS, true, \
NULL, "Promote whole extents, instead of just part being read")\
x(acl, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT, \
OPT_BOOL(), \
@ -393,6 +398,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \
NULL, "Log transaction function names in journal") \
x(allocator_stuck_timeout, u16, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(0, U16_MAX), \
BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \
NULL, "Default timeout in seconds for stuck allocator messages")\
x(noexcl, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \

View File

@ -464,3 +464,12 @@ void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
}
}
unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
{
unsigned nr = 0;
for (unsigned i = 0; i < sb->nr_devices; i++)
nr += bch2_member_exists((struct bch_sb *) sb, i);
return nr;
}

View File

@ -307,6 +307,8 @@ static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
return false;
}
unsigned bch2_sb_nr_devices(const struct bch_sb *);
static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
{
return (struct bch_member_cpu) {

View File

@ -32,6 +32,51 @@ int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
int bch2_subvol_is_ro(struct bch_fs *, u32);
static inline struct bkey_s_c
bch2_btree_iter_peek_in_subvolume_upto_type(struct btree_iter *iter, struct bpos end,
u32 subvolid, unsigned flags)
{
u32 snapshot;
int ret = bch2_subvolume_get_snapshot(iter->trans, subvolid, &snapshot);
if (ret)
return bkey_s_c_err(ret);
bch2_btree_iter_set_snapshot(iter, snapshot);
return bch2_btree_iter_peek_upto_type(iter, end, flags);
}
#define for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \
_end, _subvolid, _flags, _k, _do) \
({ \
struct bkey_s_c _k; \
int _ret3 = 0; \
\
do { \
_ret3 = lockrestart_do(_trans, ({ \
(_k) = bch2_btree_iter_peek_in_subvolume_upto_type(&(_iter), \
_end, _subvolid, (_flags)); \
if (!(_k).k) \
break; \
\
bkey_err(_k) ?: (_do); \
})); \
} while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret3; \
})
#define for_each_btree_key_in_subvolume_upto(_trans, _iter, _btree_id, \
_start, _end, _subvolid, _flags, _k, _do) \
({ \
struct btree_iter _iter; \
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
\
for_each_btree_key_in_subvolume_upto_continue(_trans, _iter, \
_end, _subvolid, _flags, _k, _do); \
})
int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);

View File

@ -414,6 +414,13 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
!BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2)
SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);
}
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
@ -1288,15 +1295,9 @@ void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
bool print_layout, unsigned fields)
{
u64 fields_have = 0;
unsigned nr_devices = 0;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 44);
for (int i = 0; i < sb->nr_devices; i++)
nr_devices += bch2_member_exists(sb, i);
prt_printf(out, "External UUID:\t");
pr_uuid(out, sb->user_uuid.b);
prt_newline(out);
@ -1352,9 +1353,10 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_newline(out);
prt_printf(out, "Clean:\t%llu\n", BCH_SB_CLEAN(sb));
prt_printf(out, "Devices:\t%u\n", nr_devices);
prt_printf(out, "Devices:\t%u\n", bch2_sb_nr_devices(sb));
prt_printf(out, "Sections:\t");
u64 fields_have = 0;
vstruct_for_each(sb, f)
fields_have |= 1 << le32_to_cpu(f->type);
prt_bitflags(out, bch2_sb_fields, fields_have);

View File

@ -543,6 +543,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_fs_fs_io_direct_exit(c);
bch2_fs_fs_io_buffered_exit(c);
bch2_fs_fsio_exit(c);
bch2_fs_vfs_exit(c);
bch2_fs_ec_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_nocow_locking_exit(c);
@ -810,7 +811,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->copy_gc_enabled = 1;
c->rebalance.enabled = 1;
c->promote_whole_extents = true;
c->journal.flush_write_time = &c->times[BCH_TIME_journal_flush_write];
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
@ -926,6 +926,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?:
bch2_fs_ec_init(c) ?:
bch2_fs_vfs_init(c) ?:
bch2_fs_fsio_init(c) ?:
bch2_fs_fs_io_buffered_init(c) ?:
bch2_fs_fs_io_direct_init(c);
@ -1193,7 +1194,6 @@ static void bch2_dev_free(struct bch_dev *ca)
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
kfree(ca->buckets_nouse);
bch2_free_super(&ca->disk_sb);
bch2_dev_allocator_background_exit(ca);
bch2_dev_journal_exit(ca);

View File

@ -219,7 +219,6 @@ read_attribute(copy_gc_wait);
rw_attribute(rebalance_enabled);
sysfs_pd_controller_attribute(rebalance);
read_attribute(rebalance_status);
rw_attribute(promote_whole_extents);
read_attribute(new_stripes);
@ -347,8 +346,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_rebalance_status)
bch2_rebalance_status_to_text(out, c);
sysfs_print(promote_whole_extents, c->promote_whole_extents);
/* Debugging: */
if (attr == &sysfs_journal_debug)
@ -367,7 +364,7 @@ SHOW(bch2_fs)
bch2_stripes_heap_to_text(out, c);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c);
bch2_open_buckets_to_text(out, c, NULL);
if (attr == &sysfs_open_buckets_partial)
bch2_open_buckets_partial_to_text(out, c);
@ -436,8 +433,6 @@ STORE(bch2_fs)
sysfs_pd_controller_store(rebalance, &c->rebalance.pd);
sysfs_strtoul(promote_whole_extents, c->promote_whole_extents);
/* Debugging: */
if (!test_bit(BCH_FS_started, &c->flags))
@ -514,7 +509,7 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_cache_size,
&sysfs_btree_write_stats,
&sysfs_promote_whole_extents,
&sysfs_rebalance_status,
&sysfs_compression_stats,
@ -614,7 +609,6 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_copy_gc_wait,
&sysfs_rebalance_enabled,
&sysfs_rebalance_status,
sysfs_pd_controller_files(rebalance),
&sysfs_moving_ctxts,
@ -811,6 +805,9 @@ SHOW(bch2_dev)
if (attr == &sysfs_alloc_debug)
bch2_dev_alloc_debug_to_text(out, ca);
if (attr == &sysfs_open_buckets)
bch2_open_buckets_to_text(out, c, ca);
return 0;
}
@ -877,6 +874,7 @@ struct attribute *bch2_dev_files[] = {
/* debug: */
&sysfs_alloc_debug,
&sysfs_open_buckets,
NULL
};

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
* random utiility code, for bcache but in theory not specific to bcache
* random utility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.

View File

@ -8,6 +8,7 @@
#include <linux/errno.h>
#include <linux/freezer.h>
#include <linux/kernel.h>
#include <linux/min_heap.h>
#include <linux/sched/clock.h>
#include <linux/llist.h>
#include <linux/log2.h>
@ -54,17 +55,9 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
#define HEAP(type) \
struct { \
size_t size, used; \
type *data; \
}
#define DECLARE_HEAP(type, name) HEAP(type) name
#define init_heap(heap, _size, gfp) \
({ \
(heap)->used = 0; \
(heap)->nr = 0; \
(heap)->size = (_size); \
(heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\
(gfp)); \
@ -76,113 +69,6 @@ do { \
(heap)->data = NULL; \
} while (0)
#define heap_set_backpointer(h, i, _fn) \
do { \
void (*fn)(typeof(h), size_t) = _fn; \
if (fn) \
fn(h, i); \
} while (0)
#define heap_swap(h, i, j, set_backpointer) \
do { \
swap((h)->data[i], (h)->data[j]); \
heap_set_backpointer(h, i, set_backpointer); \
heap_set_backpointer(h, j, set_backpointer); \
} while (0)
#define heap_peek(h) \
({ \
EBUG_ON(!(h)->used); \
(h)->data[0]; \
})
#define heap_full(h) ((h)->used == (h)->size)
#define heap_sift_down(h, i, cmp, set_backpointer) \
do { \
size_t _c, _j = i; \
\
for (; _j * 2 + 1 < (h)->used; _j = _c) { \
_c = _j * 2 + 1; \
if (_c + 1 < (h)->used && \
cmp(h, (h)->data[_c], (h)->data[_c + 1]) >= 0) \
_c++; \
\
if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
break; \
heap_swap(h, _c, _j, set_backpointer); \
} \
} while (0)
#define heap_sift_up(h, i, cmp, set_backpointer) \
do { \
while (i) { \
size_t p = (i - 1) / 2; \
if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
break; \
heap_swap(h, i, p, set_backpointer); \
i = p; \
} \
} while (0)
#define __heap_add(h, d, cmp, set_backpointer) \
({ \
size_t _i = (h)->used++; \
(h)->data[_i] = d; \
heap_set_backpointer(h, _i, set_backpointer); \
\
heap_sift_up(h, _i, cmp, set_backpointer); \
_i; \
})
#define heap_add(h, d, cmp, set_backpointer) \
({ \
bool _r = !heap_full(h); \
if (_r) \
__heap_add(h, d, cmp, set_backpointer); \
_r; \
})
#define heap_add_or_replace(h, new, cmp, set_backpointer) \
do { \
if (!heap_add(h, new, cmp, set_backpointer) && \
cmp(h, new, heap_peek(h)) >= 0) { \
(h)->data[0] = new; \
heap_set_backpointer(h, 0, set_backpointer); \
heap_sift_down(h, 0, cmp, set_backpointer); \
} \
} while (0)
#define heap_del(h, i, cmp, set_backpointer) \
do { \
size_t _i = (i); \
\
BUG_ON(_i >= (h)->used); \
(h)->used--; \
if ((_i) < (h)->used) { \
heap_swap(h, _i, (h)->used, set_backpointer); \
heap_sift_up(h, _i, cmp, set_backpointer); \
heap_sift_down(h, _i, cmp, set_backpointer); \
} \
} while (0)
#define heap_pop(h, d, cmp, set_backpointer) \
({ \
bool _r = (h)->used; \
if (_r) { \
(d) = (h)->data[0]; \
heap_del(h, 0, cmp, set_backpointer); \
} \
_r; \
})
#define heap_resort(heap, cmp, set_backpointer) \
do { \
ssize_t _i; \
for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
heap_sift_down(heap, _i, cmp, set_backpointer); \
} while (0)
#define ANYSINT_MAX(t) \
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)

View File

@ -296,54 +296,23 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct bch_fs *c = dentry->d_sb->s_fs_info;
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter;
struct bkey_s_c k;
struct xattr_buf buf = { .buf = buffer, .len = buffer_size };
u64 offset = 0, inum = inode->ei_inode.bi_inum;
u32 snapshot;
int ret;
retry:
bch2_trans_begin(trans);
iter = (struct btree_iter) { NULL };
ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);
if (ret)
goto err;
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_xattrs,
SPOS(inum, offset, snapshot),
POS(inum, U64_MAX), 0, k, ret) {
int ret = bch2_trans_run(c,
for_each_btree_key_in_subvolume_upto(trans, iter, BTREE_ID_xattrs,
POS(inum, offset),
POS(inum, U64_MAX),
inode->ei_inum.subvol, 0, k, ({
if (k.k->type != KEY_TYPE_xattr)
continue;
ret = bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
if (ret)
break;
}
bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf);
}))) ?:
bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false) ?:
bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
offset = iter.pos.offset;
bch2_trans_iter_exit(trans, &iter);
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_put(trans);
if (ret)
goto out;
ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false);
if (ret)
goto out;
ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true);
if (ret)
goto out;
return buf.used;
out:
return bch2_err_class(ret);
return ret ? bch2_err_class(ret) : buf.used;
}
static int bch2_xattr_get_handler(const struct xattr_handler *handler,