diff --git a/.bcachefs_revision b/.bcachefs_revision
index d29d45d4..7c242654 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-90d78c246188f4e90bd9ceb29fe95186b7dc680d
+e7f4678827ad7bf7e294105e7bb3a53e04474070
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 4016ab96..d29bdafa 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -180,27 +180,16 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
 {
 	struct bch_inode_info *inode = to_bch_ei(vinode);
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
+	umode_t mode = inode->v.i_mode;
 	int name_index;
 	void *value = NULL;
 	size_t size = 0;
 	int ret;
 
 	if (type == ACL_TYPE_ACCESS && acl) {
-		umode_t mode = inode->v.i_mode;
-
 		ret = posix_acl_update_mode(&inode->v, &mode, &acl);
 		if (ret)
 			return ret;
-
-		mutex_lock(&inode->ei_update_lock);
-		inode->v.i_mode = mode;
-		inode->v.i_ctime = current_time(&inode->v);
-
-		ret = bch2_write_inode(c, inode);
-		mutex_unlock(&inode->ei_update_lock);
-
-		if (ret)
-			return ret;
 	}
 
 	switch (type) {
@@ -210,12 +199,8 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
 			ret = posix_acl_equiv_mode(acl, &inode->v.i_mode);
 			if (ret < 0)
 				return ret;
-			else {
-				inode->v.i_ctime = current_time(&inode->v);
-				mark_inode_dirty(&inode->v);
-				if (ret == 0)
-					acl = NULL;
-			}
+			if (ret == 0)
+				acl = NULL;
 		}
 		break;
 
@@ -235,8 +220,20 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *acl, int type)
 			return (int)PTR_ERR(value);
 	}
 
-	ret = bch2_xattr_set(c, inode, "", value, size, 0, name_index);
+	if (mode != inode->v.i_mode) {
+		mutex_lock(&inode->ei_update_lock);
+		inode->v.i_mode = mode;
+		inode->v.i_ctime = current_time(&inode->v);
+		ret = bch2_write_inode(c, inode);
+		mutex_unlock(&inode->ei_update_lock);
+
+		if (ret)
+			goto err;
+	}
+
+	ret = bch2_xattr_set(c, inode, "", value, size, 0, name_index);
+err:
 	kfree(value);
 
 	if (ret == -ERANGE)
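The acl.c change above reworks bch2_set_acl() so the inode is written at most once, and only when the ACL actually changed i_mode: the new mode is computed up front via posix_acl_update_mode(), the ctime/mark_inode_dirty dance in the posix_acl_equiv_mode() branch is dropped, and a failed inode write now bails out through the new err label so the value buffer is always freed. A standalone model of the new control flow (the struct and helpers below are illustrative stand-ins, not the bcachefs API):

```c
#include <stddef.h>

struct model_inode { unsigned mode; };

static int write_inode(struct model_inode *inode)    { (void)inode; return 0; }
static int xattr_set(const void *value, size_t size) { (void)value; (void)size; return 0; }

static int set_acl(struct model_inode *inode, unsigned new_mode,
		   const void *value, size_t size)
{
	int ret;

	/* persist the inode only if the ACL actually changed the mode: */
	if (new_mode != inode->mode) {
		inode->mode = new_mode;
		ret = write_inode(inode);
		if (ret)
			return ret;	/* the xattr is never written on failure */
	}

	/* the ACL xattr is written last, in a single pass: */
	return xattr_set(value, size);
}

int main(void)
{
	struct model_inode inode = { 0644 };

	return set_acl(&inode, 0640, "acl-bytes", 9);
}
```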
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index a76f2b7c..470ef207 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -966,11 +966,24 @@ static int bch2_allocator_thread(void *arg)
 			if (fifo_full(&ca->free_inc))
 				break;
 
+			if (!fifo_empty(&ca->free_inc) &&
+			    !fifo_full(&ca->free[RESERVE_MOVINGGC]))
+				break;
+
+			/*
+			 * copygc may be waiting until either its reserve fills
+			 * up, or we can't make forward progress:
+			 */
+			ca->allocator_blocked = true;
+			closure_wake_up(&c->freelist_wait);
+
 			if (wait_buckets_available(c, ca)) {
 				up_read(&c->gc_lock);
 				return 0;
 			}
 		}
+
+		ca->allocator_blocked = false;
 		up_read(&c->gc_lock);
 
 		sort_free_inc(c, ca);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 75f3a006..369d078c 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -399,6 +399,7 @@ struct bch_dev {
 	size_t			inc_gen_really_needs_gc;
 	u64			allocator_journal_seq_flush;
 	bool			allocator_invalidating_data;
+	bool			allocator_blocked;
 
 	alloc_heap		alloc_heap;
 
@@ -671,9 +672,6 @@ struct bch_fs {
 	bool			fsck_alloc_err;
 
 	/* FILESYSTEM */
-	wait_queue_head_t	writeback_wait;
-	atomic_t		writeback_pages;
-	unsigned		writeback_pages_max;
 	atomic_long_t		nr_inodes;
 
 	/* QUOTAS */
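These two hunks are the producer side of a handshake with the movinggc.c change further down: before the allocator thread sleeps in wait_buckets_available(), it publishes ca->allocator_blocked and wakes c->freelist_wait, so copygc can tell "the reserve will fill soon" apart from "the allocator is stuck and this is all I'm going to get". A compressed standalone model of that ordering (names and the use of C11 atomics are illustrative; the real code uses fifos and closures):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct dev_state {
	atomic_bool reserve_full;
	atomic_bool allocator_blocked;
};

/* allocator side, just before sleeping in wait_buckets_available(): */
static void allocator_about_to_block(struct dev_state *d)
{
	atomic_store(&d->allocator_blocked, true);
	/* ...followed by closure_wake_up(&c->freelist_wait) in the real code */
}

/* copygc side: a model of the new have_copygc_reserve() predicate */
static bool have_copygc_reserve(struct dev_state *d)
{
	return atomic_load(&d->reserve_full) ||
	       atomic_load(&d->allocator_blocked);
}

int main(void)
{
	struct dev_state d = { false, false };

	allocator_about_to_block(&d);
	printf("copygc may proceed: %d\n", have_copygc_reserve(&d));
	return 0;
}
```

Publishing the flag before issuing the wakeup is what makes the waiter's recheck race-free: a waiter that misses the wakeup still observes allocator_blocked when it re-evaluates its predicate.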
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index a07d5540..92046ae4 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -174,13 +174,11 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
 void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
 				 struct btree *b)
 {
-	struct btree_node_iter_set *set;
+	struct btree_node_iter_set *set, *prev = NULL;
 	struct bset_tree *t;
 	struct bkey_packed *k, *first;
 
-	BUG_ON(iter->used > MAX_BSETS);
-
-	if (!iter->used)
+	if (bch2_btree_node_iter_end(iter))
 		return;
 
 	btree_node_iter_for_each(iter, set) {
@@ -190,8 +188,10 @@ void bch2_btree_node_iter_verify(struct btree_node_iter *iter,
 		BUG_ON(__btree_node_offset_to_key(b, set->end) !=
 		       btree_bkey_last(b, t));
 
-		BUG_ON(set + 1 < iter->data + iter->used &&
-		       btree_node_iter_cmp(iter, b, set[0], set[1]) > 0);
+		BUG_ON(prev &&
+		       btree_node_iter_cmp(iter, b, *prev, *set) > 0);
+
+		prev = set;
 	}
 
 	first = __btree_node_offset_to_key(b, iter->data[0].k);
@@ -1463,22 +1463,8 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
 			       const struct bkey_packed *k,
 			       const struct bkey_packed *end)
 {
-	if (k != end) {
-		struct btree_node_iter_set *pos, n =
-			((struct btree_node_iter_set) {
-				 __btree_node_key_to_offset(b, k),
-				 __btree_node_key_to_offset(b, end)
-			 });
-
-		btree_node_iter_for_each(iter, pos)
-			if (btree_node_iter_cmp(iter, b, n, *pos) <= 0)
-				break;
-
-		memmove(pos + 1, pos,
-			(void *) (iter->data + iter->used) - (void *) pos);
-		iter->used++;
-		*pos = n;
-	}
+	__bch2_btree_node_iter_push(iter, b, k, end);
+	bch2_btree_node_iter_sort(iter, b);
 }
 
 noinline __flatten __attribute__((cold))
@@ -1595,8 +1581,6 @@ struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter,
 {
 	struct btree_node_iter_set *set;
 
-	EBUG_ON(iter->used > MAX_BSETS);
-
 	btree_node_iter_for_each(iter, set)
 		if (set->end == t->end_offset)
 			return __btree_node_offset_to_key(b, set->k);
@@ -1604,47 +1588,67 @@ struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *iter,
 	return btree_bkey_last(b, t);
 }
 
-static inline void btree_node_iter_sift(struct btree_node_iter *iter,
-					struct btree *b,
-					unsigned start)
-{
-	unsigned i;
-
-	EBUG_ON(iter->used > MAX_BSETS);
-
-	for (i = start;
-	     i + 1 < iter->used &&
-	     btree_node_iter_cmp(iter, b, iter->data[i], iter->data[i + 1]) > 0;
-	     i++)
-		swap(iter->data[i], iter->data[i + 1]);
-}
-
-static inline void btree_node_iter_sort_two(struct btree_node_iter *iter,
+static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter,
 					    struct btree *b,
 					    unsigned first)
 {
-	if (btree_node_iter_cmp(iter, b,
-				iter->data[first],
-				iter->data[first + 1]) > 0)
+	bool ret;
+
+	if ((ret = (btree_node_iter_cmp(iter, b,
+					iter->data[first],
+					iter->data[first + 1]) > 0)))
 		swap(iter->data[first], iter->data[first + 1]);
+	return ret;
 }
 
 void bch2_btree_node_iter_sort(struct btree_node_iter *iter,
 			       struct btree *b)
 {
-	EBUG_ON(iter->used > 3);
-
 	/* unrolled bubble sort: */
-	if (iter->used > 2) {
+	if (!__btree_node_iter_set_end(iter, 2)) {
 		btree_node_iter_sort_two(iter, b, 0);
 		btree_node_iter_sort_two(iter, b, 1);
 	}
 
-	if (iter->used > 1)
+	if (!__btree_node_iter_set_end(iter, 1))
 		btree_node_iter_sort_two(iter, b, 0);
 }
 
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *iter,
+				   struct btree_node_iter_set *set)
+{
+	struct btree_node_iter_set *last =
+		iter->data + ARRAY_SIZE(iter->data) - 1;
+
+	memmove(&set[0], &set[1], (void *) last - (void *) set);
+	*last = (struct btree_node_iter_set) { 0, 0 };
+}
+
+static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
+						  struct btree *b)
+{
+	iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s;
+
+	EBUG_ON(iter->data->k > iter->data->end);
+
+	if (unlikely(__btree_node_iter_set_end(iter, 0))) {
+		bch2_btree_node_iter_set_drop(iter, iter->data);
+		return;
+	}
+
+	if (__btree_node_iter_set_end(iter, 1))
+		return;
+
+	if (!btree_node_iter_sort_two(iter, b, 0))
+		return;
+
+	if (__btree_node_iter_set_end(iter, 2))
+		return;
+
+	btree_node_iter_sort_two(iter, b, 1);
+}
+
 /**
  * bch_btree_node_iter_advance - advance @iter by one key
  *
@@ -1656,23 +1660,24 @@ void bch2_btree_node_iter_advance(struct btree_node_iter *iter,
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
 	struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b);
-#endif
 
-	iter->data->k += __bch2_btree_node_iter_peek_all(iter, b)->u64s;
-
-	EBUG_ON(iter->data->k > iter->data->end);
-
-	if (iter->data->k == iter->data->end) {
-		EBUG_ON(iter->used == 0);
-		iter->data[0] = iter->data[--iter->used];
-	}
-
-	btree_node_iter_sift(iter, b, 0);
-
-#ifdef CONFIG_BCACHEFS_DEBUG
+	__bch2_btree_node_iter_advance(iter, b);
 	bch2_btree_node_iter_next_check(iter, b, k);
+#else
+	__bch2_btree_node_iter_advance(iter, b);
 #endif
 }
 
+static inline bool __btree_node_iter_used(struct btree_node_iter *iter)
+{
+	unsigned n = ARRAY_SIZE(iter->data);
+
+	while (n && __btree_node_iter_set_end(iter, n - 1))
+		--n;
+
+	return n;
+}
+
 /*
  * Expensive:
  */
@@ -1683,7 +1688,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
 	struct btree_node_iter_set *set;
 	struct bset_tree *t;
 	struct bset_tree *prev_t;
-	unsigned end;
+	unsigned end, used;
 
 	bch2_btree_node_iter_verify(iter, b);
 
@@ -1715,10 +1720,12 @@ struct bkey_packed *bch2_btree_node_iter_prev_all(struct btree_node_iter *iter,
 		goto out;
 	}
 
+	used = __btree_node_iter_used(iter);
+	BUG_ON(used >= ARRAY_SIZE(iter->data));
+
 	memmove(&iter->data[1],
 		&iter->data[0],
-		(void *) &iter->data[iter->used] - (void *) &iter->data[0]);
-	iter->used++;
+		(void *) &iter->data[used] - (void *) &iter->data[0]);
 out:
 	iter->data[0].k = __btree_node_key_to_offset(b, prev);
 	iter->data[0].end = end;
diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h
index f5a84481..cc4ea5d8 100644
--- a/libbcachefs/bset.h
+++ b/libbcachefs/bset.h
@@ -422,7 +422,6 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k,
 
 struct btree_node_iter {
 	u8		is_extents;
-	u16		used;
 
 	struct btree_node_iter_set {
 		u16	k, end;
@@ -432,8 +431,8 @@ struct btree_node_iter {
 static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter,
 					       bool is_extents)
 {
-	iter->used = 0;
 	iter->is_extents = is_extents;
+	memset(iter->data, 0, sizeof(iter->data));
 }
 
 void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *,
@@ -448,16 +447,25 @@ struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *,
 						  struct bset_tree *);
 
 void bch2_btree_node_iter_sort(struct btree_node_iter *, struct btree *);
+void bch2_btree_node_iter_set_drop(struct btree_node_iter *,
+				   struct btree_node_iter_set *);
 void bch2_btree_node_iter_advance(struct btree_node_iter *, struct btree *);
 
-#define btree_node_iter_for_each(_iter, _set)				\
-	for (_set = (_iter)->data;					\
-	     _set < (_iter)->data + (_iter)->used;			\
+#define btree_node_iter_for_each(_iter, _set)				\
+	for (_set = (_iter)->data;					\
+	     _set < (_iter)->data + ARRAY_SIZE((_iter)->data) &&	\
+	     (_set)->k != (_set)->end;					\
 	     _set++)
 
+static inline bool __btree_node_iter_set_end(struct btree_node_iter *iter,
+					     unsigned i)
+{
+	return iter->data[i].k == iter->data[i].end;
+}
+
 static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter)
 {
-	return !iter->used;
+	return __btree_node_iter_set_end(iter, 0);
 }
 
 static inline int __btree_node_iter_cmp(bool is_extents,
@@ -493,11 +501,18 @@ static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
 			      const struct bkey_packed *k,
 			      const struct bkey_packed *end)
 {
-	if (k != end)
-		iter->data[iter->used++] = (struct btree_node_iter_set) {
+	if (k != end) {
+		struct btree_node_iter_set *pos;
+
+		btree_node_iter_for_each(iter, pos)
+			;
+
+		BUG_ON(pos >= iter->data + ARRAY_SIZE(iter->data));
+		*pos = (struct btree_node_iter_set) {
 			__btree_node_key_to_offset(b, k),
 			__btree_node_key_to_offset(b, end)
 		};
+	}
 }
 
 static inline struct bkey_packed *
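This is the heart of the bset refactoring: struct btree_node_iter loses its used count, and exhaustion is instead encoded in the sets themselves. A set with k == end is empty, __bch2_btree_node_iter_init() zeroes data[] so unused slots read as empty, and btree_node_iter_for_each() stops at the first empty set. A standalone sketch of the convention (simplified types, made-up offsets):

```c
#include <stdio.h>

#define MAX_BSETS 3

struct iter_set { unsigned short k, end; };

struct node_iter {
	struct iter_set data[MAX_BSETS];	/* zeroed: all sets read as "end" */
};

/* mirrors __btree_node_iter_set_end(): */
static int set_is_end(struct node_iter *iter, unsigned i)
{
	return iter->data[i].k == iter->data[i].end;
}

int main(void)
{
	/* two live ranges; the third slot is the zeroed sentinel */
	struct node_iter iter = {{{ 0, 16 }, { 16, 24 }, { 0, 0 }}};
	struct iter_set *set;

	/* mirrors btree_node_iter_for_each(): stop at the first empty set */
	for (set = iter.data;
	     set < iter.data + MAX_BSETS && set->k != set->end;
	     set++)
		printf("set: %u..%u\n", set->k, set->end);

	printf("iter at end: %d\n", set_is_end(&iter, 2));
	return 0;
}
```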
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index d805fb41..881039b1 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -18,6 +18,43 @@
 
 #include <trace/events/bcachefs.h>
 
+/* btree_node_iter_large: */
+
+#define btree_node_iter_cmp_heap(h, _l, _r)				\
+	__btree_node_iter_cmp((iter)->is_extents, b,			\
+			      __btree_node_offset_to_key(b, (_l).k),	\
+			      __btree_node_offset_to_key(b, (_r).k))
+
+void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
+				     struct btree *b,
+				     const struct bkey_packed *k,
+				     const struct bkey_packed *end)
+{
+	if (k != end) {
+		struct btree_node_iter_set n =
+			((struct btree_node_iter_set) {
+				 __btree_node_key_to_offset(b, k),
+				 __btree_node_key_to_offset(b, end)
+			 });
+
+		__heap_add(iter, n, btree_node_iter_cmp_heap);
+	}
+}
+
+void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
+					struct btree *b)
+{
+	iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s;
+
+	EBUG_ON(!iter->used);
+	EBUG_ON(iter->data->k > iter->data->end);
+
+	if (iter->data->k == iter->data->end)
+		heap_del(iter, 0, btree_node_iter_cmp_heap);
+	else
+		heap_sift_down(iter, 0, btree_node_iter_cmp_heap);
+}
+
 static void verify_no_dups(struct btree *b,
 			   struct bkey_packed *start,
 			   struct bkey_packed *end)
@@ -910,7 +947,7 @@ enum btree_validate_ret {
 
 #define btree_err(type, c, b, i, msg, ...)				\
 ({									\
-	char buf[200], *out = buf, *end = out + sizeof(buf);		\
+	char _buf[200], *out = _buf, *end = out + sizeof(_buf);		\
 									\
 	out += btree_err_msg(c, b, i, b->written, write, out, end - out);\
 	out += scnprintf(out, end - out, ": " msg, ##__VA_ARGS__);	\
@@ -918,9 +955,9 @@ enum btree_validate_ret {
 	if (type == BTREE_ERR_FIXABLE &&				\
 	    write == READ &&						\
 	    !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {		\
-		mustfix_fsck_err(c, "%s", buf);				\
+		mustfix_fsck_err(c, "%s", _buf);			\
 	} else {							\
-		bch_err(c, "%s", buf);					\
+		bch_err(c, "%s", _buf);					\
 									\
 		switch (type) {						\
 		case BTREE_ERR_FIXABLE:					\
@@ -1108,7 +1145,7 @@ fsck_err:
 int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry)
 {
 	struct btree_node_entry *bne;
-	struct btree_node_iter *iter;
+	struct btree_node_iter_large *iter;
 	struct btree_node *sorted;
 	struct bkey_packed *k;
 	struct bset *i;
@@ -1117,7 +1154,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 	int ret, retry_read = 0, write = READ;
 
 	iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
-	__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
+	__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));
 
 	if (bch2_meta_read_fault("btree"))
 		btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL,
@@ -1202,11 +1239,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 			continue;
 		}
 
-		__bch2_btree_node_iter_push(iter, b,
+		bch2_btree_node_iter_large_push(iter, b,
 					    i->start,
 					    vstruct_idx(i, whiteout_u64s));
 
-		__bch2_btree_node_iter_push(iter, b,
+		bch2_btree_node_iter_large_push(iter, b,
 					    vstruct_idx(i, whiteout_u64s),
 					    vstruct_last(i));
 	}
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index f2790716..01df817d 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -1,6 +1,7 @@
 #ifndef _BCACHEFS_BTREE_IO_H
 #define _BCACHEFS_BTREE_IO_H
 
+#include "bset.h"
 #include "extents.h"
 #include "io_types.h"
 
@@ -141,4 +142,55 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
 void bch2_btree_verify_flushed(struct bch_fs *);
 ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *);
 
+/* Sorting */
+
+struct btree_node_iter_large {
+	u8		is_extents;
+	u16		used;
+
+	struct btree_node_iter_set data[MAX_BSETS];
+};
+
+static inline void
+__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter,
+				  bool is_extents)
+{
+	iter->used = 0;
+	iter->is_extents = is_extents;
+}
+
+void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *,
+					struct btree *);
+
+void bch2_btree_node_iter_large_push(struct btree_node_iter_large *,
+				     struct btree *,
+				     const struct bkey_packed *,
+				     const struct bkey_packed *);
+
+static inline bool bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter)
+{
+	return !iter->used;
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter,
+				    struct btree *b)
+{
+	return bch2_btree_node_iter_large_end(iter)
+		? NULL
+		: __btree_node_offset_to_key(b, iter->data->k);
+}
+
+static inline struct bkey_packed *
+bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter,
+				    struct btree *b)
+{
+	struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b);
+
+	if (ret)
+		bch2_btree_node_iter_large_advance(iter, b);
+
+	return ret;
+}
+
 #endif /* _BCACHEFS_BTREE_IO_H */
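The old used-based iterator survives for the read path as btree_node_iter_large in btree_io.h, where it is heap-ordered rather than kept fully sorted, since a node read may merge many bsets at once. Roughly how bch2_btree_node_read_done() drives it, pieced together from the hunks above (a sketch, not a verbatim excerpt; b, i and iter are the caller's btree node, bset and mempool-allocated iterator, and process() is a made-up consumer):

```c
	struct bkey_packed *k;

	__bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b));

	/* one push per range; the heap keeps the smallest key at data[0]: */
	bch2_btree_node_iter_large_push(iter, b, i->start, vstruct_last(i));

	/* keys come back in sorted order across every pushed range: */
	while ((k = bch2_btree_node_iter_large_next_all(iter, b)))
		process(k);	/* e.g. copy into the sorted destination bset */
```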
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 21a6cbc2..cc5bcbb2 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -382,7 +382,7 @@ found:
 	} else if (set->k < offset + clobber_u64s) {
 		set->k = offset + new_u64s;
 		if (set->k == set->end)
-			*set = node_iter->data[--node_iter->used];
+			bch2_btree_node_iter_set_drop(node_iter, set);
 	} else {
 		set->k = (int) set->k + shift;
 		goto iter_current_key_not_modified;
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 37470f86..b7d969b1 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -28,7 +28,7 @@ static enum merge_result bch2_extent_merge(struct bch_fs *,
 					   struct btree *,
 					   struct bkey_i *, struct bkey_i *);
 
-static void sort_key_next(struct btree_node_iter *iter,
+static void sort_key_next(struct btree_node_iter_large *iter,
 			  struct btree *b,
 			  struct btree_node_iter_set *i)
 {
@@ -54,7 +54,7 @@ static void sort_key_next(struct btree_node_iter *iter,
 	?: (l).k - (r).k;						\
 })
 
-static inline bool should_drop_next_key(struct btree_node_iter *iter,
+static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
 					struct btree *b)
 {
 	struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
@@ -81,8 +81,8 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
 }
 
 struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
-						   struct btree *b,
-						   struct btree_node_iter *iter)
+						   struct btree *b,
+						   struct btree_node_iter_large *iter)
 {
 	struct bkey_packed *out = dst->start;
 	struct btree_nr_keys nr;
@@ -91,7 +91,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
 
 	heap_resort(iter, key_sort_cmp);
 
-	while (!bch2_btree_node_iter_end(iter)) {
+	while (!bch2_btree_node_iter_large_end(iter)) {
 		if (!should_drop_next_key(iter, b)) {
 			struct bkey_packed *k =
 				__btree_node_offset_to_key(b, iter->data->k);
@@ -148,7 +148,7 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group
 		struct bch_dev *ca = c->devs[ptr->dev];
 
 		if (ca->mi.group &&
-		    ca->mi.group == group)
+		    ca->mi.group - 1 == group)
 			return ptr;
 	}
 
@@ -890,13 +890,13 @@ static void extent_save(struct btree *b, struct btree_node_iter *iter,
 		bkey_start_pos(&_ur)) ?: (r).k - (l).k;			\
 })
 
-static inline void extent_sort_sift(struct btree_node_iter *iter,
+static inline void extent_sort_sift(struct btree_node_iter_large *iter,
 				    struct btree *b, size_t i)
 {
 	heap_sift_down(iter, i, extent_sort_cmp);
 }
 
-static inline void extent_sort_next(struct btree_node_iter *iter,
+static inline void extent_sort_next(struct btree_node_iter_large *iter,
 				    struct btree *b,
 				    struct btree_node_iter_set *i)
 {
@@ -938,7 +938,7 @@ static void extent_sort_append(struct bch_fs *c,
 struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 					struct bset *dst,
 					struct btree *b,
-					struct btree_node_iter *iter)
+					struct btree_node_iter_large *iter)
 {
 	struct bkey_format *f = &b->format;
 	struct btree_node_iter_set *_l = iter->data, *_r;
@@ -951,7 +951,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
 	heap_resort(iter, extent_sort_cmp);
 
-	while (!bch2_btree_node_iter_end(iter)) {
+	while (!bch2_btree_node_iter_large_end(iter)) {
 		lk = __btree_node_offset_to_key(b, _l->k);
 
 		if (iter->used == 1) {
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 83c0f24d..1ce0d38d 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -8,6 +8,7 @@
 struct bch_fs;
 struct journal_res;
 struct btree_node_iter;
+struct btree_node_iter_large;
 struct btree_insert;
 struct btree_insert_entry;
 struct extent_insert_hook;
@@ -16,11 +17,11 @@ union bch_extent_crc;
 
 struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *,
 						   struct btree *,
-						   struct btree_node_iter *);
+						   struct btree_node_iter_large *);
 struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 						      struct bset *,
 						      struct btree *,
-						      struct btree_node_iter *);
+						      struct btree_node_iter_large *);
 
 extern const struct bkey_ops bch2_bkey_btree_ops;
 extern const struct bkey_ops bch2_bkey_extent_ops;
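The ca->mi.group - 1 == group test in bch2_extent_has_group() above, together with the matching dev_in_target() change in super-io.h later in this patch, implies the superblock field stores group N as N + 1: zero keeps meaning "no group", and group 0 becomes representable instead of colliding with it. A standalone model of that encoding (helper names invented for illustration):

```c
#include <assert.h>

static unsigned group_to_sb(unsigned group)	{ return group + 1; }
static int sb_has_group(unsigned sb_group)	{ return sb_group != 0; }
static unsigned sb_to_group(unsigned sb_group)	{ return sb_group - 1; }

int main(void)
{
	unsigned stored = group_to_sb(0);	/* group 0 is representable */

	assert(sb_has_group(stored));
	assert(sb_to_group(stored) == 0);
	assert(!sb_has_group(0));		/* 0 still means "none" */
	return 0;
}
```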
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 46cffc5c..cb90738c 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -1193,9 +1193,6 @@ static void bch2_writepage_io_done(struct closure *cl)
 	struct bio_vec *bvec;
 	unsigned i;
 
-	atomic_sub(bio->bi_vcnt, &c->writeback_pages);
-	wake_up(&c->writeback_wait);
-
 	if (io->op.op.error) {
 		bio_for_each_segment_all(bvec, bio, i)
 			SetPageError(bvec->bv_page);
@@ -1232,11 +1229,8 @@ static void bch2_writepage_io_done(struct closure *cl)
 static void bch2_writepage_do_io(struct bch_writepage_state *w)
 {
 	struct bch_writepage_io *io = w->io;
-	struct bio *bio = &io->op.op.wbio.bio;
 
 	w->io = NULL;
-	atomic_add(bio->bi_vcnt, &io->op.op.c->writeback_pages);
-
 	closure_call(&io->op.op.cl, bch2_write, NULL, &io->cl);
 	continue_at(&io->cl, bch2_writepage_io_done, NULL);
 }
@@ -1270,11 +1264,13 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
 	op->wbio.bio.bi_iter.bi_sector = offset;
 }
 
-static int __bch2_writepage(struct bch_fs *c, struct page *page,
+static int __bch2_writepage(struct page *page,
 			    struct writeback_control *wbc,
-			    struct bch_writepage_state *w)
+			    void *data)
 {
 	struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
+	struct bch_fs *c = inode->v.i_sb->s_fs_info;
+	struct bch_writepage_state *w = data;
 	struct bch_page_state new, old;
 	unsigned offset;
 	loff_t i_size = i_size_read(&inode->v);
@@ -1318,6 +1314,10 @@ do_io:
 		new.dirty_sectors = 0;
 	});
 
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	unlock_page(page);
+
 	if (w->io &&
 	    (w->io->op.op.res.nr_replicas != new.nr_replicas ||
 	     !bio_can_add_page_contig(&w->io->op.op.wbio.bio, page)))
@@ -1334,16 +1334,11 @@ do_io:
 	if (old.reserved)
 		w->io->op.op.res.sectors += old.reservation_replicas *
 			PAGE_SECTORS;
 
-	/* while page is locked: */
 	w->io->op.new_i_size = i_size;
 
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		w->io->op.op.wbio.bio.bi_opf |= REQ_SYNC;
 
-	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-	unlock_page(page);
-
 	return 0;
 }
 
@@ -1352,147 +1347,14 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	struct bch_fs *c = mapping->host->i_sb->s_fs_info;
 	struct bch_writepage_state w =
 		bch_writepage_state_init(c, to_bch_ei(mapping->host));
-	struct pagecache_iter iter;
-	struct page *page;
-	int ret = 0;
-	int done = 0;
-	pgoff_t uninitialized_var(writeback_index);
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
-	pgoff_t done_index;
-	int cycled;
-	int range_whole = 0;
-	int tag;
-
-	if (wbc->range_cyclic) {
-		writeback_index = mapping->writeback_index; /* prev offset */
-		index = writeback_index;
-		if (index == 0)
-			cycled = 1;
-		else
-			cycled = 0;
-		end = -1;
-	} else {
-		index = wbc->range_start >> PAGE_SHIFT;
-		end = wbc->range_end >> PAGE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
-		cycled = 1; /* ignore range_cyclic tests */
-	}
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag = PAGECACHE_TAG_TOWRITE;
-	else
-		tag = PAGECACHE_TAG_DIRTY;
-retry:
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag_pages_for_writeback(mapping, index, end);
-
-	done_index = index;
-get_pages:
-	for_each_pagecache_tag(&iter, mapping, tag, index, end, page) {
-		done_index = page->index;
-
-		if (w.io &&
-		    !bio_can_add_page_contig(&w.io->op.op.wbio.bio, page))
-			bch2_writepage_do_io(&w);
-
-		if (!w.io &&
-		    atomic_read(&c->writeback_pages) >=
-		    c->writeback_pages_max) {
-			/* don't sleep with pages pinned: */
-			pagecache_iter_release(&iter);
-
-			__wait_event(c->writeback_wait,
-				     atomic_read(&c->writeback_pages) <
-				     c->writeback_pages_max);
-			goto get_pages;
-		}
-
-		lock_page(page);
-
-		/*
-		 * Page truncated or invalidated. We can freely skip it
-		 * then, even for data integrity operations: the page
-		 * has disappeared concurrently, so there could be no
-		 * real expectation of this data interity operation
-		 * even if there is now a new, dirty page at the same
-		 * pagecache address.
-		 */
-		if (unlikely(page->mapping != mapping)) {
-continue_unlock:
-			unlock_page(page);
-			continue;
-		}
-
-		if (!PageDirty(page)) {
-			/* someone wrote it for us */
-			goto continue_unlock;
-		}
-
-		if (PageWriteback(page)) {
-			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-			else
-				goto continue_unlock;
-		}
-
-		BUG_ON(PageWriteback(page));
-		if (!clear_page_dirty_for_io(page))
-			goto continue_unlock;
-
-		trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-		ret = __bch2_writepage(c, page, wbc, &w);
-		if (unlikely(ret)) {
-			if (ret == AOP_WRITEPAGE_ACTIVATE) {
-				unlock_page(page);
-				ret = 0;
-			} else {
-				/*
-				 * done_index is set past this page,
-				 * so media errors will not choke
-				 * background writeout for the entire
-				 * file. This has consequences for
-				 * range_cyclic semantics (ie. it may
-				 * not be suitable for data integrity
-				 * writeout).
-				 */
-				done_index = page->index + 1;
-				done = 1;
-				break;
-			}
-		}
-
-		/*
-		 * We stop writing back only if we are not doing
-		 * integrity sync. In case of integrity sync we have to
-		 * keep going until we have written all the pages
-		 * we tagged for writeback prior to entering this loop.
-		 */
-		if (--wbc->nr_to_write <= 0 &&
-		    wbc->sync_mode == WB_SYNC_NONE) {
-			done = 1;
-			break;
-		}
-	}
-	pagecache_iter_release(&iter);
+	struct blk_plug plug;
+	int ret;
 
+	blk_start_plug(&plug);
+	ret = write_cache_pages(mapping, wbc, __bch2_writepage, &w);
 	if (w.io)
 		bch2_writepage_do_io(&w);
-
-	if (!cycled && !done) {
-		/*
-		 * range_cyclic:
-		 * We hit the last page and there is more work to be done: wrap
-		 * back to the start of the file
-		 */
-		cycled = 1;
-		index = 0;
-		end = writeback_index - 1;
-		goto retry;
-	}
-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-		mapping->writeback_index = done_index;
-
+	blk_finish_plug(&plug);
 	return ret;
 }
 
@@ -1503,7 +1365,7 @@ int bch2_writepage(struct page *page, struct writeback_control *wbc)
 		bch_writepage_state_init(c, to_bch_ei(page->mapping->host));
 	int ret;
 
-	ret = __bch2_writepage(c, page, wbc, &w);
+	ret = __bch2_writepage(page, wbc, &w);
 	if (w.io)
 		bch2_writepage_do_io(&w);
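bch2_writepages() drops its open-coded copy of the pagecache walk in favor of the kernel's write_cache_pages(), which already handles tag-based lookup, range_cyclic wraparound and nr_to_write accounting; __bch2_writepage() is reshaped to the writepage_t calling convention so it can serve as the callback, and a block plug batches the resulting bios. The skeleton of that pattern (generic kernel-style sketch, not a verbatim excerpt):

```c
#include <linux/blkdev.h>
#include <linux/writeback.h>

/* matches writepage_t; pages arrive locked with the dirty bit cleared.
 * A real callback must hand off writeback state itself, as
 * __bch2_writepage() now does with set_page_writeback() + unlock_page().
 */
static int my_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	/* per-page work goes here; `data` is the cookie passed below */
	return 0;
}

static int my_writepages(struct address_space *mapping,
			 struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, my_writepage, NULL);
	blk_finish_plug(&plug);
	return ret;
}
```

This also makes the removed writeback_pages throttling machinery (and its fields in struct bch_fs) unnecessary, since wbc->nr_to_write already bounds how much one call writes.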
if (w->io && (w->io->op.op.res.nr_replicas != new.nr_replicas || !bio_can_add_page_contig(&w->io->op.op.wbio.bio, page))) @@ -1334,16 +1334,11 @@ do_io: if (old.reserved) w->io->op.op.res.sectors += old.reservation_replicas * PAGE_SECTORS; - /* while page is locked: */ w->io->op.new_i_size = i_size; if (wbc->sync_mode == WB_SYNC_ALL) w->io->op.op.wbio.bio.bi_opf |= REQ_SYNC; - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - unlock_page(page); - return 0; } @@ -1352,147 +1347,14 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc struct bch_fs *c = mapping->host->i_sb->s_fs_info; struct bch_writepage_state w = bch_writepage_state_init(c, to_bch_ei(mapping->host)); - struct pagecache_iter iter; - struct page *page; - int ret = 0; - int done = 0; - pgoff_t uninitialized_var(writeback_index); - pgoff_t index; - pgoff_t end; /* Inclusive */ - pgoff_t done_index; - int cycled; - int range_whole = 0; - int tag; - - if (wbc->range_cyclic) { - writeback_index = mapping->writeback_index; /* prev offset */ - index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; - end = -1; - } else { - index = wbc->range_start >> PAGE_SHIFT; - end = wbc->range_end >> PAGE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ - } - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag = PAGECACHE_TAG_TOWRITE; - else - tag = PAGECACHE_TAG_DIRTY; -retry: - if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag_pages_for_writeback(mapping, index, end); - - done_index = index; -get_pages: - for_each_pagecache_tag(&iter, mapping, tag, index, end, page) { - done_index = page->index; - - if (w.io && - !bio_can_add_page_contig(&w.io->op.op.wbio.bio, page)) - bch2_writepage_do_io(&w); - - if (!w.io && - atomic_read(&c->writeback_pages) >= - c->writeback_pages_max) { - /* don't sleep with pages pinned: */ - pagecache_iter_release(&iter); - - __wait_event(c->writeback_wait, - atomic_read(&c->writeback_pages) < - c->writeback_pages_max); - goto get_pages; - } - - lock_page(page); - - /* - * Page truncated or invalidated. We can freely skip it - * then, even for data integrity operations: the page - * has disappeared concurrently, so there could be no - * real expectation of this data interity operation - * even if there is now a new, dirty page at the same - * pagecache address. - */ - if (unlikely(page->mapping != mapping)) { -continue_unlock: - unlock_page(page); - continue; - } - - if (!PageDirty(page)) { - /* someone wrote it for us */ - goto continue_unlock; - } - - if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - else - goto continue_unlock; - } - - BUG_ON(PageWriteback(page)); - if (!clear_page_dirty_for_io(page)) - goto continue_unlock; - - trace_wbc_writepage(wbc, inode_to_bdi(mapping->host)); - ret = __bch2_writepage(c, page, wbc, &w); - if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); - ret = 0; - } else { - /* - * done_index is set past this page, - * so media errors will not choke - * background writeout for the entire - * file. This has consequences for - * range_cyclic semantics (ie. it may - * not be suitable for data integrity - * writeout). - */ - done_index = page->index + 1; - done = 1; - break; - } - } - - /* - * We stop writing back only if we are not doing - * integrity sync. 
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index e42bc1da..601e2707 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -164,8 +164,6 @@ read_attribute(extent_migrate_raced);
 rw_attribute(journal_write_delay_ms);
 rw_attribute(journal_reclaim_delay_ms);
 
-rw_attribute(writeback_pages_max);
-
 rw_attribute(discard);
 rw_attribute(cache_replacement_policy);
 rw_attribute(group);
@@ -311,8 +309,6 @@ SHOW(bch2_fs)
 	sysfs_print(journal_write_delay_ms,	c->journal.write_delay_ms);
 	sysfs_print(journal_reclaim_delay_ms,	c->journal.reclaim_delay_ms);
 
-	sysfs_print(writeback_pages_max,	c->writeback_pages_max);
-
 	sysfs_print(block_size,			block_bytes(c));
 	sysfs_print(btree_node_size,		btree_bytes(c));
 	sysfs_hprint(btree_cache_size,		bch2_btree_cache_size(c));
@@ -373,9 +369,6 @@ STORE(__bch2_fs)
 	sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
 	sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
 
-	if (attr == &sysfs_writeback_pages_max)
-		c->writeback_pages_max = strtoul_restrict_or_return(buf, 1, UINT_MAX);
-
 	if (attr == &sysfs_btree_gc_periodic) {
 		ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
 			?: (ssize_t) size;
@@ -465,8 +458,6 @@ struct attribute *bch2_fs_files[] = {
 	&sysfs_journal_write_delay_ms,
 	&sysfs_journal_reclaim_delay_ms,
 
-	&sysfs_writeback_pages_max,
-
 	&sysfs_rebalance_percent,
 
 	&sysfs_compression_stats,
@@ -913,7 +904,15 @@ STORE(bch2_dev)
 	}
 
 	if (attr == &sysfs_group) {
-		int ret = bch2_dev_group_set(c, ca, buf);
+		char *tmp;
+		int ret;
+
+		tmp = kstrdup(buf, GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+
+		ret = bch2_dev_group_set(c, ca, strim(tmp));
+		kfree(tmp);
 		if (ret)
 			return ret;
 	}
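The sysfs_group store now copies and trims its input: buffers written through sysfs typically carry the trailing newline from echo, and bch2_dev_group_set() should see a clean, mutable name, hence kstrdup() plus strim() on the copy. A userspace model of the same idea (strdup() standing in for kstrdup(), a hand-rolled trim for strim()):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *trim_right(char *s)
{
	size_t n = strlen(s);

	/* drop trailing newline/whitespace in place, like strim() */
	while (n && (s[n - 1] == '\n' || s[n - 1] == ' '))
		s[--n] = '\0';
	return s;
}

int main(void)
{
	const char *sysfs_buf = "ssd_group\n";	/* as written by `echo` */
	char *tmp = strdup(sysfs_buf);		/* input is const; copy it */

	if (!tmp)
		return 1;
	printf("group name: '%s'\n", trim_right(tmp));
	free(tmp);
	return 0;
}
```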
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index d475f986..cc89da1f 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -181,15 +181,19 @@ do {									\
 	}								\
 } while (0)
 
-#define heap_add(h, new, cmp)						\
+#define __heap_add(h, d, cmp)						\
+do {									\
+	size_t _i = (h)->used++;					\
+	(h)->data[_i] = d;						\
+									\
+	heap_sift_up(h, _i, cmp);					\
+} while (0)
+
+#define heap_add(h, d, cmp)						\
 ({									\
 	bool _r = !heap_full(h);					\
-	if (_r) {							\
-		size_t _i = (h)->used++;				\
-		(h)->data[_i] = new;					\
-									\
-		heap_sift_up(h, _i, cmp);				\
-	}								\
+	if (_r)								\
+		__heap_add(h, d, cmp);					\
 	_r;								\
 })
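Finally, util.h splits the unchecked insertion out of heap_add() into __heap_add(), which the new bch2_btree_node_iter_large_push() in btree_io.c calls directly: the fill_iter allocation already guarantees capacity, so the heap_full() check would be dead weight there. A standalone miniature of the split on an int min-heap:

```c
#include <stdbool.h>
#include <stdio.h>

#define CAP 8

struct heap { size_t used; int data[CAP]; };

#define heap_swap(h, i, j)						\
do {									\
	int _t = (h)->data[i];						\
	(h)->data[i] = (h)->data[j];					\
	(h)->data[j] = _t;						\
} while (0)

static void heap_sift_up(struct heap *h, size_t i)
{
	while (i && h->data[i] < h->data[(i - 1) / 2]) {
		heap_swap(h, i, (i - 1) / 2);
		i = (i - 1) / 2;
	}
}

/* unchecked insert: caller must know the heap isn't full */
static void __heap_add(struct heap *h, int v)
{
	size_t i = h->used++;

	h->data[i] = v;
	heap_sift_up(h, i);
}

/* checked insert: returns false instead of overflowing */
static bool heap_add(struct heap *h, int v)
{
	bool ret = h->used < CAP;

	if (ret)
		__heap_add(h, v);
	return ret;
}

int main(void)
{
	struct heap h = { 0 };

	heap_add(&h, 3);
	heap_add(&h, 1);
	__heap_add(&h, 2);	/* capacity known to be available */
	printf("min: %d\n", h.data[0]);	/* prints 1 */
	return 0;
}
```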