diff --git a/.bcachefs_revision b/.bcachefs_revision
index b4ec3188..48cf256f 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-d7dbddc4504c1bf509f8eb5818b5042342dc9ed1
+a9f14c773fb122a4b283fc7b79d9f98703a18890
diff --git a/libbcachefs.c b/libbcachefs.c
index 968748af..98f058d7 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -545,6 +545,26 @@ static void bch2_sb_print_crypt(struct bch_sb *sb, struct bch_sb_field *f,
 			BCH_KDF_SCRYPT_P(crypt));
 }
 
+static void bch2_sb_print_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f,
+				      enum units units)
+{
+	struct bch_sb_field_replicas_v0 *replicas = field_to_type(f, replicas_v0);
+	struct bch_replicas_entry_v0 *e;
+	unsigned i;
+
+	for_each_replicas_entry(replicas, e) {
+		printf_pad(32, " %s:", bch2_data_types[e->data_type]);
+
+		putchar('[');
+		for (i = 0; i < e->nr_devs; i++) {
+			if (i)
+				putchar(' ');
+			printf("%u", e->devs[i]);
+		}
+		printf("]\n");
+	}
+}
+
 static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
 				   enum units units)
 {
@@ -553,7 +573,10 @@ static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
 	unsigned i;
 
 	for_each_replicas_entry(replicas, e) {
-		printf_pad(32, " %s:", bch2_data_types[e->data_type]);
+		printf_pad(32, " %s: %u/%u",
+			   bch2_data_types[e->data_type],
+			   e->nr_required,
+			   e->nr_devs);
 
 		putchar('[');
 		for (i = 0; i < e->nr_devs; i++) {
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 7ad080bf..56fef9e4 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -883,10 +883,11 @@ struct bch_sb_field {
 	x(journal,	0)	\
 	x(members,	1)	\
 	x(crypt,	2)	\
-	x(replicas,	3)	\
+	x(replicas_v0,	3)	\
 	x(quota,	4)	\
 	x(disk_groups,	5)	\
-	x(clean,	6)
+	x(clean,	6)	\
+	x(replicas,	7)
 
 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,
@@ -1012,16 +1013,28 @@ enum bch_data_type {
 	BCH_DATA_NR		= 6,
 };
 
-struct bch_replicas_entry {
+struct bch_replicas_entry_v0 {
 	__u8			data_type;
 	__u8			nr_devs;
 	__u8			devs[0];
-};
+} __attribute__((packed));
+
+struct bch_sb_field_replicas_v0 {
+	struct bch_sb_field	field;
+	struct bch_replicas_entry_v0 entries[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_replicas_entry {
+	__u8			data_type;
+	__u8			nr_devs;
+	__u8			nr_required;
+	__u8			devs[0];
+} __attribute__((packed));
 
 struct bch_sb_field_replicas {
 	struct bch_sb_field	field;
 	struct bch_replicas_entry entries[0];
-};
+} __attribute__((packed, aligned(8)));
 
 /* BCH_SB_FIELD_quota: */
 
@@ -1227,7 +1240,8 @@ enum bch_sb_features {
 	BCH_FEATURE_LZ4			= 0,
 	BCH_FEATURE_GZIP		= 1,
 	BCH_FEATURE_ZSTD		= 2,
-	BCH_FEATURE_ATOMIC_NLINK	= 3,
+	BCH_FEATURE_ATOMIC_NLINK	= 3, /* should have gone under compat */
+	BCH_FEATURE_NR,
 };
 
 /* options: */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b3c69da9..6b67da90 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -119,86 +119,105 @@ static bool bkey_type_needs_gc(enum bkey_type type)
 	}
 }
 
-u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
+static void ptr_gen_recalc_oldest(struct bch_fs *c,
+				  const struct bch_extent_ptr *ptr,
+				  u8 *max_stale)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	size_t b = PTR_BUCKET_NR(ca, ptr);
+
+	if (gen_after(ca->oldest_gens[b], ptr->gen))
+		ca->oldest_gens[b] = ptr->gen;
+
+	*max_stale = max(*max_stale, ptr_stale(ca, ptr));
+}
+
+static u8 ptr_gens_recalc_oldest(struct bch_fs *c,
+				 enum bkey_type type,
+				 struct bkey_s_c k)
 {
 	const struct bch_extent_ptr *ptr;
 	u8 max_stale = 0;
 
-	if (bkey_extent_is_data(k.k)) {
-		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+		switch (k.k->type) {
+		case BCH_EXTENT:
+		case BCH_EXTENT_CACHED: {
+			struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 
-		extent_for_each_ptr(e, ptr) {
-			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-			size_t b = PTR_BUCKET_NR(ca, ptr);
-
-			if (gen_after(ca->oldest_gens[b], ptr->gen))
-				ca->oldest_gens[b] = ptr->gen;
-
-			max_stale = max(max_stale, ptr_stale(ca, ptr));
+			extent_for_each_ptr(e, ptr)
+				ptr_gen_recalc_oldest(c, ptr, &max_stale);
+			break;
 		}
+		}
+		break;
+	default:
+		break;
 	}
 
 	return max_stale;
 }
 
-static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
-					struct bkey_s_c k)
+static int ptr_gen_check(struct bch_fs *c,
+			 enum bkey_type type,
+			 const struct bch_extent_ptr *ptr)
 {
-	enum bch_data_type data_type = type == BKEY_TYPE_BTREE
-		? BCH_DATA_BTREE : BCH_DATA_USER;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	size_t b = PTR_BUCKET_NR(ca, ptr);
+	struct bucket *g = PTR_BUCKET(ca, ptr);
 	int ret = 0;
 
-	BUG_ON(journal_seq_verify(c) &&
-	       k.k->version.lo > journal_cur_seq(&c->journal));
-
-	if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-	    fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
-			"superblock not marked as containing replicas (type %u)",
-			data_type)) {
-		ret = bch2_mark_bkey_replicas(c, type, k);
-		if (ret)
-			return ret;
+	if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
+			"found ptr with missing gen in alloc btree,\n"
+			"type %u gen %u",
+			type, ptr->gen)) {
+		g->_mark.gen = ptr->gen;
+		g->_mark.gen_valid = 1;
+		set_bit(b, ca->buckets_dirty);
 	}
 
-	switch (k.k->type) {
-	case BCH_EXTENT:
-	case BCH_EXTENT_CACHED: {
-		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-		const struct bch_extent_ptr *ptr;
+	if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+			"%u ptr gen in the future: %u > %u",
+			type, ptr->gen, g->mark.gen)) {
+		g->_mark.gen = ptr->gen;
+		g->_mark.gen_valid = 1;
+		set_bit(b, ca->buckets_dirty);
+		set_bit(BCH_FS_FIXED_GENS, &c->flags);
+	}
+fsck_err:
+	return ret;
+}
 
-		extent_for_each_ptr(e, ptr) {
-			struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-			size_t b = PTR_BUCKET_NR(ca, ptr);
-			struct bucket *g = PTR_BUCKET(ca, ptr);
+static int ptr_gens_check(struct bch_fs *c, enum bkey_type type,
+			  struct bkey_s_c k)
+{
+	const struct bch_extent_ptr *ptr;
+	int ret = 0;
+
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+		switch (k.k->type) {
+		case BCH_EXTENT:
+		case BCH_EXTENT_CACHED: {
+			struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+
+			extent_for_each_ptr(e, ptr) {
+				ret = ptr_gen_check(c, type, ptr);
+				if (ret)
+					return ret;
 
-			if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
-					"found ptr with missing gen in alloc btree,\n"
-					"type %s gen %u",
-					bch2_data_types[data_type],
-					ptr->gen)) {
-				g->_mark.gen = ptr->gen;
-				g->_mark.gen_valid = 1;
-				set_bit(b, ca->buckets_dirty);
 			}
-
-			if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
-					"%s ptr gen in the future: %u > %u",
-					bch2_data_types[data_type],
-					ptr->gen, g->mark.gen)) {
-				g->_mark.gen = ptr->gen;
-				g->_mark.gen_valid = 1;
-				set_bit(b, ca->buckets_dirty);
-				set_bit(BCH_FS_FIXED_GENS, &c->flags);
-			}
-
+			break;
+		}
+		}
 		break;
-	}
+	default:
+		break;
 	}
 
-	if (k.k->version.lo > atomic64_read(&c->key_version))
-		atomic64_set(&c->key_version, k.k->version.lo);
-fsck_err:
 	return ret;
 }
 
@@ -215,31 +234,32 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
 			    (initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
 	int ret = 0;
 
-	switch (type) {
-	case BKEY_TYPE_BTREE:
-	case BKEY_TYPE_EXTENTS:
-		if (initial) {
-			ret = bch2_btree_mark_ptrs_initial(c, type, k);
-			if (ret < 0)
+	if (initial) {
+		BUG_ON(journal_seq_verify(c) &&
+		       k.k->version.lo > journal_cur_seq(&c->journal));
+
+		if (k.k->version.lo > atomic64_read(&c->key_version))
+			atomic64_set(&c->key_version, k.k->version.lo);
+
+		if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
+		    fsck_err_on(!bch2_bkey_replicas_marked(c, type, k,
+							   false), c,
+				"superblock not marked as containing replicas (type %u)",
+				type)) {
+			ret = bch2_mark_bkey_replicas(c, type, k);
+			if (ret)
 				return ret;
 		}
-		break;
-	default:
-		break;
+
+		ret = ptr_gens_check(c, type, k);
+		if (ret)
+			return ret;
 	}
 
-	bch2_mark_key(c, type, k, true, k.k->size,
-		      pos, NULL, 0, flags);
-
-	switch (type) {
-	case BKEY_TYPE_BTREE:
-	case BKEY_TYPE_EXTENTS:
-		ret = bch2_btree_key_recalc_oldest_gen(c, k);
-		break;
-	default:
-		break;
-	}
+	bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags);
 
+	ret = ptr_gens_recalc_oldest(c, type, k);
+fsck_err:
 	return ret;
 }
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index f9225af2..101a6a89 100644
--- a/libbcachefs/btree_gc.h
+++ b/libbcachefs/btree_gc.h
@@ -10,7 +10,6 @@ void bch2_gc(struct bch_fs *);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
 int bch2_initial_gc(struct bch_fs *, struct list_head *);
-u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
 
 /*
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 44349159..a7eda114 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -439,11 +439,11 @@ enum btree_insert_ret {
 	BTREE_INSERT_OK,
 	/* extent spanned multiple leaf nodes: have to traverse to next node: */
 	BTREE_INSERT_NEED_TRAVERSE,
-	/* write lock held for too long */
 	/* leaf node needs to be split */
 	BTREE_INSERT_BTREE_NODE_FULL,
 	BTREE_INSERT_ENOSPC,
 	BTREE_INSERT_NEED_GC_LOCK,
+	BTREE_INSERT_NEED_MARK_REPLICAS,
 };
 
 enum btree_gc_coalesce_fail_reason {
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 33c913f7..288d7ca6 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -5,11 +5,13 @@
 #include "btree_io.h"
 #include "btree_iter.h"
 #include "btree_locking.h"
+#include "buckets.h"
 #include "debug.h"
 #include "extents.h"
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "replicas.h"
 
 #include <linux/sort.h>
 #include <trace/events/bcachefs.h>
@@ -203,6 +205,8 @@ btree_insert_key_leaf(struct btree_insert *trans,
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
 
+	bch2_mark_update(trans, insert);
+
 	ret = !btree_node_is_extents(b)
 		? bch2_insert_fixup_key(trans, insert)
 		: bch2_insert_fixup_extent(trans, insert);
@@ -297,8 +301,8 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
 
 static enum btree_insert_ret
 btree_key_can_insert(struct btree_insert *trans,
-		      struct btree_insert_entry *insert,
-		      unsigned *u64s)
+		     struct btree_insert_entry *insert,
+		     unsigned *u64s)
 {
 	struct bch_fs *c = trans->c;
 	struct btree *b = insert->iter->l[0].b;
@@ -307,6 +311,12 @@ btree_key_can_insert(struct btree_insert *trans,
 	if (unlikely(btree_node_fake(b)))
 		return BTREE_INSERT_BTREE_NODE_FULL;
 
+	if (!bch2_bkey_replicas_marked(c,
+			insert->iter->btree_id,
+			bkey_i_to_s_c(insert->k),
+			true))
+		return BTREE_INSERT_NEED_MARK_REPLICAS;
+
 	ret = !btree_node_is_extents(b)
 		? BTREE_INSERT_OK
 		: bch2_extent_can_insert(trans, insert, u64s);
@@ -323,8 +333,7 @@ btree_key_can_insert(struct btree_insert *trans,
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
 static inline int do_btree_insert_at(struct btree_insert *trans,
-				     struct btree_iter **split,
-				     bool *cycle_gc_lock)
+				     struct btree_insert_entry **stopped_at)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
@@ -368,22 +377,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
 		u64s = 0;
 		u64s += i->k->k.u64s;
 
-		switch (btree_key_can_insert(trans, i, &u64s)) {
-		case BTREE_INSERT_OK:
-			break;
-		case BTREE_INSERT_BTREE_NODE_FULL:
-			ret = -EINTR;
-			*split = i->iter;
+		ret = btree_key_can_insert(trans, i, &u64s);
+		if (ret) {
+			*stopped_at = i;
 			goto out;
-		case BTREE_INSERT_ENOSPC:
-			ret = -ENOSPC;
-			goto out;
-		case BTREE_INSERT_NEED_GC_LOCK:
-			ret = -EINTR;
-			*cycle_gc_lock = true;
-			goto out;
-		default:
-			BUG();
 		}
 	}
 
@@ -441,8 +438,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
-	struct btree_iter *linked, *split = NULL;
-	bool cycle_gc_lock = false;
+	struct btree_iter *linked;
 	unsigned flags;
 	int ret;
 
@@ -462,9 +458,6 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
 	if (unlikely(!percpu_ref_tryget(&c->writes)))
 		return -EROFS;
 retry:
-	split = NULL;
-	cycle_gc_lock = false;
-
 	trans_for_each_entry(trans, i) {
 		unsigned old_locks_want = i->iter->locks_want;
 		unsigned old_uptodate = i->iter->uptodate;
@@ -482,7 +475,7 @@ retry:
 		}
 	}
 
-	ret = do_btree_insert_at(trans, &split, &cycle_gc_lock);
+	ret = do_btree_insert_at(trans, &i);
 	if (unlikely(ret))
 		goto err;
 
@@ -517,8 +510,9 @@ err:
 	if (!trans->did_work)
 		flags &= ~BTREE_INSERT_NOUNLOCK;
 
-	if (split) {
-		ret = bch2_btree_split_leaf(c, split, flags);
+	switch (ret) {
+	case BTREE_INSERT_BTREE_NODE_FULL:
+		ret = bch2_btree_split_leaf(c, i->iter, flags);
 
 		/*
 		 * if the split succeeded without dropping locks the insert will
@@ -543,9 +537,10 @@ err:
 			trans_restart(" (split)");
 			ret = -EINTR;
 		}
-	}
+		break;
+	case BTREE_INSERT_NEED_GC_LOCK:
+		ret = -EINTR;
 
-	if (cycle_gc_lock) {
 		if (!down_read_trylock(&c->gc_lock)) {
 			if (flags & BTREE_INSERT_NOUNLOCK)
 				goto out;
@@ -554,6 +549,24 @@ err:
 			down_read(&c->gc_lock);
 		}
 		up_read(&c->gc_lock);
+		break;
+	case BTREE_INSERT_ENOSPC:
+		ret = -ENOSPC;
+		break;
+	case BTREE_INSERT_NEED_MARK_REPLICAS:
+		if (flags & BTREE_INSERT_NOUNLOCK) {
+			ret = -EINTR;
+			goto out;
+		}
+
+		bch2_btree_iter_unlock(trans->entries[0].iter);
+		ret = bch2_mark_bkey_replicas(c, i->iter->btree_id,
+					      bkey_i_to_s_c(i->k))
+			?: -EINTR;
+		break;
+	default:
+		BUG_ON(ret >= 0);
+		break;
 	}
 
 	if (ret == -EINTR) {
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c68683eb..86d57f3b 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -64,7 +64,9 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "bset.h"
 #include "btree_gc.h"
+#include "btree_update.h"
 #include "buckets.h"
 #include "error.h"
 #include "movinggc.h"
@@ -345,7 +347,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 	 * reservation:
 	 */
 	should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
-	if (WARN_ON(should_not_have_added > 0)) {
+	if (WARN_ONCE(should_not_have_added > 0,
+		      "disk usage increased without a reservation")) {
 		atomic64_sub(should_not_have_added, &c->sectors_available);
 		added -= should_not_have_added;
 	}
@@ -636,9 +639,6 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
 			     struct bch_fs_usage *stats,
 			     u64 journal_seq, unsigned flags)
 {
-	unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
-
-	BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
 	BUG_ON(!sectors);
 
 	switch (k.k->type) {
@@ -647,38 +647,43 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
 		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 		const union bch_extent_entry *entry;
 		struct extent_ptr_decoded p;
+		s64 cached_sectors	= 0;
+		s64 dirty_sectors	= 0;
+		unsigned replicas	= 0;
 
 		extent_for_each_ptr_decode(e, p, entry) {
 			s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
 
-			/*
-			 * fs level usage (which determines free space) is in
-			 * uncompressed sectors, until copygc + compression is
-			 * sorted out:
-			 *
-			 * note also that we always update @fs_usage, even when
-			 * we otherwise wouldn't do anything because gc is
-			 * running - this is because the caller still needs to
-			 * account w.r.t. its disk reservation. It is caller's
-			 * responsibility to not apply @fs_usage if gc is in
-			 * progress.
-			 */
-			stats->replicas
-				[!p.ptr.cached && replicas ? replicas - 1 : 0].data
-				[!p.ptr.cached ? data_type : BCH_DATA_CACHED] +=
-				disk_sectors;
-
 			bch2_mark_pointer(c, e, p, disk_sectors, data_type,
 					  stats, journal_seq, flags);
+
+			if (!p.ptr.cached)
+				replicas++;
+
+			if (p.ptr.cached)
+				cached_sectors += disk_sectors;
+			else
+				dirty_sectors += disk_sectors;
 		}
+
+		replicas = clamp_t(unsigned, replicas,
+				   1, ARRAY_SIZE(stats->replicas));
+
+		stats->replicas[0].data[BCH_DATA_CACHED]	+= cached_sectors;
+		stats->replicas[replicas - 1].data[data_type]	+= dirty_sectors;
 		break;
 	}
-	case BCH_RESERVATION:
-		if (replicas)
-			stats->replicas[replicas - 1].persistent_reserved +=
-				sectors * replicas;
+	case BCH_RESERVATION: {
+		unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+		sectors *= replicas;
+		replicas = clamp_t(unsigned, replicas,
+				   1, ARRAY_SIZE(stats->replicas));
+
+		stats->replicas[replicas - 1].persistent_reserved += sectors;
 		break;
 	}
+	}
 }
 
 void bch2_mark_key(struct bch_fs *c,
@@ -742,6 +747,76 @@ void bch2_mark_key(struct bch_fs *c,
 	percpu_up_read_preempt_enable(&c->usage_lock);
 }
 
+void bch2_mark_update(struct btree_insert *trans,
+		      struct btree_insert_entry *insert)
+{
+	struct bch_fs		*c = trans->c;
+	struct btree_iter	*iter = insert->iter;
+	struct btree		*b = iter->l[0].b;
+	struct btree_node_iter	node_iter = iter->l[0].iter;
+	struct bch_fs_usage	stats = { 0 };
+	struct gc_pos		pos = gc_pos_btree_node(b);
+	struct bkey_packed	*_k;
+
+	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+		bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k),
+			      true,
+			      bpos_min(insert->k->k.p, b->key.k.p).offset -
+			      bkey_start_offset(&insert->k->k),
+			      pos, &stats, trans->journal_res.seq, 0);
+
+	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+						      KEY_TYPE_DISCARD))) {
+		struct bkey		unpacked;
+		struct bkey_s_c		k;
+		s64			sectors = 0;
+
+		k = bkey_disassemble(b, _k, &unpacked);
+
+		if (btree_node_is_extents(b)
+		    ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
+		    : bkey_cmp(insert->k->k.p, k.k->p))
+			break;
+
+		if (btree_node_is_extents(b)) {
+			switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+			case BCH_EXTENT_OVERLAP_ALL:
+				sectors = -((s64) k.k->size);
+				break;
+			case BCH_EXTENT_OVERLAP_BACK:
+				sectors = bkey_start_offset(&insert->k->k) -
+					k.k->p.offset;
+				break;
+			case BCH_EXTENT_OVERLAP_FRONT:
+				sectors = bkey_start_offset(k.k) -
+					insert->k->k.p.offset;
+				break;
+			case BCH_EXTENT_OVERLAP_MIDDLE:
+				sectors = k.k->p.offset - insert->k->k.p.offset;
+				BUG_ON(sectors <= 0);
+
+				bch2_mark_key(c, btree_node_type(b), k,
+					      true, sectors,
+					      pos, &stats, trans->journal_res.seq, 0);
+
+				sectors = bkey_start_offset(&insert->k->k) -
+					k.k->p.offset;
+				break;
+			}
+
+			BUG_ON(sectors >= 0);
+		}
+
+		bch2_mark_key(c, btree_node_type(b), k,
+			      false, sectors,
+			      pos, &stats, trans->journal_res.seq, 0);
+
+		bch2_btree_node_iter_advance(&node_iter, b);
+	}
+
+	bch2_fs_usage_apply(c, &stats, trans->disk_res, pos);
+}
+
 /* Disk reservations: */
 
 static u64 __recalc_sectors_available(struct bch_fs *c)
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 17b82cd0..e84247d5 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -212,6 +212,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 void bch2_mark_key(struct bch_fs *, enum bkey_type,
 		   struct bkey_s_c, bool, s64,
 		   struct gc_pos, struct bch_fs_usage *, u64, unsigned);
+void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
 
 void bch2_recalc_sectors_available(struct bch_fs *);
 
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 72301eab..a3ec1cc9 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -675,7 +675,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
 	}
 
 	if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-	    !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+	    !bch2_bkey_replicas_marked(c, btree_node_type(b),
+				       e.s_c, false)) {
 		bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
 		bch2_fs_bug(c,
 			"btree key bad (replicas not marked in superblock):\n%s",
@@ -1009,7 +1010,6 @@ struct extent_insert_state {
 	struct btree_insert		*trans;
 	struct btree_insert_entry	*insert;
 	struct bpos			committed;
-	struct bch_fs_usage		stats;
 
 	/* for deleting: */
 	struct bkey_i			whiteout;
@@ -1018,54 +1018,6 @@ struct extent_insert_state {
 	bool				deleting;
 };
 
-static void bch2_add_sectors(struct extent_insert_state *s,
-			     struct bkey_s_c k, u64 offset, s64 sectors)
-{
-	struct bch_fs *c = s->trans->c;
-	struct btree *b = s->insert->iter->l[0].b;
-
-	EBUG_ON(bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0);
-
-	if (!sectors)
-		return;
-
-	bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
-		      gc_pos_btree_node(b), &s->stats,
-		      s->trans->journal_res.seq, 0);
-}
-
-static void bch2_subtract_sectors(struct extent_insert_state *s,
-				  struct bkey_s_c k, u64 offset, s64 sectors)
-{
-	bch2_add_sectors(s, k, offset, -sectors);
-}
-
-/* These wrappers subtract exactly the sectors that we're removing from @k */
-static void bch2_cut_subtract_back(struct extent_insert_state *s,
-				   struct bpos where, struct bkey_s k)
-{
-	bch2_subtract_sectors(s, k.s_c, where.offset,
-			      k.k->p.offset - where.offset);
-	bch2_cut_back(where, k.k);
-}
-
-static void bch2_cut_subtract_front(struct extent_insert_state *s,
-				    struct bpos where, struct bkey_s k)
-{
-	bch2_subtract_sectors(s, k.s_c, bkey_start_offset(k.k),
-			      where.offset - bkey_start_offset(k.k));
-	__bch2_cut_front(where, k);
-}
-
-static void bch2_drop_subtract(struct extent_insert_state *s, struct bkey_s k)
-{
-	if (k.k->size)
-		bch2_subtract_sectors(s, k.s_c,
-				      bkey_start_offset(k.k), k.k->size);
-	k.k->size = 0;
-	k.k->type = KEY_TYPE_DELETED;
-}
-
 static bool bch2_extent_merge_inline(struct bch_fs *,
 				     struct btree_iter *,
 				     struct bkey_packed *,
@@ -1166,11 +1118,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
 	if (s->deleting)
 		split.k.k.type = KEY_TYPE_DISCARD;
 
-	if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-		bch2_cut_subtract_back(s, s->committed,
-				       bkey_i_to_s(&split.k));
-	else
-		bch2_cut_back(s->committed, &split.k.k);
+	bch2_cut_back(s->committed, &split.k.k);
 
 	if (!bkey_cmp(s->committed, iter->pos))
 		return;
@@ -1290,7 +1238,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 	switch (overlap) {
 	case BCH_EXTENT_OVERLAP_FRONT:
 		/* insert overlaps with start of k: */
-		bch2_cut_subtract_front(s, insert->k.p, k);
+		__bch2_cut_front(insert->k.p, k);
 		BUG_ON(bkey_deleted(k.k));
 		extent_save(l->b, _k, k.k);
 		verify_modified_extent(iter, _k);
@@ -1298,7 +1246,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 
 	case BCH_EXTENT_OVERLAP_BACK:
 		/* insert overlaps with end of k: */
-		bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k);
+		bch2_cut_back(bkey_start_pos(&insert->k), k.k);
 		BUG_ON(bkey_deleted(k.k));
 		extent_save(l->b, _k, k.k);
 
@@ -1318,7 +1266,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 		if (!bkey_whiteout(k.k))
 			btree_account_key_drop(l->b, _k);
 
-		bch2_drop_subtract(s, k);
+		k.k->size = 0;
+		k.k->type = KEY_TYPE_DELETED;
 
 		if (_k >= btree_bset_last(l->b)->start) {
 			unsigned u64s = _k->u64s;
@@ -1358,14 +1307,11 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 		bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k);
 		BUG_ON(bkey_deleted(&split.k.k));
 
-		bch2_cut_subtract_front(s, insert->k.p, k);
+		__bch2_cut_front(insert->k.p, k);
 		BUG_ON(bkey_deleted(k.k));
 		extent_save(l->b, _k, k.k);
 		verify_modified_extent(iter, _k);
 
-		bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
-				 bkey_start_offset(&split.k.k),
-				 split.k.k.size);
 		extent_bset_insert(c, iter, &split.k);
 		break;
 	}
@@ -1414,8 +1360,6 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
 		    !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
 			if (!bkey_whiteout(k.k)) {
 				btree_account_key_drop(l->b, _k);
-				bch2_subtract_sectors(s, k.s_c,
-					bkey_start_offset(k.k), k.k->size);
 				_k->type = KEY_TYPE_DISCARD;
 				reserve_whiteout(l->b, _k);
 			}
@@ -1505,7 +1449,6 @@ enum btree_insert_ret
 bch2_insert_fixup_extent(struct btree_insert *trans,
 			 struct btree_insert_entry *insert)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_iter *iter	= insert->iter;
 	struct btree *b		= iter->l[0].b;
 	struct extent_insert_state s = {
@@ -1530,19 +1473,10 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
 	 */
 	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
 
-	if (!s.deleting &&
-	    !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-		bch2_add_sectors(&s, bkey_i_to_s_c(insert->k),
-				 bkey_start_offset(&insert->k->k),
-				 insert->k->k.size);
-
 	__bch2_insert_fixup_extent(&s);
 
 	extent_insert_committed(&s);
 
-	bch2_fs_usage_apply(c, &s.stats, trans->disk_res,
-			    gc_pos_btree_node(b));
-
 	EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
 	EBUG_ON(bkey_cmp(iter->pos, s.committed));
 
@@ -1702,7 +1636,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
 	}
 
 	if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-	    !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+	    !bch2_bkey_replicas_marked(c, btree_node_type(b),
+				       e.s_c, false)) {
 		bch2_bkey_val_to_text(&PBUF(buf), c,
 				      btree_node_type(b), e.s_c);
 		bch2_fs_bug(c,
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index eceb4865..34cab253 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -22,7 +22,6 @@
 #include "keylist.h"
 #include "move.h"
 #include "rebalance.h"
-#include "replicas.h"
 #include "super.h"
 #include "super-io.h"
 
@@ -319,13 +318,6 @@ static void __bch2_write_index(struct bch_write_op *op)
 			goto err;
 		}
 
-		if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
-			ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-						      e.s_c);
-			if (ret)
-				goto err;
-		}
-
 		dst = bkey_next(dst);
 	}
 
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index 5bd5f846..3ca77974 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -30,10 +30,9 @@ enum bch_write_flags {
 	BCH_WRITE_PAGES_OWNED		= (1 << 5),
 	BCH_WRITE_ONLY_SPECIFIED_DEVS	= (1 << 6),
 	BCH_WRITE_NOPUT_RESERVATION	= (1 << 7),
-	BCH_WRITE_NOMARK_REPLICAS	= (1 << 8),
 
 	/* Internal: */
-	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 9),
+	BCH_WRITE_JOURNAL_SEQ_PTR	= (1 << 8),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 961d8d81..c83e8eb8 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -780,7 +780,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 		if (!degraded &&
 		    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
 		     fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
-						       i->devs), c,
+						       i->devs, false), c,
 				 "superblock not marked as containing replicas (type %u)",
 				 BCH_DATA_JOURNAL))) {
 			ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index c0dfe1c6..df4fbae2 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -71,11 +71,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 		 */
 		bch2_extent_normalize(c, e.s);
 
-		ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-					      bkey_i_to_s_c(&tmp.key));
-		if (ret)
-			break;
-
 		iter.pos = bkey_start_pos(&tmp.key.k);
 
 		ret = bch2_btree_insert_at(c, NULL, NULL,
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e93725bf..885792bd 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -150,11 +150,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 			goto next;
 		}
 
-		ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-					      extent_i_to_s_c(insert).s_c);
-		if (ret)
-			break;
-
 		ret = bch2_btree_insert_at(c, &op->res,
 				op_journal_seq(op),
 				BTREE_INSERT_ATOMIC|
@@ -239,8 +234,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 	m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
 		BCH_WRITE_PAGES_STABLE|
 		BCH_WRITE_PAGES_OWNED|
-		BCH_WRITE_DATA_ENCODED|
-		BCH_WRITE_NOMARK_REPLICAS;
+		BCH_WRITE_DATA_ENCODED;
 
 	m->op.nr_replicas	= 1;
 	m->op.nr_replicas_required = 1;
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 1dab991b..775d6a66 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 
 #include "bcachefs.h"
+#include "compress.h"
 #include "disk_groups.h"
 #include "opts.h"
 #include "super-io.h"
@@ -268,6 +269,20 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c,
 	}
 }
 
+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
+{
+	int ret = 0;
+
+	switch (id) {
+	case Opt_compression:
+	case Opt_background_compression:
+		ret = bch2_check_set_has_compressed_data(c, v);
+		break;
+	}
+
+	return ret;
+}
+
 int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
 {
 	char *opt, *name, *val;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index c65a8d13..bdf1e4fb 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -265,6 +265,7 @@ int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64
 void bch2_opt_to_text(struct printbuf *, struct bch_fs *,
 		      const struct bch_option *, u64, unsigned);
 
+int bch2_opt_check_may_set(struct bch_fs *, int, u64);
 int bch2_parse_mount_opts(struct bch_opts *, char *);
 
 /* inode opts: */
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index f530f202..c5d9dc4e 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -129,7 +129,8 @@ int bch2_fs_recovery(struct bch_fs *c)
 	int ret;
 
 	mutex_lock(&c->sb_lock);
-	if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
+	if (!rcu_dereference_protected(c->replicas,
+				       lockdep_is_held(&c->sb_lock))->nr) {
 		bch_info(c, "building replicas info");
 		set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
 	}
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 4da35d09..a7a4e280 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -44,7 +44,10 @@ static void replicas_entry_to_text(struct printbuf *out,
 {
 	unsigned i;
 
-	pr_buf(out, "%u: [", e->data_type);
+	pr_buf(out, "%s: %u/%u [",
+	       bch2_data_types[e->data_type],
+	       e->nr_required,
+	       e->nr_devs);
 
 	for (i = 0; i < e->nr_devs; i++)
 		pr_buf(out, i ? " %u" : "%u", e->devs[i]);
@@ -74,6 +77,8 @@ static void extent_to_replicas(struct bkey_s_c k,
 		const union bch_extent_entry *entry;
 		struct extent_ptr_decoded p;
 
+		r->nr_required	= 1;
+
 		extent_for_each_ptr_decode(e, p, entry)
 			if (!p.ptr.cached)
 				r->devs[r->nr_devs++] = p.ptr.dev;
@@ -114,6 +119,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
 
 	e->data_type	= data_type;
 	e->nr_devs	= 0;
+	e->nr_required	= 1;
 
 	for (i = 0; i < devs.nr; i++)
 		e->devs[e->nr_devs++] = devs.devs[i];
@@ -153,8 +159,8 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
 	return new;
 }
 
-static bool replicas_has_entry(struct bch_replicas_cpu *r,
-			       struct bch_replicas_entry *search)
+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
+				 struct bch_replicas_entry *search)
 {
 	return replicas_entry_bytes(search) <= r->entry_size &&
 		eytzinger0_find(r->entries, r->nr,
@@ -162,6 +168,24 @@ static bool replicas_has_entry(struct bch_replicas_cpu *r,
 				memcmp, search) < r->nr;
 }
 
+static bool replicas_has_entry(struct bch_fs *c,
+			       struct bch_replicas_entry *search,
+			       bool check_gc_replicas)
+{
+	struct bch_replicas_cpu *r, *gc_r;
+	bool marked;
+
+	rcu_read_lock();
+	r = rcu_dereference(c->replicas);
+	marked = __replicas_has_entry(r, search) &&
+		(!check_gc_replicas ||
+		 likely(!(gc_r = rcu_dereference(c->replicas_gc))) ||
+		 __replicas_has_entry(gc_r, search));
+	rcu_read_unlock();
+
+	return marked;
+}
+
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 				       struct bch_replicas_entry *new_entry)
@@ -173,7 +197,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 	old_gc = rcu_dereference_protected(c->replicas_gc,
 					   lockdep_is_held(&c->sb_lock));
-	if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
+	if (old_gc && !__replicas_has_entry(old_gc, new_entry)) {
 		new_gc = cpu_replicas_add_entry(old_gc, new_entry);
 		if (!new_gc)
 			goto err;
@@ -181,7 +205,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 	old_r = rcu_dereference_protected(c->replicas,
 					  lockdep_is_held(&c->sb_lock));
-	if (!replicas_has_entry(old_r, new_entry)) {
+	if (!__replicas_has_entry(old_r, new_entry)) {
 		new_r = cpu_replicas_add_entry(old_r, new_entry);
 		if (!new_r)
 			goto err;
@@ -220,17 +244,8 @@ err:
 static int __bch2_mark_replicas(struct bch_fs *c,
 				struct bch_replicas_entry *devs)
 {
-	struct bch_replicas_cpu *r, *gc_r;
-	bool marked;
-
-	rcu_read_lock();
-	r = rcu_dereference(c->replicas);
-	gc_r = rcu_dereference(c->replicas_gc);
-	marked = replicas_has_entry(r, devs) &&
-		(!likely(gc_r) || replicas_has_entry(gc_r, devs));
-	rcu_read_unlock();
-
-	return likely(marked) ? 0
+	return likely(replicas_has_entry(c, devs, true))
+		? 0
 		: bch2_mark_replicas_slowpath(c, devs);
 }
 
@@ -358,14 +373,13 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
 {
 	struct bch_replicas_entry *e, *dst;
 	struct bch_replicas_cpu *cpu_r;
-	unsigned nr = 0, entry_size = 0;
+	unsigned nr = 0, entry_size = 0, idx = 0;
 
-	if (sb_r)
-		for_each_replicas_entry(sb_r, e) {
-			entry_size = max_t(unsigned, entry_size,
-					   replicas_entry_bytes(e));
-			nr++;
-		}
+	for_each_replicas_entry(sb_r, e) {
+		entry_size = max_t(unsigned, entry_size,
+				   replicas_entry_bytes(e));
+		nr++;
+	}
 
 	cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
 			nr * entry_size, GFP_NOIO);
@@ -375,29 +389,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
 	cpu_r->nr		= nr;
 	cpu_r->entry_size	= entry_size;
 
-	nr = 0;
+	for_each_replicas_entry(sb_r, e) {
+		dst = cpu_replicas_entry(cpu_r, idx++);
+		memcpy(dst, e, replicas_entry_bytes(e));
+		replicas_entry_sort(dst);
+	}
 
-	if (sb_r)
-		for_each_replicas_entry(sb_r, e) {
-			dst = cpu_replicas_entry(cpu_r, nr++);
-			memcpy(dst, e, replicas_entry_bytes(e));
-			replicas_entry_sort(dst);
-		}
+	return cpu_r;
+}
+
+static struct bch_replicas_cpu *
+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r)
+{
+	struct bch_replicas_entry_v0 *e;
+	struct bch_replicas_cpu *cpu_r;
+	unsigned nr = 0, entry_size = 0, idx = 0;
+
+	for_each_replicas_entry(sb_r, e) {
+		entry_size = max_t(unsigned, entry_size,
+				   replicas_entry_bytes(e));
+		nr++;
+	}
+
+	entry_size += sizeof(struct bch_replicas_entry) -
+		sizeof(struct bch_replicas_entry_v0);
+
+	cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
+			nr * entry_size, GFP_NOIO);
+	if (!cpu_r)
+		return NULL;
+
+	cpu_r->nr		= nr;
+	cpu_r->entry_size	= entry_size;
+
+	for_each_replicas_entry(sb_r, e) {
+		struct bch_replicas_entry *dst =
+			cpu_replicas_entry(cpu_r, idx++);
+
+		dst->data_type	= e->data_type;
+		dst->nr_devs	= e->nr_devs;
+		dst->nr_required = 1;
+		memcpy(dst->devs, e->devs, e->nr_devs);
+		replicas_entry_sort(dst);
+	}
 
-	bch2_cpu_replicas_sort(cpu_r);
 	return cpu_r;
 }
 
 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
 {
-	struct bch_sb_field_replicas *sb_r;
+	struct bch_sb_field_replicas *sb_v1;
+	struct bch_sb_field_replicas_v0 *sb_v0;
 	struct bch_replicas_cpu *cpu_r, *old_r;
 
-	sb_r	= bch2_sb_get_replicas(c->disk_sb.sb);
-	cpu_r	= __bch2_sb_replicas_to_cpu_replicas(sb_r);
+	if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
+		cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1);
+	else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
+		cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0);
+	else
+		cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO);
+
 	if (!cpu_r)
 		return -ENOMEM;
 
+	bch2_cpu_replicas_sort(cpu_r);
+
 	old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
 	rcu_assign_pointer(c->replicas, cpu_r);
 	if (old_r)
@@ -406,23 +462,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
 	return 0;
 }
 
-static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
-					    struct bch_replicas_cpu *r)
+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
+					       struct bch_replicas_cpu *r)
 {
-	struct bch_sb_field_replicas *sb_r;
-	struct bch_replicas_entry *dst, *src;
+	struct bch_sb_field_replicas_v0 *sb_r;
+	struct bch_replicas_entry_v0 *dst;
+	struct bch_replicas_entry *src;
 	size_t bytes;
 
 	bytes = sizeof(struct bch_sb_field_replicas);
 
 	for_each_cpu_replicas_entry(r, src)
+		bytes += replicas_entry_bytes(src) - 1;
+
+	sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
+			DIV_ROUND_UP(bytes, sizeof(u64)));
+	if (!sb_r)
+		return -ENOSPC;
+
+	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
+	sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
+
+	memset(&sb_r->entries, 0,
+	       vstruct_end(&sb_r->field) -
+	       (void *) &sb_r->entries);
+
+	dst = sb_r->entries;
+	for_each_cpu_replicas_entry(r, src) {
+		dst->data_type	= src->data_type;
+		dst->nr_devs	= src->nr_devs;
+		memcpy(dst->devs, src->devs, src->nr_devs);
+
+		dst = replicas_entry_next(dst);
+
+		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+	}
+
+	return 0;
+}
+
+static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
+					    struct bch_replicas_cpu *r)
+{
+	struct bch_sb_field_replicas *sb_r;
+	struct bch_replicas_entry *dst, *src;
+	bool need_v1 = false;
+	size_t bytes;
+
+	bytes = sizeof(struct bch_sb_field_replicas);
+
+	for_each_cpu_replicas_entry(r, src) {
 		bytes += replicas_entry_bytes(src);
+		if (src->nr_required != 1)
+			need_v1 = true;
+	}
+
+	if (!need_v1)
+		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
 
 	sb_r = bch2_sb_resize_replicas(&c->disk_sb,
 			DIV_ROUND_UP(bytes, sizeof(u64)));
 	if (!sb_r)
 		return -ENOSPC;
 
+	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
+	sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
+
 	memset(&sb_r->entries, 0,
 	       vstruct_end(&sb_r->field) -
 	       (void *) &sb_r->entries);
@@ -481,8 +586,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
 		if (!e->nr_devs)
 			goto err;
 
-		err = "invalid replicas entry: too many devices";
-		if (e->nr_devs >= BCH_REPLICAS_MAX)
+		err = "invalid replicas entry: bad nr_required";
+		if (!e->nr_required ||
+		    (e->nr_required > 1 &&
+		     e->nr_required >= e->nr_devs))
 			goto err;
 
 		err = "invalid replicas entry: invalid device";
@@ -524,14 +631,53 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
 	.to_text	= bch2_sb_replicas_to_text,
 };
 
+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
+{
+	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+	struct bch_replicas_cpu *cpu_r = NULL;
+	struct bch_replicas_entry_v0 *e;
+	const char *err;
+	unsigned i;
+
+	for_each_replicas_entry_v0(sb_r, e) {
+		err = "invalid replicas entry: invalid data type";
+		if (e->data_type >= BCH_DATA_NR)
+			goto err;
+
+		err = "invalid replicas entry: no devices";
+		if (!e->nr_devs)
+			goto err;
+
+		err = "invalid replicas entry: invalid device";
+		for (i = 0; i < e->nr_devs; i++)
+			if (!bch2_dev_exists(sb, mi, e->devs[i]))
+				goto err;
+	}
+
+	err = "cannot allocate memory";
+	cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r);
+	if (!cpu_r)
+		goto err;
+
+	err = check_dup_replicas_entries(cpu_r);
+err:
+	kfree(cpu_r);
+	return err;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
+	.validate	= bch2_sb_validate_replicas_v0,
+};
+
 /* Query replicas: */
 
 bool bch2_replicas_marked(struct bch_fs *c,
 			  enum bch_data_type data_type,
-			  struct bch_devs_list devs)
+			  struct bch_devs_list devs,
+			  bool check_gc_replicas)
 {
 	struct bch_replicas_entry_padded search;
-	bool ret;
 
 	if (!devs.nr)
 		return true;
@@ -540,19 +686,15 @@ bool bch2_replicas_marked(struct bch_fs *c,
 
 	devlist_to_replicas(devs, data_type, &search.e);
 
-	rcu_read_lock();
-	ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
-	rcu_read_unlock();
-
-	return ret;
+	return replicas_has_entry(c, &search.e, check_gc_replicas);
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
 			       enum bkey_type type,
-			       struct bkey_s_c k)
+			       struct bkey_s_c k,
+			       bool check_gc_replicas)
 {
 	struct bch_replicas_entry_padded search;
-	bool ret;
 
 	memset(&search, 0, sizeof(search));
 
@@ -562,20 +704,16 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
 
 		for (i = 0; i < cached.nr; i++)
 			if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
-						  bch2_dev_list_single(cached.devs[i])))
+						  bch2_dev_list_single(cached.devs[i]),
+						  check_gc_replicas))
 				return false;
 	}
 
 	bkey_to_replicas(type, k, &search.e);
 
-	if (!search.e.nr_devs)
-		return true;
-
-	rcu_read_lock();
-	ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
-	rcu_read_unlock();
-
-	return ret;
+	return search.e.nr_devs
+		? replicas_has_entry(c, &search.e, check_gc_replicas)
+		: true;
 }
 
 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
@@ -590,7 +728,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 	memset(&ret, 0, sizeof(ret));
 
 	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-		ret.replicas[i].nr_online = UINT_MAX;
+		ret.replicas[i].redundancy = INT_MAX;
 
 	mi = bch2_sb_get_members(c->disk_sb.sb);
 	rcu_read_lock();
@@ -612,9 +750,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 				nr_offline++;
 		}
 
-		ret.replicas[e->data_type].nr_online =
-			min(ret.replicas[e->data_type].nr_online,
-			    nr_online);
+		ret.replicas[e->data_type].redundancy =
+			min(ret.replicas[e->data_type].redundancy,
+			    (int) nr_online - (int) e->nr_required);
 
 		ret.replicas[e->data_type].nr_offline =
 			max(ret.replicas[e->data_type].nr_offline,
@@ -623,6 +761,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 
 	rcu_read_unlock();
 
+	for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
+		if (ret.replicas[i].redundancy == INT_MAX)
+			ret.replicas[i].redundancy = 0;
+
 	return ret;
 }
 
@@ -637,7 +779,7 @@ static bool have_enough_devs(struct replicas_status s,
 			     bool force_if_lost)
 {
 	return (!s.replicas[type].nr_offline || force_if_degraded) &&
-		(s.replicas[type].nr_online || force_if_lost);
+		(s.replicas[type].redundancy >= 0 || force_if_lost);
 }
 
 bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
@@ -653,14 +795,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
 			flags & BCH_FORCE_IF_DATA_LOST));
 }
 
-unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
+int bch2_replicas_online(struct bch_fs *c, bool meta)
 {
 	struct replicas_status s = bch2_replicas_status(c);
 
-	return meta
-		? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
-		      s.replicas[BCH_DATA_BTREE].nr_online)
-		: s.replicas[BCH_DATA_USER].nr_online;
+	return (meta
+		? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
+		      s.replicas[BCH_DATA_BTREE].redundancy)
+		: s.replicas[BCH_DATA_USER].redundancy) + 1;
 }
 
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 7deca37c..7fee927c 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -4,9 +4,9 @@
 #include "replicas_types.h"
 
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
-			  struct bch_devs_list);
+			  struct bch_devs_list, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
-			       struct bkey_s_c);
+			       struct bkey_s_c, bool);
 int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
 		       struct bch_devs_list);
 int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
@@ -16,7 +16,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 struct replicas_status {
 	struct {
-		unsigned	nr_online;
+		int		redundancy;
 		unsigned	nr_offline;
 	}			replicas[BCH_DATA_NR];
 };
@@ -26,7 +26,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *,
 struct replicas_status bch2_replicas_status(struct bch_fs *);
 bool bch2_have_enough_devs(struct replicas_status, unsigned);
 
-unsigned bch2_replicas_online(struct bch_fs *, bool);
+int bch2_replicas_online(struct bch_fs *, bool);
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
@@ -45,8 +45,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
 	     (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
 	     (_i) = replicas_entry_next(_i))
 
+#define for_each_replicas_entry_v0(_r, _i)				\
+	for (_i = (_r)->entries;					\
+	     (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+	     (_i) = replicas_entry_next(_i))
+
 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
 
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
 
 #endif /* _BCACHEFS_REPLICAS_H */
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index d1fca0d6..83523572 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -56,8 +56,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
 	void *src, *dst;
 
 	src = vstruct_end(f);
-	f->u64s = cpu_to_le32(u64s);
-	dst = vstruct_end(f);
+
+	if (u64s) {
+		f->u64s = cpu_to_le32(u64s);
+		dst = vstruct_end(f);
+	} else {
+		dst = f;
+	}
 
 	memmove(dst, src, vstruct_end(sb->sb) - src);
 
@@ -67,7 +72,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
 
 	sb->sb->u64s = cpu_to_le32(sb_u64s);
 
-	return f;
+	return u64s ? f : NULL;
+}
+
+void bch2_sb_field_delete(struct bch_sb_handle *sb,
+			  enum bch_sb_field_type type)
+{
+	struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
+
+	if (f)
+		__bch2_sb_field_resize(sb, f, 0);
 }
 
 /* Superblock realloc/free: */
@@ -167,7 +181,8 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
 	}
 
 	f = __bch2_sb_field_resize(sb, f, u64s);
-	f->type = cpu_to_le32(type);
+	if (f)
+		f->type = cpu_to_le32(type);
 	return f;
 }
 
@@ -222,6 +237,10 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
 	    le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
 		return"Unsupported superblock version";
 
+	if (sb->features[1] ||
+	    (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
+		return "Filesystem has incompatible features";
+
 	if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
 		SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
 		SET_BCH_SB_POSIX_ACL(sb, 1);
@@ -354,6 +373,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
 {
 	struct bch_sb_field *src_f, *dst_f;
 	struct bch_sb *dst = dst_handle->sb;
+	unsigned i;
 
 	dst->version	= src->version;
 	dst->seq	= src->seq;
@@ -372,15 +392,17 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
 	memcpy(dst->features,	src->features,	sizeof(dst->features));
 	memcpy(dst->compat,	src->compat,	sizeof(dst->compat));
 
-	vstruct_for_each(src, src_f) {
-		if (src_f->type == BCH_SB_FIELD_journal)
+	for (i = 0; i < BCH_SB_FIELD_NR; i++) {
+		if (i == BCH_SB_FIELD_journal)
 			continue;
 
-		dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
+		src_f = bch2_sb_field_get(src, i);
+		dst_f = bch2_sb_field_get(dst, i);
+
 		dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
-				le32_to_cpu(src_f->u64s));
+				src_f ? le32_to_cpu(src_f->u64s) : 0);
 
-		memcpy(dst_f, src_f, vstruct_bytes(src_f));
+		if (src_f)
+			memcpy(dst_f, src_f, vstruct_bytes(src_f));
 	}
 }
 
@@ -455,7 +477,7 @@ reread:
 
 	if (le64_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN ||
 	    le64_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX)
-		return"Unsupported superblock version";
+		return "Unsupported superblock version";
 
 	bytes = vstruct_bytes(sb->sb);
 
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index 6eb193ac..c66fd974 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -11,6 +11,7 @@ struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
 struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
 					  enum bch_sb_field_type, unsigned);
+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
 
 #define field_to_type(_f, _name)					\
 	container_of_or_null(_f, struct bch_sb_field_##_name, field)
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index c6a653ac..f793cfba 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -9,7 +9,6 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
-#include "compress.h"
 #include "sysfs.h"
 #include "btree_cache.h"
 #include "btree_io.h"
@@ -346,8 +345,8 @@ SHOW(bch2_fs)
 
 	sysfs_print(promote_whole_extents,	c->promote_whole_extents);
 
-	sysfs_printf(meta_replicas_have, "%u",	bch2_replicas_online(c, true));
-	sysfs_printf(data_replicas_have, "%u",	bch2_replicas_online(c, false));
+	sysfs_printf(meta_replicas_have, "%i",	bch2_replicas_online(c, true));
+	sysfs_printf(data_replicas_have, "%i",	bch2_replicas_online(c, false));
 
 	/* Debugging: */
 
@@ -580,14 +579,9 @@ STORE(bch2_fs_opts_dir)
 	if (ret < 0)
 		return ret;
 
-	if (id == Opt_compression ||
-	    id == Opt_background_compression) {
-		int ret = bch2_check_set_has_compressed_data(c, v);
-		if (ret) {
-			mutex_unlock(&c->sb_lock);
-			return ret;
-		}
-	}
+	ret = bch2_opt_check_may_set(c, id, v);
+	if (ret < 0)
+		return ret;
 
 	if (opt->set_sb != SET_NO_SB_OPT) {
 		mutex_lock(&c->sb_lock);
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index ed62668b..31f3b981 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -2,7 +2,6 @@
 #include "bcachefs.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
-#include "compress.h"
 #include "extents.h"
 #include "fs.h"
 #include "rebalance.h"
@@ -430,12 +429,9 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
 		if (ret < 0)
 			return ret;
 
-		if (s.id == Opt_compression ||
-		    s.id == Opt_background_compression) {
-			ret = bch2_check_set_has_compressed_data(c, s.v);
-			if (ret)
-				return ret;
-		}
+		ret = bch2_opt_check_may_set(c, s.id, s.v);
+		if (ret < 0)
+			return ret;
 
 		s.defined = true;
 	} else {
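
A closing note on the semantics this patch introduces: a replicas entry now carries nr_required alongside nr_devs, and degraded-operation decisions compare the number of online member devices against that floor rather than against a bare online count (see __bch2_replicas_status() and bch2_replicas_online() in the replicas.c hunks above). The standalone C sketch below illustrates only that arithmetic; the struct and the dev_online() helper are simplified, hypothetical stand-ins, not the in-tree bch_replicas_entry / replicas_status machinery.

/*
 * Hedged sketch of the "n required of m" redundancy computation.
 * All names here are illustrative; only the arithmetic mirrors the patch.
 */
#include <stdbool.h>
#include <stdio.h>

struct replicas_entry_sketch {
	unsigned char	nr_devs;	/* devices holding this data */
	unsigned char	nr_required;	/* replicas needed to read it */
	unsigned char	devs[8];	/* member device indexes */
};

/* Stand-in for "is this member device currently online?" */
static bool dev_online(unsigned dev)
{
	return dev != 2;		/* pretend device 2 is absent */
}

/*
 * Mirrors __bch2_replicas_status(): redundancy is how many more member
 * devices may go offline before the data described by this entry becomes
 * unavailable; negative means it already is.
 */
static int entry_redundancy(const struct replicas_entry_sketch *e)
{
	int nr_online = 0;
	unsigned i;

	for (i = 0; i < e->nr_devs; i++)
		nr_online += dev_online(e->devs[i]);

	return nr_online - e->nr_required;
}

int main(void)
{
	struct replicas_entry_sketch e = {
		.nr_devs	= 3,
		.nr_required	= 1,
		.devs		= { 0, 1, 2 },
	};

	/* 2 of 3 devices online, 1 required: redundancy 1, still readable */
	printf("redundancy = %d\n", entry_redundancy(&e));

	/* bch2_replicas_online() reports redundancy + 1, i.e. 2 here */
	return 0;
}

This is also why the superblock now keeps two field types: entries whose nr_required is 1 round-trip losslessly through the old v0 layout (which had no nr_required byte), so bch2_cpu_replicas_to_sb_replicas() falls back to writing replicas_v0 unless some entry actually needs the new field, keeping superblocks readable by older code where possible.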