diff --git a/.bcachefs_revision b/.bcachefs_revision index 77e97af7..e52e11e8 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -242d37cbd0abfa575ebf816c715e5bb9513c90a0 +0a9f0fc68a3cfaaee05a0848673fdb3de3108982 diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 276ab56c..0f2d7437 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -240,12 +240,12 @@ retry: } xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); - acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); if (!IS_ERR(acl)) set_cached_acl(&inode->v, type, acl); + bch2_trans_iter_put(&trans, iter); out: bch2_trans_exit(&trans); return acl; @@ -310,7 +310,7 @@ retry: if (type == ACL_TYPE_ACCESS) { ret = posix_acl_update_mode(&inode->v, &mode, &acl); if (ret) - goto err; + goto btree_err; } hash_info = bch2_hash_info_init(c, &inode_u); @@ -327,6 +327,8 @@ retry: &inode->ei_journal_seq, BTREE_INSERT_NOUNLOCK); btree_err: + bch2_trans_iter_put(&trans, inode_iter); + if (ret == -EINTR) goto retry; if (unlikely(ret)) @@ -353,21 +355,22 @@ int bch2_acl_chmod(struct btree_trans *trans, struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; struct posix_acl *acl; - int ret = 0; + int ret; iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); - if (IS_ERR(iter)) - return PTR_ERR(iter) != -ENOENT ? PTR_ERR(iter) : 0; + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + return ret == -ENOENT ? 
0 : ret; xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); - acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); - if (IS_ERR_OR_NULL(acl)) - return PTR_ERR(acl); + ret = PTR_ERR_OR_ZERO(acl); + if (ret || !acl) + goto err; ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode); if (ret) @@ -384,6 +387,7 @@ int bch2_acl_chmod(struct btree_trans *trans, *new_acl = acl; acl = NULL; err: + bch2_trans_iter_put(trans, iter); kfree(acl); return ret; } diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 796a061d..48971fcf 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -139,25 +139,6 @@ static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out, #undef x } -static void bch2_alloc_pack_v1(struct bkey_alloc_buf *dst, - const struct bkey_alloc_unpacked src) -{ - struct bkey_i_alloc *a = bkey_alloc_init(&dst->k); - void *d = a->v.data; - unsigned bytes, idx = 0; - - a->k.p = POS(src.dev, src.bucket); - a->v.fields = 0; - a->v.gen = src.gen; - -#define x(_name, _bits) alloc_field_v1_put(a, &d, idx++, src._name); - BCH_ALLOC_FIELDS_V1() -#undef x - bytes = (void *) d - (void *) &a->v; - set_bkey_val_bytes(&a->k, bytes); - memset_u64s_tail(&a->v, 0, bytes); -} - static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out, struct bkey_s_c k) { @@ -250,10 +231,7 @@ void bch2_alloc_pack(struct bch_fs *c, struct bkey_alloc_buf *dst, const struct bkey_alloc_unpacked src) { - if (c->sb.features & (1ULL << BCH_FEATURE_alloc_v2)) - bch2_alloc_pack_v2(dst, src); - else - bch2_alloc_pack_v1(dst, src); + bch2_alloc_pack_v2(dst, src); } static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) @@ -410,7 +388,6 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) int ret = 0; bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS_MIN, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); @@ -430,6 +407,7 @@ int bch2_alloc_write(struct bch_fs *c, 
unsigned flags) } } err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); return ret; } @@ -959,7 +937,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_alloc, POS(ca->dev_idx, 0), BTREE_ITER_CACHED| @@ -975,6 +952,7 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca) (!fifo_empty(&ca->free_inc) ? BTREE_INSERT_NOWAIT : 0)); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); /* If we used NOWAIT, don't return the error: */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 5f0e45f7..4133651d 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -597,6 +597,7 @@ struct bch_fs { uuid_le user_uuid; u16 version; + u16 version_min; u16 encoded_extent_max; u8 nr_devices; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 573e1fe5..c4aa4dea 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1376,6 +1376,7 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ (1ULL << BCH_FEATURE_btree_updates_journalled)|\ + (1ULL << BCH_FEATURE_alloc_v2)|\ (1ULL << BCH_FEATURE_extents_across_btree_nodes)) #define BCH_SB_FEATURES_ALL \ @@ -1383,8 +1384,7 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); (1ULL << BCH_FEATURE_new_siphash)| \ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ (1ULL << BCH_FEATURE_new_varint)| \ - (1ULL << BCH_FEATURE_journal_no_flush)| \ - (1ULL << BCH_FEATURE_alloc_v2)) + (1ULL << BCH_FEATURE_journal_no_flush)) enum bch_sb_feature { #define x(f, n) BCH_FEATURE_##f, diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index 79e249f4..878befb5 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -149,7 +149,6 @@ const char 
*bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; const char *invalid; BUG_ON(!k.k->u64s); @@ -161,11 +160,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_inconsistent(c, "invalid bkey %s: %s", buf, invalid); - return; } - - if (ops->key_debugcheck) - ops->key_debugcheck(c, k); } void bch2_bpos_to_text(struct printbuf *out, struct bpos pos) diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index 0bca725a..bfa6f112 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -26,7 +26,6 @@ struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ const char * (*key_invalid)(const struct bch_fs *, struct bkey_s_c); - void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 1c8244d4..8c9172a8 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -373,8 +373,6 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, bkey_init(&prev.k->k); while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) { - bch2_bkey_debugcheck(c, b, k); - ret = bch2_gc_mark_key(c, b->c.btree_id, b->c.level, false, k, max_stale, initial); if (ret) @@ -439,6 +437,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_trans_cond_resched(&trans); } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) return ret; @@ -470,8 +470,6 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b, bkey_init(&prev.k->k); while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) { - bch2_bkey_debugcheck(c, b, k); - 
BUG_ON(bkey_cmp(k.k->p, b->data->min_key) < 0); BUG_ON(bkey_cmp(k.k->p, b->data->max_key) > 0); @@ -1470,6 +1468,7 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) struct btree *b; bool kthread = (current->flags & PF_KTHREAD) != 0; unsigned i; + int ret = 0; /* Sliding window of adjacent btree nodes */ struct btree *merge[GC_MERGE_NODES]; @@ -1518,8 +1517,8 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) lock_seq[0] = merge[0]->c.lock.state.seq; if (kthread && kthread_should_stop()) { - bch2_trans_exit(&trans); - return -ESHUTDOWN; + ret = -ESHUTDOWN; + break; } bch2_trans_cond_resched(&trans); @@ -1534,7 +1533,9 @@ static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id) memset(merge + 1, 0, (GC_MERGE_NODES - 1) * sizeof(merge[0])); } - return bch2_trans_exit(&trans); + bch2_trans_iter_put(&trans, iter); + + return bch2_trans_exit(&trans) ?: ret; } /** diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index dab3a713..9b74e799 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -560,6 +560,26 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_FATAL, c, ca, b, i, "unsupported bset version"); + if (btree_err_on(version < c->sb.version_min, + BTREE_ERR_FIXABLE, c, NULL, b, i, + "bset version %u older than superblock version_min %u", + version, c->sb.version_min)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + + if (btree_err_on(version > c->sb.version, + BTREE_ERR_FIXABLE, c, NULL, b, i, + "bset version %u newer than superblock version %u", + version, c->sb.version)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + if (btree_err_on(b->written + sectors > c->opts.btree_node_size, BTREE_ERR_FIXABLE, c, ca, b, i, "bset past end of btree node")) { @@ -753,6 +773,8 @@ int 
bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned u64s; int ret, retry_read = 0, write = READ; + b->version_ondisk = U16_MAX; + iter = mempool_alloc(&c->fill_iter, GFP_NOIO); sort_iter_init(iter, b); iter->size = (btree_blocks(c) + 1) * 2; @@ -832,6 +854,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sectors = vstruct_sectors(bne, c->block_bits); } + b->version_ondisk = min(b->version_ondisk, + le16_to_cpu(i->version)); + ret = validate_bset(c, ca, b, i, sectors, READ, have_retry); if (ret) @@ -1200,6 +1225,7 @@ retry: if (ret) goto err; out: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&k, c); bio_put(&wbio->wbio.bio); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 086d5c1b..ddd3bf5f 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -9,6 +9,7 @@ #include "btree_locking.h" #include "btree_update.h" #include "debug.h" +#include "error.h" #include "extents.h" #include "journal.h" @@ -1424,7 +1425,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) if (btree_node_read_locked(iter, iter->level)) btree_node_unlock(iter, iter->level); - iter->pos = bkey_successor(iter->pos); + iter->pos = iter->real_pos = bkey_successor(iter->pos); iter->level = iter->min_depth; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); @@ -1496,7 +1497,7 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) btree_iter_set_search_pos(iter, btree_iter_search_key(iter)); } -static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) +inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) { struct bpos pos = iter->k.p; bool ret = bkey_cmp(pos, POS_MAX) != 0; @@ -1507,7 +1508,7 @@ static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter) return ret; } -static inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter) +inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter) { 
struct bpos pos = bkey_start_pos(&iter->k); bool ret = bkey_cmp(pos, POS_MIN) != 0; @@ -1955,6 +1956,7 @@ int bch2_trans_iter_put(struct btree_trans *trans, return 0; BUG_ON(trans->iters + iter->idx != iter); + BUG_ON(!btree_iter_live(trans, iter)); ret = btree_iter_err(iter); @@ -1972,7 +1974,7 @@ int bch2_trans_iter_free(struct btree_trans *trans, if (IS_ERR_OR_NULL(iter)) return 0; - trans->iters_touched &= ~(1ULL << iter->idx); + set_btree_iter_dontneed(trans, iter); return bch2_trans_iter_put(trans, iter); } @@ -2116,6 +2118,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, for (i = 0; i < ARRAY_SIZE(iter->l); i++) iter->l[i].b = NULL; iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT; + iter->ip_allocated = _RET_IP_; return iter; } @@ -2133,7 +2136,7 @@ struct btree_iter *__bch2_trans_copy_iter(struct btree_trans *trans, * We don't need to preserve this iter since it's cheap to copy it * again - this will cause trans_iter_put() to free it right away: */ - trans->iters_touched &= ~(1ULL << iter->idx); + set_btree_iter_dontneed(trans, iter); return iter; } @@ -2214,6 +2217,8 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) (void *) &trans->fs_usage_deltas->memset_start); } + bch2_trans_cond_resched(trans); + if (!(flags & TRANS_RESET_NOTRAVERSE)) bch2_btree_iter_traverse_all(trans); } @@ -2273,6 +2278,19 @@ int bch2_trans_exit(struct btree_trans *trans) bch2_trans_unlock(trans); #ifdef CONFIG_BCACHEFS_DEBUG + if (trans->iters_live) { + struct btree_iter *iter; + + bch_err(c, "btree iterators leaked!"); + trans_for_each_iter(trans, iter) + if (btree_iter_live(trans, iter)) + printk(KERN_ERR " btree %s allocated at %pS\n", + bch2_btree_ids[iter->btree_id], + (void *) iter->ip_allocated); + /* Be noisy about this: */ + bch2_fatal_error(c); + } + mutex_lock(&trans->c->btree_trans_lock); list_del(&trans->list); mutex_unlock(&trans->c->btree_trans_lock); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 
bd0c429b..c839bfe6 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -175,6 +175,8 @@ struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *); +bool bch2_btree_iter_advance_pos(struct btree_iter *); +bool bch2_btree_iter_rewind_pos(struct btree_iter *); void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos); /* Sort order for locking btree iterators: */ @@ -298,6 +300,11 @@ static inline bool btree_iter_keep(struct btree_trans *trans, struct btree_iter (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT); } +static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter) +{ + trans->iters_touched &= ~(1ULL << iter->idx); +} + #define TRANS_RESET_NOTRAVERSE (1 << 0) void bch2_trans_reset(struct btree_trans *, unsigned); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 2230da8b..0b354563 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -171,23 +171,21 @@ static int btree_key_cache_fill(struct btree_trans *trans, ck->key.pos, BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); - if (ret) { - bch2_trans_iter_put(trans, iter); - return ret; - } + if (ret) + goto err; if (!bch2_btree_node_relock(ck_iter, 0)) { - bch2_trans_iter_put(trans, iter); trace_transaction_restart_ip(trans->ip, _THIS_IP_); - return -EINTR; + ret = -EINTR; + goto err; } if (k.k->u64s > ck->u64s) { new_u64s = roundup_pow_of_two(k.k->u64s); new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); if (!new_k) { - bch2_trans_iter_put(trans, iter); - return -ENOMEM; + ret = -ENOMEM; + goto err; } } @@ -203,9 +201,10 @@ static int btree_key_cache_fill(struct btree_trans *trans, bch2_btree_node_unlock_write(ck_iter->l[0].b, ck_iter); /* We're not likely to need this iterator again: */ - bch2_trans_iter_free(trans, iter); - - return 0; + set_btree_iter_dontneed(trans, iter); +err: + 
bch2_trans_iter_put(trans, iter); + return ret; } static int bkey_cached_check_fn(struct six_lock *lock, void *p) diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index c3148079..5999044a 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -76,6 +76,7 @@ struct btree { u16 written; u8 nsets; u8 nr_key_bits; + u16 version_ondisk; struct bkey_format format; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index df06c4a8..4c0e3d7c 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -282,6 +282,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev bch2_bset_init_first(b, &b->data->keys); b->c.level = level; b->c.btree_id = as->btree_id; + b->version_ondisk = c->sb.version; memset(&b->nr, 0, sizeof(b->nr)); b->data->magic = cpu_to_le64(bset_magic(c)); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index d7937bdf..ed3009b8 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -754,7 +754,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, enum btree_id btree_id, struct bpos start, struct bpos end) { - struct btree_iter *iter = NULL, *update_iter; + struct btree_iter *iter, *update_iter; struct bkey_i *update; struct bkey_s_c k; int ret = 0; @@ -767,8 +767,6 @@ static int extent_handle_overwrites(struct btree_trans *trans, break; if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { - update_iter = bch2_trans_copy_iter(trans, iter); - update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if ((ret = PTR_ERR_OR_ZERO(update))) goto err; @@ -776,6 +774,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_back(start, update); + update_iter = bch2_trans_copy_iter(trans, iter); update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, 
update_iter, update); @@ -785,8 +784,6 @@ static int extent_handle_overwrites(struct btree_trans *trans, } if (bkey_cmp(k.k->p, end) > 0) { - update_iter = bch2_trans_copy_iter(trans, iter); - update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if ((ret = PTR_ERR_OR_ZERO(update))) goto err; @@ -794,6 +791,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_front(end, update); + update_iter = bch2_trans_copy_iter(trans, iter); update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, update_iter, update); @@ -801,8 +799,6 @@ static int extent_handle_overwrites(struct btree_trans *trans, if (ret) goto err; } else { - update_iter = bch2_trans_copy_iter(trans, iter); - update = bch2_trans_kmalloc(trans, sizeof(struct bkey)); if ((ret = PTR_ERR_OR_ZERO(update))) goto err; @@ -812,6 +808,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, update->k.type = KEY_TYPE_deleted; update->k.size = 0; + update_iter = bch2_trans_copy_iter(trans, iter); update_iter->flags &= ~BTREE_ITER_IS_EXTENTS; bch2_btree_iter_set_pos(update_iter, update->k.p); ret = bch2_trans_update2(trans, update_iter, update); @@ -823,8 +820,7 @@ static int extent_handle_overwrites(struct btree_trans *trans, k = bch2_btree_iter_next_with_updates(iter); } err: - if (!IS_ERR_OR_NULL(iter)) - bch2_trans_iter_put(trans, iter); + bch2_trans_iter_put(trans, iter); return ret; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index be59e37e..e6e75235 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1481,6 +1481,10 @@ static struct btree_iter *trans_get_update(struct btree_trans *trans, bkey_cmp(pos, i->k->k.p) < 0 : !bkey_cmp(pos, i->iter->pos))) { *k = bkey_i_to_s_c(i->k); + + /* ugly hack.. 
*/ + BUG_ON(btree_iter_live(trans, i->iter)); + trans->iters_live |= 1ULL << i->iter->idx; return i->iter; } diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 06dbca32..cce747da 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -242,6 +242,8 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, if (!i->size) break; } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; @@ -294,6 +296,8 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (!i->size) break; } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return err < 0 ? err : i->ret; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index b0625176..592dd80c 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -321,6 +321,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, k = bch2_btree_iter_peek_slot(iter); inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); + bch2_trans_iter_put(&trans, iter); out: bch2_trans_exit(&trans); return inum; @@ -379,6 +380,8 @@ int bch2_readdir(struct bch_fs *c, u64 inum, struct dir_context *ctx) break; ctx->pos = dirent.k->p.offset + 1; } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; return ret; diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index e36ef095..8d94ee70 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -1663,12 +1663,13 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) int ret = 0; bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_stripes, POS(0, U64_MAX), 0); k = bch2_btree_iter_prev(iter); if (!IS_ERR_OR_NULL(k.k)) idx = k.k->p.offset + 1; + + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans); if (ret) return ret; diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 34f0e469..8ed3f73b 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -164,46 +164,6 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs 
*c, struct bkey_s_c k) return bch2_bkey_ptrs_invalid(c, k); } -void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr; - const char *err; - char buf[160]; - struct bucket_mark mark; - struct bch_dev *ca; - - if (!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) - return; - - if (!percpu_down_read_trylock(&c->mark_lock)) - return; - - bkey_for_each_ptr(ptrs, ptr) { - ca = bch_dev_bkey_exists(c, ptr->dev); - - mark = ptr_bucket_mark(ca, ptr); - - err = "stale"; - if (gen_after(mark.gen, ptr->gen)) - goto err; - - err = "inconsistent"; - if (mark.data_type != BCH_DATA_btree || - mark.dirty_sectors < c->opts.btree_node_size) - goto err; - } -out: - percpu_up_read(&c->mark_lock); - return; -err: - bch2_fs_inconsistent(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", - err, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf), - PTR_BUCKET_NR(ca, ptr), - mark.gen, (unsigned) mark.v.counter); - goto out; -} - void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -247,49 +207,6 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) return bch2_bkey_ptrs_invalid(c, k); } -void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k) -{ - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - char buf[160]; - - if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) || - !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) - return; - - if (!percpu_down_read_trylock(&c->mark_lock)) - return; - - extent_for_each_ptr_decode(e, p, entry) { - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - struct bucket_mark mark = ptr_bucket_mark(ca, &p.ptr); - unsigned stale = gen_after(mark.gen, p.ptr.gen); - unsigned disk_sectors = ptr_disk_sectors(p); - unsigned mark_sectors = p.ptr.cached - ? 
mark.cached_sectors - : mark.dirty_sectors; - - bch2_fs_inconsistent_on(stale && !p.ptr.cached, c, - "stale dirty pointer (ptr gen %u bucket %u", - p.ptr.gen, mark.gen); - - bch2_fs_inconsistent_on(stale > 96, c, - "key too stale: %i", stale); - - bch2_fs_inconsistent_on(!stale && - (mark.data_type != BCH_DATA_user || - mark_sectors < disk_sectors), c, - "extent pointer not marked: %s:\n" - "type %u sectors %u < %u", - (bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c), buf), - mark.data_type, - mark_sectors, disk_sectors); - } - - percpu_up_read(&c->mark_lock); -} - void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { @@ -688,6 +605,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, break; } } + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); return ret; @@ -1265,7 +1184,7 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k) len = where.offset - bkey_start_offset(k.k); - k.k->p = where; + k.k->p.offset = where.offset; k.k->size = len; if (!len) { diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 3988315f..2ee50a24 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -368,7 +368,6 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -379,14 +378,12 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, #define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ .key_invalid = bch2_btree_ptr_invalid, \ - .key_debugcheck = bch2_btree_ptr_debugcheck, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ } #define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \ .key_invalid = bch2_btree_ptr_invalid, \ - .key_debugcheck = bch2_btree_ptr_debugcheck, \ .val_to_text = 
bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -395,14 +392,12 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, /* KEY_TYPE_extent: */ const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c); void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); enum merge_result bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s); #define bch2_bkey_ops_extent (struct bkey_ops) { \ .key_invalid = bch2_extent_invalid, \ - .key_debugcheck = bch2_extent_debugcheck, \ .val_to_text = bch2_extent_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 0322960d..1a94e7f7 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -883,7 +883,6 @@ void bch2_readahead(struct readahead_control *ractl) BUG_ON(ret); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS_MIN, BTREE_ITER_SLOTS); @@ -912,6 +911,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_pagecache_add_put(&inode->ei_pagecache_lock); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); kfree(readpages_iter.pages); } @@ -935,6 +935,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, bchfs_read(&trans, iter, rbio, inum, NULL); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); } @@ -2143,6 +2144,7 @@ static inline int range_has_data(struct bch_fs *c, break; } } + bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; } @@ -2312,6 +2314,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr) bch2_trans_init(&trans, c, 0, 0); iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); ret = PTR_ERR_OR_ZERO(iter); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); if (ret) @@ -2445,14 +2448,11 @@ static long 
bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct btree_iter *src, *dst, *del; loff_t shift, new_size; u64 src_start; - int ret; + int ret = 0; if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - bch2_bkey_buf_init(&copy); - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); - /* * We need i_mutex to keep the page cache consistent with the extents * btree, and the btree consistent with i_size - we don't need outside @@ -2508,13 +2508,15 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, goto err; } + bch2_bkey_buf_init(&copy); + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256); src = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode->v.i_ino, src_start >> 9), BTREE_ITER_INTENT); dst = bch2_trans_copy_iter(&trans, src); del = bch2_trans_copy_iter(&trans, src); - while (1) { + while (ret == 0 || ret == -EINTR) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; @@ -2528,7 +2530,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ? 
bch2_btree_iter_peek_prev(src) : bch2_btree_iter_peek(src); if ((ret = bkey_err(k))) - goto bkey_err; + continue; if (!k.k || k.k->p.inode != inode->v.i_ino) break; @@ -2548,7 +2550,7 @@ reassemble: ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end); if (ret) - goto bkey_err; + continue; if (bkey_cmp(atomic_end, copy.k->k.p)) { if (insert) { @@ -2591,18 +2593,18 @@ reassemble: &inode->ei_journal_seq, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); -bkey_err: + if (!ret) bch2_btree_iter_set_pos(src, next_pos); - - if (ret == -EINTR) - ret = 0; - if (ret) - goto err; - - bch2_trans_cond_resched(&trans); } - bch2_trans_unlock(&trans); + bch2_trans_iter_put(&trans, del); + bch2_trans_iter_put(&trans, dst); + bch2_trans_iter_put(&trans, src); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&copy, c); + + if (ret) + goto err; if (!insert) { i_size_write(&inode->v, new_size); @@ -2612,8 +2614,6 @@ bkey_err: mutex_unlock(&inode->ei_update_lock); } err: - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&copy, c); bch2_pagecache_block_put(&inode->ei_pagecache_lock); inode_unlock(&inode->v); return ret; @@ -2668,7 +2668,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); end_pos = POS(inode->v.i_ino, block_end >> 9); - while (bkey_cmp(iter->pos, end_pos) < 0) { + while (!ret && bkey_cmp(iter->pos, end_pos) < 0) { s64 i_sectors_delta = 0; struct disk_reservation disk_res = { 0 }; struct quota_res quota_res = { 0 }; @@ -2732,9 +2732,11 @@ bkey_err: bch2_disk_reservation_put(c, &disk_res); if (ret == -EINTR) ret = 0; - if (ret) - goto err; } + bch2_trans_iter_put(&trans, iter); + + if (ret) + goto err; /* * Do we need to extend the file? 
@@ -2756,6 +2758,7 @@ bkey_err: ret = PTR_ERR_OR_ZERO(inode_iter); } while (ret == -EINTR); + bch2_trans_iter_put(&trans, inode_iter); bch2_trans_unlock(&trans); if (ret) @@ -3003,6 +3006,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset) } else if (k.k->p.offset >> 9 > isize) break; } + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) @@ -3106,6 +3110,7 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) offset = max(offset, bkey_start_offset(k.k) << 9); } } + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; if (ret) diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 0301ab19..1cca02f0 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -725,6 +725,8 @@ retry: BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: + bch2_trans_iter_put(&trans, inode_iter); + if (ret == -EINTR) goto retry; if (unlikely(ret)) @@ -948,6 +950,7 @@ retry: ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), FIEMAP_EXTENT_LAST); + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&cur, c); bch2_bkey_buf_exit(&prev, c); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 7f6b4ac4..c902abc1 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -319,7 +319,7 @@ static int hash_check_key(struct btree_trans *trans, bch_err(c, "hash_redo_key err %i", ret); return ret; } - return 1; + return -EINTR; } ret = hash_check_duplicates(trans, desc, h, k_iter, k); @@ -413,18 +413,10 @@ err_redo: goto err; } -static int bch2_inode_truncate(struct bch_fs *c, u64 inode_nr, u64 new_size) -{ - return bch2_btree_delete_range(c, BTREE_ID_extents, - POS(inode_nr, round_up(new_size, block_bytes(c)) >> 9), - POS(inode_nr + 1, 0), NULL); -} - -static int bch2_fix_overlapping_extent(struct btree_trans *trans, - struct btree_iter *iter, +static int fix_overlapping_extent(struct btree_trans *trans, struct bkey_s_c k, struct bpos cut_at) { - struct btree_iter *u_iter; + 
struct btree_iter *iter; struct bkey_i *u; int ret; @@ -436,22 +428,24 @@ static int bch2_fix_overlapping_extent(struct btree_trans *trans, bkey_reassemble(u, k); bch2_cut_front(cut_at, u); - u_iter = bch2_trans_copy_iter(trans, iter); /* - * We don't want to go through the - * extent_handle_overwrites path: + * We don't want to go through the extent_handle_overwrites path: + * + * XXX: this is going to screw up disk accounting, extent triggers + * assume things about extent overwrites - we should be running the + * triggers manually here */ - u_iter->flags &= ~BTREE_ITER_IS_EXTENTS; - bch2_btree_iter_set_pos(u_iter, u->k.p); + iter = bch2_trans_get_iter(trans, BTREE_ID_extents, u->k.p, + BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); - /* - * XXX: this is going to leave disk space - * accounting slightly wrong - */ - ret = bch2_trans_update(trans, u_iter, u, 0); - bch2_trans_iter_put(trans, u_iter); - return ret; + BUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS); + bch2_trans_update(trans, iter, u, BTREE_TRIGGER_NORUN); + bch2_trans_iter_put(trans, iter); + + return bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); } /* @@ -466,7 +460,7 @@ static int check_extents(struct bch_fs *c) struct btree_iter *iter; struct bkey_s_c k; struct bkey_buf prev; - u64 i_sectors; + u64 i_sectors = 0; int ret = 0; bch2_bkey_buf_init(&prev); @@ -479,97 +473,86 @@ static int check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_INTENT); retry: - for_each_btree_key_continue(iter, 0, k, ret) { - /* - * due to retry errors we might see the same extent twice: - */ - if (bkey_cmp(prev.k->k.p, k.k->p) && - bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { + if (w.have_inode && + w.cur_inum != k.k->p.inode && + !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && + fsck_err_on(w.inode.bi_sectors != i_sectors, c, + "inode %llu has incorrect i_sectors: got %llu, should be %llu", + 
w.inode.bi_inum, + w.inode.bi_sectors, i_sectors)) { + struct btree_iter *inode_iter = + bch2_trans_get_iter(&trans, BTREE_ID_inodes, + POS(0, w.cur_inum), + BTREE_ITER_INTENT); + + w.inode.bi_sectors = i_sectors; + + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, + bch2_inode_write(&trans, inode_iter, &w.inode)); + bch2_trans_iter_put(&trans, inode_iter); + if (ret) + break; + } + + if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) { char buf1[200]; char buf2[200]; bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); bch2_bkey_val_to_text(&PBUF(buf2), c, k); - if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { - ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW, - bch2_fix_overlapping_extent(&trans, - iter, k, prev.k->k.p)); - if (ret) - goto err; - } + if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) + return fix_overlapping_extent(&trans, k, prev.k->k.p) ?: -EINTR; } - bch2_bkey_buf_reassemble(&prev, c, k); ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; + if (w.first_this_inode) + i_sectors = 0; + if (fsck_err_on(!w.have_inode, c, - "extent type %u for missing inode %llu", - k.k->type, k.k->p.inode) || + "extent type %u for missing inode %llu", + k.k->type, k.k->p.inode) || fsck_err_on(w.have_inode && - !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, - "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, w.inode.bi_mode)) { - bch2_trans_unlock(&trans); - - ret = bch2_inode_truncate(c, k.k->p.inode, 0); - if (ret) - goto err; - continue; - } - - if (fsck_err_on(w.first_this_inode && - w.have_inode && - !(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) && - w.inode.bi_sectors != - (i_sectors = bch2_count_inode_sectors(&trans, w.cur_inum)), - c, "inode %llu has incorrect i_sectors: got %llu, should be %llu", - w.inode.bi_inum, - w.inode.bi_sectors, i_sectors)) { - struct bkey_inode_buf p; - - 
w.inode.bi_sectors = i_sectors; - - bch2_trans_unlock(&trans); - - bch2_inode_pack(c, &p, &w.inode); - - ret = bch2_btree_insert(c, BTREE_ID_inodes, - &p.inode.k_i, NULL, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW); - if (ret) { - bch_err(c, "error in fsck: error %i updating inode", ret); - goto err; - } - - /* revalidate iterator: */ - k = bch2_btree_iter_peek(iter); + !S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c, + "extent type %u for non regular file, inode %llu mode %o", + k.k->type, k.k->p.inode, w.inode.bi_mode)) { + bch2_fs_lazy_rw(c); + return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + POS(k.k->p.inode, 0), + POS(k.k->p.inode, U64_MAX), + NULL) ?: -EINTR; } if (fsck_err_on(w.have_inode && - !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != KEY_TYPE_reservation && - k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { - bch2_trans_unlock(&trans); - - ret = bch2_inode_truncate(c, k.k->p.inode, - w.inode.bi_size); - if (ret) - goto err; - continue; + !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && + k.k->type != KEY_TYPE_reservation && + k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { + bch2_fs_lazy_rw(c); + return bch2_btree_delete_range_trans(&trans, BTREE_ID_extents, + POS(k.k->p.inode, round_up(w.inode.bi_size, block_bytes(c))), + POS(k.k->p.inode, U64_MAX), + NULL) ?: -EINTR; } + + if (bkey_extent_is_allocation(k.k)) + i_sectors += k.k->size; + bch2_bkey_buf_reassemble(&prev, c, k); + + bch2_btree_iter_advance_pos(iter); } -err: fsck_err: if (ret == -EINTR) goto retry; + bch2_trans_iter_put(&trans, iter); bch2_bkey_buf_exit(&prev, c); return bch2_trans_exit(&trans) ?: ret; } @@ -599,7 +582,8 @@ static int 
check_dirents(struct bch_fs *c) iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), 0); retry: - for_each_btree_key_continue(iter, 0, k, ret) { + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { struct bkey_s_c_dirent d; struct bch_inode_unpacked target; bool have_target; @@ -718,6 +702,8 @@ retry: goto err; } + + bch2_btree_iter_advance_pos(iter); } hash_stop_chain(&trans, &h); @@ -726,6 +712,8 @@ fsck_err: if (ret == -EINTR) goto retry; + bch2_trans_iter_put(&trans, h.chain); + bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; } @@ -751,7 +739,8 @@ static int check_xattrs(struct bch_fs *c) iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), 0); retry: - for_each_btree_key_continue(iter, 0, k, ret) { + while ((k = bch2_btree_iter_peek(iter)).k && + !(ret = bkey_err(k))) { ret = walk_inode(&trans, &w, k.k->p.inode); if (ret) break; @@ -761,7 +750,7 @@ retry: k.k->p.inode)) { ret = bch2_btree_delete_at(&trans, iter, 0); if (ret) - goto err; + break; continue; } @@ -771,12 +760,16 @@ retry: ret = hash_check_key(&trans, bch2_xattr_hash_desc, &h, iter, k); if (ret) - goto fsck_err; + break; + + bch2_btree_iter_advance_pos(iter); } -err: fsck_err: if (ret == -EINTR) goto retry; + + bch2_trans_iter_put(&trans, h.chain); + bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; } @@ -1127,6 +1120,8 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, bch2_trans_cond_resched(&trans); } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; if (ret) bch_err(c, "error in fsck: btree error %i while walking dirents", ret); @@ -1279,8 +1274,10 @@ static int check_inode(struct btree_trans *trans, * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away */ - - ret = bch2_inode_truncate(c, u.bi_inum, u.bi_size); + ret = bch2_btree_delete_range_trans(trans, 
BTREE_ID_extents, + POS(u.bi_inum, round_up(u.bi_size, block_bytes(c))), + POS(u.bi_inum, U64_MAX), + NULL); if (ret) { bch_err(c, "error in fsck: error %i truncating inode", ret); return ret; @@ -1392,10 +1389,11 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (nlinks_pos == iter->pos.offset) genradix_iter_advance(&nlinks_iter, links); - bch2_btree_iter_next(iter); + bch2_btree_iter_advance_pos(iter); bch2_trans_cond_resched(&trans); } fsck_err: + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); if (ret2) @@ -1487,11 +1485,12 @@ int bch2_fsck_walk_inodes_only(struct bch_fs *c) BCH_INODE_I_SECTORS_DIRTY| BCH_INODE_UNLINKED)) { ret = check_inode(&trans, NULL, iter, inode, NULL); - BUG_ON(ret == -EINTR); if (ret) break; } } + bch2_trans_iter_put(&trans, iter); + BUG_ON(ret == -EINTR); return bch2_trans_exit(&trans) ?: ret; diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index e72c49e1..c9b31afc 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -620,6 +620,7 @@ retry: ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); + bch2_trans_iter_put(&trans, iter); err: if (ret == -EINTR) goto retry; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 4fcc2c71..2a660574 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -404,6 +404,8 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, ret = bch2_fpunch_at(&trans, iter, POS(inum, end), journal_seq, i_sectors_delta); + + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); if (ret == -EINTR) @@ -450,6 +452,7 @@ int bch2_write_index_default(struct bch_write_op *op) bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); @@ -1667,6 +1670,7 @@ retry: goto err; out: bch2_rbio_done(rbio); + bch2_trans_iter_put(&trans, iter); bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return; @@ -2260,7 +2264,7 @@ retry: k = 
bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) - goto err; + break; offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); @@ -2271,7 +2275,7 @@ retry: ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &sk); if (ret) - goto err; + break; k = bkey_i_to_s_c(sk.k); @@ -2296,12 +2300,8 @@ retry: ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos, data_btree, k, offset_into_extent, failed, flags); - switch (ret) { - case READ_RETRY: - goto retry; - case READ_ERR: - goto err; - }; + if (ret) + break; if (flags & BCH_READ_LAST_FRAGMENT) break; @@ -2309,19 +2309,19 @@ retry: swap(bvec_iter.bi_size, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes); } -out: - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - return; -err: - if (ret == -EINTR) + bch2_trans_iter_put(&trans, iter); + + if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; - bch_err_inum_ratelimited(c, inode, - "read error %i from btree lookup", ret); - rbio->bio.bi_status = BLK_STS_IOERR; - bch2_rbio_done(rbio); - goto out; + if (ret) { + bch_err_inum_ratelimited(c, inode, + "read error %i from btree lookup", ret); + rbio->bio.bi_status = BLK_STS_IOERR; + bch2_rbio_done(rbio); + } + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); } void bch2_fs_io_exit(struct bch_fs *c) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 395021b5..1f26139d 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1234,6 +1234,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) &c->rw_devs[BCH_DATA_journal]) { struct journal_device *ja = &ca->journal; + if (!test_bit(ca->dev_idx, c->rw_devs[BCH_DATA_journal].d)) + continue; + if (!ja->nr) continue; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 253a27c5..53897450 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -196,6 +196,7 @@ nomatch: goto next; } out: + bch2_trans_iter_put(&trans, iter); 
bch2_trans_exit(&trans); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); @@ -642,6 +643,8 @@ next_nondata: bch2_trans_cond_resched(&trans); } out: + + bch2_trans_iter_put(&trans, iter); ret = bch2_trans_exit(&trans) ?: ret; bch2_bkey_buf_exit(&sk, c); @@ -728,7 +731,7 @@ static int bch2_move_btree(struct bch_fs *c, for_each_btree_node(&trans, iter, id, id == start_btree_id ? start_pos : POS_MIN, BTREE_ITER_PREFETCH, b) { - if (kthread && (ret = kthread_should_stop())) + if (kthread && kthread_should_stop()) goto out; if ((cmp_int(id, end_btree_id) ?: @@ -837,13 +840,15 @@ static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - if (!btree_node_need_rewrite(b)) - return DATA_SKIP; + if (b->version_ondisk != c->sb.version || + btree_node_need_rewrite(b)) { + data_opts->target = 0; + data_opts->nr_replicas = 1; + data_opts->btree_insert_flags = 0; + return DATA_REWRITE; + } - data_opts->target = 0; - data_opts->nr_replicas = 1; - data_opts->btree_insert_flags = 0; - return DATA_REWRITE; + return DATA_SKIP; } int bch2_data_job(struct bch_fs *c, @@ -895,11 +900,17 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_REWRITE_OLD_NODES: - ret = bch2_move_btree(c, op.start_btree, op.start_pos, op.end_btree, op.end_pos, rewrite_old_nodes_pred, &op, stats) ?: ret; + + if (!ret) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = c->disk_sb.sb->version; + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } break; default: ret = -EINVAL; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 0975cf33..f8efeb36 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -990,11 +990,17 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } + if (!c->sb.clean && + !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { + bch_err(c, "filesystem needs recovery from older version; run fsck from older 
bcachefs-tools to fix"); + ret = -EINVAL; + goto err; + } + if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) { bch_info(c, "alloc_v2 feature bit not set, fsck required"); c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_alloc_v2; } if (!c->replicas.entries || @@ -1060,13 +1066,6 @@ use_clean: blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1; } - if (!c->sb.clean && - !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { - bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); - ret = -EINVAL; - goto err; - } - if (c->opts.reconstruct_alloc) { c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO); drop_alloc_keys(&c->journal_keys); @@ -1209,15 +1208,6 @@ use_clean: } mutex_lock(&c->sb_lock); - if (c->opts.version_upgrade) { - if (c->sb.version < bcachefs_metadata_version_new_versioning) - c->disk_sb.sb->version_min = - le16_to_cpu(bcachefs_metadata_version_min); - c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); - c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; - write_sb = true; - } - if (!test_bit(BCH_FS_ERROR, &c->flags)) { c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; write_sb = true; @@ -1274,15 +1264,6 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_mark_dev_superblock(c, ca, 0); mutex_unlock(&c->sb_lock); - mutex_lock(&c->sb_lock); - c->disk_sb.sb->version = c->disk_sb.sb->version_min = - le16_to_cpu(bcachefs_metadata_version_current); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink; - c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; - - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index e9a6a5f6..0978ad92 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -223,20 +223,18 @@ s64 
bch2_remap_range(struct bch_fs *c, dst_iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, dst_start, BTREE_ITER_INTENT); - while (1) { + while (ret == 0 || ret == -EINTR) { bch2_trans_begin(&trans); - trans.mem_top = 0; - if (fatal_signal_pending(current)) { ret = -EINTR; - goto err; + break; } src_k = get_next_src(src_iter, src_end); ret = bkey_err(src_k); if (ret) - goto btree_err; + continue; src_done = bpos_min(src_iter->pos, src_end).offset - src_start.offset; @@ -245,8 +243,6 @@ s64 bch2_remap_range(struct bch_fs *c, if (bkey_cmp(dst_iter->pos, dst_want) < 0) { ret = bch2_fpunch_at(&trans, dst_iter, dst_want, journal_seq, i_sectors_delta); - if (ret) - goto btree_err; continue; } @@ -265,7 +261,7 @@ s64 bch2_remap_range(struct bch_fs *c, ret = bch2_make_extent_indirect(&trans, src_iter, new_src.k); if (ret) - goto btree_err; + continue; BUG_ON(src_k.k->type != KEY_TYPE_reflink_p); } @@ -294,20 +290,16 @@ s64 bch2_remap_range(struct bch_fs *c, NULL, journal_seq, new_i_size, i_sectors_delta); if (ret) - goto btree_err; + continue; dst_done = dst_iter->pos.offset - dst_start.offset; src_want = POS(src_start.inode, src_start.offset + dst_done); bch2_btree_iter_set_pos(src_iter, src_want); -btree_err: - if (ret == -EINTR) - ret = 0; - if (ret) - goto err; } + bch2_trans_iter_put(&trans, dst_iter); + bch2_trans_iter_put(&trans, src_iter); - BUG_ON(bkey_cmp(dst_iter->pos, dst_end)); -err: + BUG_ON(!ret && bkey_cmp(dst_iter->pos, dst_end)); BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0); dst_done = dst_iter->pos.offset - dst_start.offset; @@ -329,6 +321,8 @@ err: ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, journal_seq, 0); } + + bch2_trans_iter_put(&trans, inode_iter); } while (ret2 == -EINTR); ret = bch2_trans_exit(&trans) ?: ret; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index b9ad9c4d..b4eb51af 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -362,6 +362,7 @@ static void 
bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; c->sb.version = le16_to_cpu(src->version); + c->sb.version_min = le16_to_cpu(src->version_min); c->sb.nr_devices = src->nr_devices; c->sb.clean = BCH_SB_CLEAN(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); @@ -964,6 +965,11 @@ int bch2_fs_mark_dirty(struct bch_fs *c) */ mutex_lock(&c->sb_lock); + if (c->opts.version_upgrade) { + c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); + c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALL; + } + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS; ret = bch2_write_super(c); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 2096c76e..4eaa4cee 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -396,6 +396,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) (!early || c->opts.read_only))) return -EROFS; + bch_info(c, "going read-write"); + ret = bch2_fs_mark_dirty(c); if (ret) goto err; diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 6bb5565c..858aa876 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -133,12 +133,9 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, inode->v.i_ino, &X_SEARCH(type, name, strlen(name)), 0); - if (IS_ERR(iter)) { - bch2_trans_exit(&trans); - BUG_ON(PTR_ERR(iter) == -EINTR); - - return PTR_ERR(iter) == -ENOENT ? -ENODATA : PTR_ERR(iter); - } + ret = PTR_ERR_OR_ZERO(iter); + if (ret) + goto err; xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); ret = le16_to_cpu(xattr.v->x_val_len); @@ -148,9 +145,12 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, else memcpy(buffer, xattr_val(xattr.v), ret); } - + bch2_trans_iter_put(&trans, iter); +err: bch2_trans_exit(&trans); - return ret; + + BUG_ON(ret == -EINTR); + return ret == -ENOENT ? 
-ENODATA : ret; } int bch2_xattr_set(struct btree_trans *trans, u64 inum, @@ -294,6 +294,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) break; } + bch2_trans_iter_put(&trans, iter); + ret = bch2_trans_exit(&trans) ?: ret; if (ret)