diff --git a/.bcachefs_revision b/.bcachefs_revision index 61666d60..77e97af7 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -63924135a103cbf2411ef73e7ca9b1b6ebe265bd +242d37cbd0abfa575ebf816c715e5bb9513c90a0 diff --git a/cmd_data.c b/cmd_data.c index f495b6c0..1e45d2ff 100644 --- a/cmd_data.c +++ b/cmd_data.c @@ -41,8 +41,10 @@ int cmd_data_rereplicate(int argc, char *argv[]) die("too many arguments"); return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) { - .op = BCH_DATA_OP_REREPLICATE, - .start = POS_MIN, - .end = POS_MAX, + .op = BCH_DATA_OP_REREPLICATE, + .start_btree = 0, + .start_pos = POS_MIN, + .end_btree = BTREE_ID_NR, + .end_pos = POS_MAX, }); } diff --git a/cmd_device.c b/cmd_device.c index b356bcb1..161e6692 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -320,8 +320,10 @@ int cmd_device_evacuate(int argc, char *argv[]) return bchu_data(fs, (struct bch_ioctl_data) { .op = BCH_DATA_OP_MIGRATE, - .start = POS_MIN, - .end = POS_MAX, + .start_btree = 0, + .start_pos = POS_MIN, + .end_btree = BTREE_ID_NR, + .end_pos = POS_MAX, .migrate.dev = dev_idx, }); } diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 76c98ddb..276ab56c 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -216,6 +216,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type) { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c_xattr xattr; @@ -226,7 +227,7 @@ retry: bch2_trans_begin(&trans); iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + &hash, inode->v.i_ino, &X_SEARCH(acl_to_xattr_type(type), "", 0), 0); if (IS_ERR(iter)) { @@ -287,6 +288,7 @@ int bch2_set_acl(struct inode *vinode, struct posix_acl *_acl, int type) struct btree_trans trans; struct btree_iter *inode_iter; struct bch_inode_unpacked inode_u; + struct bch_hash_info hash_info; struct posix_acl *acl; umode_t mode; int ret; @@ -311,9 +313,9 @@ retry: goto err; } - ret = bch2_set_acl_trans(&trans, &inode_u, - &inode->ei_str_hash, - acl, type); + hash_info = bch2_hash_info_init(c, &inode_u); + + ret = bch2_set_acl_trans(&trans, &inode_u, &hash_info, acl, type); if (ret) goto btree_err; @@ -342,10 +344,11 @@ err: } int bch2_acl_chmod(struct btree_trans *trans, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) { + struct bch_hash_info hash_info = bch2_hash_info_init(trans->c, inode); struct btree_iter *iter; struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; @@ -353,7 +356,7 @@ int bch2_acl_chmod(struct btree_trans *trans, int ret = 0; iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + &hash_info, inode->bi_inum, &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (IS_ERR(iter)) diff --git a/libbcachefs/acl.h b/libbcachefs/acl.h index cb62d502..ba210c26 100644 --- a/libbcachefs/acl.h +++ b/libbcachefs/acl.h @@ -33,7 +33,7 @@ int bch2_set_acl_trans(struct btree_trans *, const struct bch_hash_info *, struct posix_acl *, int); int bch2_set_acl(struct inode *, struct posix_acl *, int); -int bch2_acl_chmod(struct btree_trans *, struct bch_inode_info *, +int bch2_acl_chmod(struct btree_trans *, struct bch_inode_unpacked *, umode_t, struct posix_acl **); #else @@ -47,7 +47,7 @@ static inline int bch2_set_acl_trans(struct btree_trans *trans, } static inline int bch2_acl_chmod(struct btree_trans *trans, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, umode_t mode, struct posix_acl **new_acl) { diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 3d06547e..573e1fe5 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1309,7 +1309,7 @@ LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60); LE64_BITMASK(BCH_SB_HAS_ERRORS, struct bch_sb, flags[0], 60, 61); -LE64_BITMASK(BCH_SB_REFLINK, struct bch_sb, flags[0], 61, 62); +/* bit 61 was reflink option */ LE64_BITMASK(BCH_SB_BIG_ENDIAN, struct bch_sb, flags[0], 62, 63); /* 61-64 unused */ diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index f1cb5d40..f679fc21 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state { }; enum bch_data_ops { - BCH_DATA_OP_SCRUB = 0, - BCH_DATA_OP_REREPLICATE = 1, - BCH_DATA_OP_MIGRATE = 2, - BCH_DATA_OP_NR = 3, + BCH_DATA_OP_SCRUB = 0, + BCH_DATA_OP_REREPLICATE = 1, + BCH_DATA_OP_MIGRATE = 2, + BCH_DATA_OP_REWRITE_OLD_NODES = 3, + BCH_DATA_OP_NR = 4, }; /* @@ -187,11 +188,13 @@ enum bch_data_ops { * job. The file descriptor is O_CLOEXEC. */ struct bch_ioctl_data { - __u32 op; + __u16 op; + __u8 start_btree; + __u8 end_btree; __u32 flags; - struct bpos start; - struct bpos end; + struct bpos start_pos; + struct bpos end_pos; union { struct { diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index f8da65de..1c8244d4 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -265,9 +265,22 @@ again: if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { struct stripe *m = genradix_ptr(&c->stripes[true], entry->stripe_ptr.idx); + union bch_extent_entry *next_ptr; + + bkey_extent_entry_for_each_from(ptrs, next_ptr, entry) + if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr) + goto found; + next_ptr = NULL; +found: + if (!next_ptr) { + bch_err(c, "aieee, found stripe ptr with no data ptr"); + continue; + } if (!m || !m->alive || - !bch2_ptr_matches_stripe_m(m, p)) { + !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block], + &next_ptr->ptr, + m->sectors)) { bch2_bkey_extent_entry_drop(new, entry); goto again; } diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 72e3d6d8..086d5c1b 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1445,13 +1445,16 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) /* Iterate across keys (in leaf nodes only) */ -static void btree_iter_pos_changed(struct btree_iter *iter, int cmp) +static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) { + int cmp = bkey_cmp(new_pos, iter->real_pos); unsigned l = iter->level; if (!cmp) goto out; + iter->real_pos = new_pos; + if (unlikely(btree_iter_type(iter) == BTREE_ITER_CACHED)) { btree_node_unlock(iter, 0); iter->l[0].b = BTREE_ITER_NO_NODE_UP; @@ -1481,15 +1484,6 @@ out: btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); else btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK); -} - -static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_pos) -{ - int cmp = bkey_cmp(new_pos, iter->real_pos); - - iter->real_pos = new_pos; - - btree_iter_pos_changed(iter, cmp); bch2_btree_iter_verify(iter); } @@ -1992,7 +1986,7 @@ static void btree_trans_iter_alloc_fail(struct btree_trans *trans) char buf[100]; trans_for_each_iter(trans, iter) - printk(KERN_ERR "iter: btree %s pos %s%s%s%s %ps\n", + printk(KERN_ERR "iter: btree %s pos %s%s%s%s %pS\n", bch2_btree_ids[iter->btree_id], (bch2_bpos_to_text(&PBUF(buf), iter->pos), buf), btree_iter_live(trans, iter) ? " live" : "", @@ -2063,7 +2057,7 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, if (best && bkey_cmp(bpos_diff(best->pos, pos), - bpos_diff(iter->pos, pos)) < 0) + bpos_diff(iter->real_pos, pos)) < 0) continue; best = iter; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 35e828f5..0322960d 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -805,6 +805,7 @@ retry: while (1) { struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; bch2_btree_iter_set_pos(iter, POS(inum, rbio->bio.bi_iter.bi_sector)); @@ -820,7 +821,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); - ret = bch2_read_indirect_extent(trans, + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, &sk); if (ret) break; @@ -844,7 +845,8 @@ retry: if (bkey_extent_is_allocation(k.k)) bch2_add_page_sectors(&rbio->bio, k); - bch2_read_extent(trans, rbio, k, offset_into_extent, flags); + bch2_read_extent(trans, rbio, iter->pos, + data_btree, k, offset_into_extent, flags); if (flags & BCH_READ_LAST_FRAGMENT) break; @@ -2858,9 +2860,6 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, u64 aligned_len; loff_t ret = 0; - if (!c->opts.reflink) - return -EOPNOTSUPP; - if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY)) return -EINVAL; diff --git a/libbcachefs/fs-ioctl.c b/libbcachefs/fs-ioctl.c index 0873d2f0..eb871634 100644 --- a/libbcachefs/fs-ioctl.c +++ b/libbcachefs/fs-ioctl.c @@ -183,6 +183,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, struct bch_inode_info *src, const char __user *name) { + struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); struct bch_inode_info *dst; struct inode *vinode = NULL; char *kname = NULL; @@ -202,8 +203,7 @@ static int bch2_ioc_reinherit_attrs(struct bch_fs *c, qstr.name = kname; ret = -ENOENT; - inum = bch2_dirent_lookup(c, src->v.i_ino, - &src->ei_str_hash, + inum = bch2_dirent_lookup(c, src->v.i_ino, &hash, &qstr); if (!inum) goto err1; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 612273aa..0301ab19 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -368,11 +368,11 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, { struct bch_fs *c = vdir->i_sb->s_fs_info; struct bch_inode_info *dir = to_bch_ei(vdir); + struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode); struct inode *vinode = NULL; u64 inum; - inum = bch2_dirent_lookup(c, dir->v.i_ino, - &dir->ei_str_hash, + inum = bch2_dirent_lookup(c, dir->v.i_ino, &hash, &dentry->d_name); if (inum) @@ -412,16 +412,12 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - do { - bch2_trans_begin(&trans); - ret = bch2_link_trans(&trans, + ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, + BTREE_INSERT_NOUNLOCK, + bch2_link_trans(&trans, dir->v.i_ino, inode->v.i_ino, &dir_u, &inode_u, - &dentry->d_name) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK); - } while (ret == -EINTR); + &dentry->d_name)); if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); @@ -468,17 +464,12 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - do { - bch2_trans_begin(&trans); - - ret = bch2_unlink_trans(&trans, + ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, + BTREE_INSERT_NOUNLOCK| + BTREE_INSERT_NOFAIL, + bch2_unlink_trans(&trans, dir->v.i_ino, &dir_u, - &inode_u, &dentry->d_name) ?: - bch2_trans_commit(&trans, NULL, - &dir->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| - BTREE_INSERT_NOFAIL); - } while (ret == -EINTR); + &inode_u, &dentry->d_name)); if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); @@ -592,21 +583,16 @@ static int bch2_rename2(struct inode *src_vdir, struct dentry *src_dentry, goto err; } -retry: - bch2_trans_begin(&trans); - ret = bch2_rename_trans(&trans, - src_dir->v.i_ino, &src_dir_u, - dst_dir->v.i_ino, &dst_dir_u, - &src_inode_u, - &dst_inode_u, - &src_dentry->d_name, - &dst_dentry->d_name, - mode) ?: - bch2_trans_commit(&trans, NULL, - &journal_seq, - BTREE_INSERT_NOUNLOCK); - if (ret == -EINTR) - goto retry; + ret = __bch2_trans_do(&trans, NULL, &journal_seq, + BTREE_INSERT_NOUNLOCK, + bch2_rename_trans(&trans, + src_dir->v.i_ino, &src_dir_u, + dst_dir->v.i_ino, &dst_dir_u, + &src_inode_u, + &dst_inode_u, + &src_dentry->d_name, + &dst_dentry->d_name, + mode)); if (unlikely(ret)) goto err; @@ -728,7 +714,7 @@ retry: bch2_setattr_copy(inode, &inode_u, attr); if (attr->ia_valid & ATTR_MODE) { - ret = bch2_acl_chmod(&trans, inode, inode_u.bi_mode, &acl); + ret = bch2_acl_chmod(&trans, &inode_u, inode_u.bi_mode, &acl); if (ret) goto btree_err; } @@ -909,6 +895,8 @@ retry: while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { + enum btree_id data_btree = BTREE_ID_extents; + if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { bch2_btree_iter_next(iter); @@ -921,7 +909,7 @@ retry: bch2_bkey_buf_reassemble(&cur, c, k); - ret = bch2_read_indirect_extent(&trans, + ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &cur); if (ret) break; @@ -1154,7 +1142,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, inode->ei_flags = 0; inode->ei_journal_seq = 0; inode->ei_quota_reserved = 0; - inode->ei_str_hash = bch2_hash_info_init(c, bi); inode->ei_qid = bch_qid(bi); inode->v.i_mapping->a_ops = &bch_address_space_operations; diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index 3df85ffb..2d82ed7d 100644 --- a/libbcachefs/fs.h +++ b/libbcachefs/fs.h @@ -45,8 +45,6 @@ struct bch_inode_info { struct mutex ei_quota_lock; struct bch_qid ei_qid; - struct bch_hash_info ei_str_hash; - /* copy of inode in btree: */ struct bch_inode_unpacked ei_inode; }; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index d95e23d8..4fcc2c71 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -1635,8 +1635,8 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); - iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - rbio->pos, BTREE_ITER_SLOTS); + iter = bch2_trans_get_iter(&trans, rbio->data_btree, + rbio->read_pos, BTREE_ITER_SLOTS); retry: rbio->bio.bi_status = 0; @@ -1650,14 +1650,17 @@ retry: if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, - rbio->pos.offset - + rbio->data_pos.offset - rbio->pick.crc.offset)) { /* extent we wanted to read no longer exists: */ rbio->hole = true; goto out; } - ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags); + ret = __bch2_read_extent(&trans, rbio, bvec_iter, + rbio->read_pos, + rbio->data_btree, + k, 0, failed, flags); if (ret == READ_RETRY) goto retry; if (ret) @@ -1672,82 +1675,6 @@ err: goto out; } -static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, - struct bvec_iter bvec_iter, u64 inode, - struct bch_io_failures *failed, unsigned flags) -{ - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_buf sk; - struct bkey_s_c k; - int ret; - - flags &= ~BCH_READ_LAST_FRAGMENT; - flags |= BCH_READ_MUST_CLONE; - - bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); - - for_each_btree_key(&trans, iter, BTREE_ID_extents, - POS(inode, bvec_iter.bi_sector), - BTREE_ITER_SLOTS, k, ret) { - unsigned bytes, sectors, offset_into_extent; - - bch2_bkey_buf_reassemble(&sk, c, k); - - offset_into_extent = iter->pos.offset - - bkey_start_offset(k.k); - sectors = k.k->size - offset_into_extent; - - ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &sk); - if (ret) - break; - - k = bkey_i_to_s_c(sk.k); - - sectors = min(sectors, k.k->size - offset_into_extent); - - bch2_trans_unlock(&trans); - - bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; - swap(bvec_iter.bi_size, bytes); - - ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, - offset_into_extent, failed, flags); - switch (ret) { - case READ_RETRY: - goto retry; - case READ_ERR: - goto err; - }; - - if (bytes == bvec_iter.bi_size) - goto out; - - swap(bvec_iter.bi_size, bytes); - bio_advance_iter(&rbio->bio, &bvec_iter, bytes); - } - - if (ret == -EINTR) - goto retry; - /* - * If we get here, it better have been because there was an error - * reading a btree node - */ - BUG_ON(!ret); - bch_err_inum_ratelimited(c, inode, - "read error %i from btree lookup", ret); -err: - rbio->bio.bi_status = BLK_STS_IOERR; -out: - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, c); - bch2_rbio_done(rbio); -} - static void bch2_rbio_retry(struct work_struct *work) { struct bch_read_bio *rbio = @@ -1755,7 +1682,7 @@ static void bch2_rbio_retry(struct work_struct *work) struct bch_fs *c = rbio->c; struct bvec_iter iter = rbio->bvec_iter; unsigned flags = rbio->flags; - u64 inode = rbio->pos.inode; + u64 inode = rbio->read_pos.inode; struct bch_io_failures failed = { .nr = 0 }; trace_read_retry(&rbio->bio); @@ -1770,10 +1697,14 @@ static void bch2_rbio_retry(struct work_struct *work) flags |= BCH_READ_IN_RETRY; flags &= ~BCH_READ_MAY_PROMOTE; - if (flags & BCH_READ_NODECODE) + if (flags & BCH_READ_NODECODE) { bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags); - else - bch2_read_retry(c, rbio, iter, inode, &failed, flags); + } else { + flags &= ~BCH_READ_LAST_FRAGMENT; + flags |= BCH_READ_MUST_CLONE; + + __bch2_read(c, rbio, iter, inode, &failed, flags); + } } static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, @@ -1799,7 +1730,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { struct bch_fs *c = rbio->c; - u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; + u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; struct bch_extent_crc_unpacked new_crc; struct btree_iter *iter = NULL; struct bkey_i *new; @@ -1809,7 +1740,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, if (crc_is_compressed(rbio->pick.crc)) return 0; - iter = bch2_trans_get_iter(trans, BTREE_ID_extents, rbio->pos, + iter = bch2_trans_get_iter(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(iter); if ((ret = bkey_err(k))) @@ -1942,14 +1873,14 @@ csum_err: return; } - bch2_dev_inum_io_error(ca, rbio->pos.inode, (u64) rbio->bvec_iter.bi_sector, + bch2_dev_inum_io_error(ca, rbio->read_pos.inode, (u64) rbio->bvec_iter.bi_sector, "data checksum error: expected %0llx:%0llx got %0llx:%0llx (type %u)", rbio->pick.crc.csum.hi, rbio->pick.crc.csum.lo, csum.hi, csum.lo, crc.csum_type); bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); return; decompression_err: - bch_err_inum_ratelimited(c, rbio->pos.inode, + bch_err_inum_ratelimited(c, rbio->read_pos.inode, "decompression error"); bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); return; @@ -1972,13 +1903,9 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - /* - * XXX: rbio->pos is not what we want here when reading from indirect - * extents - */ if (bch2_dev_inum_io_err_on(bio->bi_status, ca, - rbio->pos.inode, - rbio->pos.offset, + rbio->read_pos.inode, + rbio->read_pos.offset, "data read error: %s", bch2_blk_status_to_str(bio->bi_status))) { bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); @@ -2043,7 +1970,8 @@ err: } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, - struct bvec_iter iter, struct bkey_s_c k, + struct bvec_iter iter, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, unsigned offset_into_extent, struct bch_io_failures *failed, unsigned flags) { @@ -2053,7 +1981,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bch_dev *ca; struct promote_op *promote = NULL; bool bounce = false, read_full = false, narrow_crcs = false; - struct bpos pos = bkey_start_pos(k.k); + struct bpos data_pos = bkey_start_pos(k.k); int pick_ret; if (bkey_extent_is_inline_data(k.k)) { @@ -2129,7 +2057,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, pick.crc.offset || offset_into_extent)); - pos.offset += offset_into_extent; + data_pos.offset += offset_into_extent; pick.ptr.offset += pick.crc.offset + offset_into_extent; offset_into_extent = 0; @@ -2201,7 +2129,9 @@ get_bio: /* XXX: only initialize this if needed */ rbio->devs_have = bch2_bkey_devs(k); rbio->pick = pick; - rbio->pos = pos; + rbio->read_pos = read_pos; + rbio->data_btree = data_btree; + rbio->data_pos = data_pos; rbio->version = k.k->version; rbio->promote = promote; INIT_WORK(&rbio->work, NULL); @@ -2271,6 +2201,9 @@ out: ret = READ_RETRY; } + if (!ret) + goto out_read_done; + return ret; } @@ -2297,23 +2230,17 @@ out_read_done: return 0; } -void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) +void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + struct bvec_iter bvec_iter, u64 inode, + struct bch_io_failures *failed, unsigned flags) { struct btree_trans trans; struct btree_iter *iter; struct bkey_buf sk; struct bkey_s_c k; - unsigned flags = BCH_READ_RETRY_IF_STALE| - BCH_READ_MAY_PROMOTE| - BCH_READ_USER_MAPPED; int ret; - BUG_ON(rbio->_state); BUG_ON(flags & BCH_READ_NODECODE); - BUG_ON(flags & BCH_READ_IN_RETRY); - - rbio->c = c; - rbio->start_time = local_clock(); bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); @@ -2321,13 +2248,14 @@ retry: bch2_trans_begin(&trans); iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, - POS(inode, rbio->bio.bi_iter.bi_sector), + POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS); while (1) { unsigned bytes, sectors, offset_into_extent; + enum btree_id data_btree = BTREE_ID_extents; bch2_btree_iter_set_pos(iter, - POS(inode, rbio->bio.bi_iter.bi_sector)); + POS(inode, bvec_iter.bi_sector)); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); @@ -2340,7 +2268,7 @@ retry: bch2_bkey_buf_reassemble(&sk, c, k); - ret = bch2_read_indirect_extent(&trans, + ret = bch2_read_indirect_extent(&trans, &data_btree, &offset_into_extent, &sk); if (ret) goto err; @@ -2359,19 +2287,27 @@ retry: */ bch2_trans_unlock(&trans); - bytes = min(sectors, bio_sectors(&rbio->bio)) << 9; - swap(rbio->bio.bi_iter.bi_size, bytes); + bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; + swap(bvec_iter.bi_size, bytes); - if (rbio->bio.bi_iter.bi_size == bytes) + if (bvec_iter.bi_size == bytes) flags |= BCH_READ_LAST_FRAGMENT; - bch2_read_extent(&trans, rbio, k, offset_into_extent, flags); + ret = __bch2_read_extent(&trans, rbio, bvec_iter, iter->pos, + data_btree, k, + offset_into_extent, failed, flags); + switch (ret) { + case READ_RETRY: + goto retry; + case READ_ERR: + goto err; + }; if (flags & BCH_READ_LAST_FRAGMENT) break; - swap(rbio->bio.bi_iter.bi_size, bytes); - bio_advance(&rbio->bio, bytes); + swap(bvec_iter.bi_size, bytes); + bio_advance_iter(&rbio->bio, &bvec_iter, bytes); } out: bch2_trans_exit(&trans); diff --git a/libbcachefs/io.h b/libbcachefs/io.h index 04f6baa1..2ac03c04 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -117,12 +117,15 @@ int __bch2_read_indirect_extent(struct btree_trans *, unsigned *, struct bkey_buf *); static inline int bch2_read_indirect_extent(struct btree_trans *trans, + enum btree_id *data_btree, unsigned *offset_into_extent, struct bkey_buf *k) { - return k->k->k.type == KEY_TYPE_reflink_p - ? __bch2_read_indirect_extent(trans, offset_into_extent, k) - : 0; + if (k->k->k.type != KEY_TYPE_reflink_p) + return 0; + + *data_btree = BTREE_ID_reflink; + return __bch2_read_indirect_extent(trans, offset_into_extent, k); } enum bch_read_flags { @@ -139,20 +142,37 @@ enum bch_read_flags { }; int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *, - struct bvec_iter, struct bkey_s_c, unsigned, + struct bvec_iter, struct bpos, enum btree_id, + struct bkey_s_c, unsigned, struct bch_io_failures *, unsigned); static inline void bch2_read_extent(struct btree_trans *trans, - struct bch_read_bio *rbio, - struct bkey_s_c k, - unsigned offset_into_extent, - unsigned flags) + struct bch_read_bio *rbio, struct bpos read_pos, + enum btree_id data_btree, struct bkey_s_c k, + unsigned offset_into_extent, unsigned flags) { - __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k, - offset_into_extent, NULL, flags); + __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, + data_btree, k, offset_into_extent, NULL, flags); } -void bch2_read(struct bch_fs *, struct bch_read_bio *, u64); +void __bch2_read(struct bch_fs *, struct bch_read_bio *, struct bvec_iter, + u64, struct bch_io_failures *, unsigned flags); + +static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, + u64 inode) +{ + struct bch_io_failures failed = { .nr = 0 }; + + BUG_ON(rbio->_state); + + rbio->c = c; + rbio->start_time = local_clock(); + + __bch2_read(c, rbio, rbio->bio.bi_iter, inode, &failed, + BCH_READ_RETRY_IF_STALE| + BCH_READ_MAY_PROMOTE| + BCH_READ_USER_MAPPED); +} static inline struct bch_read_bio *rbio_init(struct bio *bio, struct bch_io_opts opts) diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h index b23727d2..e7aca7c9 100644 --- a/libbcachefs/io_types.h +++ b/libbcachefs/io_types.h @@ -58,8 +58,18 @@ struct bch_read_bio { struct bch_devs_list devs_have; struct extent_ptr_decoded pick; - /* start pos of data we read (may not be pos of data we want) */ - struct bpos pos; + + /* + * pos we read from - different from data_pos for indirect extents: + */ + struct bpos read_pos; + + /* + * start pos of data we read (may not be pos of data we want) - for + * promote, narrow extents paths: + */ + enum btree_id data_btree; + struct bpos data_pos; struct bversion version; struct promote_op *promote; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 115433e7..253a27c5 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -209,9 +209,9 @@ void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) BUG_ON(!m->op.wbio.bio.bi_vcnt); m->ptr = rbio->pick.ptr; - m->offset = rbio->pos.offset - rbio->pick.crc.offset; + m->offset = rbio->data_pos.offset - rbio->pick.crc.offset; m->op.devs_have = rbio->devs_have; - m->op.pos = rbio->pos; + m->op.pos = rbio->data_pos; m->op.version = rbio->version; m->op.crc = rbio->pick.crc; m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; @@ -493,7 +493,9 @@ static int bch2_move_extent(struct btree_trans *trans, * ctxt when doing wakeup */ closure_get(&ctxt->cl); - bch2_read_extent(trans, &io->rbio, k, 0, + bch2_read_extent(trans, &io->rbio, + bkey_start_pos(k.k), + btree_id, k, 0, BCH_READ_NODECODE| BCH_READ_LAST_FRAGMENT); return 0; @@ -532,7 +534,7 @@ static int __bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; stats->btree_id = btree_id; - stats->pos = POS_MIN; + stats->pos = start; iter = bch2_trans_get_iter(&trans, btree_id, start, BTREE_ITER_PREFETCH); @@ -647,14 +649,15 @@ out: } int bch2_move_data(struct bch_fs *c, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, struct bch_ratelimit *rate, struct write_point_specifier wp, - struct bpos start, - struct bpos end, move_pred_fn pred, void *arg, struct bch_move_stats *stats) { struct moving_context ctxt = { .stats = stats }; + enum btree_id id; int ret; closure_init_stack(&ctxt.cl); @@ -663,10 +666,23 @@ int bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; - ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_extents) ?: - __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_reflink); + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { + stats->btree_id = id; + + if (id != BTREE_ID_extents && + id != BTREE_ID_reflink) + continue; + + ret = __bch2_move_data(c, &ctxt, rate, wp, + id == start_btree_id ? start_pos : POS_MIN, + id == end_btree_id ? end_pos : POS_MAX, + pred, arg, stats, id); + if (ret) + break; + } + move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); @@ -680,16 +696,22 @@ int bch2_move_data(struct bch_fs *c, return ret; } +typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *, + struct btree *, struct bch_io_opts *, + struct data_opts *); + static int bch2_move_btree(struct bch_fs *c, - move_pred_fn pred, - void *arg, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, + move_btree_pred pred, void *arg, struct bch_move_stats *stats) { + bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_trans trans; struct btree_iter *iter; struct btree *b; - unsigned id; + enum btree_id id; struct data_opts data_opts; enum data_cmd cmd; int ret = 0; @@ -698,16 +720,24 @@ static int bch2_move_btree(struct bch_fs *c, stats->data_type = BCH_DATA_btree; - for (id = 0; id < BTREE_ID_NR; id++) { + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { stats->btree_id = id; - for_each_btree_node(&trans, iter, id, POS_MIN, + for_each_btree_node(&trans, iter, id, + id == start_btree_id ? start_pos : POS_MIN, BTREE_ITER_PREFETCH, b) { + if (kthread && (ret = kthread_should_stop())) + goto out; + + if ((cmp_int(id, end_btree_id) ?: + bkey_cmp(b->key.k.p, end_pos)) > 0) + break; + stats->pos = iter->pos; - switch ((cmd = pred(c, arg, - bkey_i_to_s_c(&b->key), - &io_opts, &data_opts))) { + switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -727,7 +757,7 @@ next: ret = bch2_trans_iter_free(&trans, iter) ?: ret; } - +out: bch2_trans_exit(&trans); return ret; @@ -786,6 +816,36 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, return DATA_REWRITE; } +static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + if (!btree_node_need_rewrite(b)) + return DATA_SKIP; + + data_opts->target = 0; + data_opts->nr_replicas = 1; + data_opts->btree_insert_flags = 0; + return DATA_REWRITE; +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -797,17 +857,20 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rereplicate_btree_pred, c, stats) ?: ret; closure_wait_event(&c->btree_interior_update_wait, !bch2_btree_interior_updates_nr_pending(c)); ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), rereplicate_pred, c, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; @@ -818,16 +881,26 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + migrate_btree_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), migrate_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; + case BCH_DATA_OP_REWRITE_OLD_NODES: + + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rewrite_old_nodes_pred, &op, stats) ?: ret; + break; default: ret = -EINVAL; } diff --git a/libbcachefs/move.h b/libbcachefs/move.h index b04bc669..403ca695 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, struct bch_io_opts *, struct data_opts *); -int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, +int bch2_move_data(struct bch_fs *, + enum btree_id, struct bpos, + enum btree_id, struct bpos, + struct bch_ratelimit *, struct write_point_specifier, - struct bpos, struct bpos, move_pred_fn, void *, struct bch_move_stats *); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 0b1faee5..03668e48 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c) sizeof(h->data[0]), bucket_offset_cmp, NULL); - ret = bch2_move_data(c, &c->copygc_pd.rate, + ret = bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, + &c->copygc_pd.rate, writepoint_ptr(&c->copygc_write_point), - POS_MIN, POS_MAX, copygc_pred, NULL, &move_stats); diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index e292d7fa..4ae58b68 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -212,11 +212,6 @@ enum opt_type { OPT_BOOL(), \ BCH_SB_PRJQUOTA, false, \ NULL, "Enable project quotas") \ - x(reflink, u8, \ - OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_BOOL(), \ - BCH_SB_REFLINK, true, \ - NULL, "Enable reflink support") \ x(degraded, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 482aca43..19cfd1ca 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg) rebalance_work_reset(c); bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, /* ratelimiting disabled for now */ NULL, /* &r->pd.rate, */ writepoint_ptr(&c->rebalance_write_point), - POS_MIN, POS_MAX, rebalance_pred, NULL, &r->move_stats); } diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index a2cc0785..e9a6a5f6 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -157,8 +157,10 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bch2_trans_update(trans, reflink_iter, r_v, 0); r_p = bch2_trans_kmalloc(trans, sizeof(*r_p)); - if (IS_ERR(r_p)) - return PTR_ERR(r_p); + if (IS_ERR(r_p)) { + ret = PTR_ERR(r_p); + goto err; + } orig->k.type = KEY_TYPE_reflink_p; r_p = bkey_i_to_reflink_p(orig); @@ -204,9 +206,6 @@ s64 bch2_remap_range(struct bch_fs *c, u64 src_done, dst_done; int ret = 0, ret2 = 0; - if (!c->opts.reflink) - return -EOPNOTSUPP; - if (!percpu_ref_tryget(&c->writes)) return -EROFS; diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index bf968ebe..6bb5565c 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -121,6 +121,7 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, const char *name, void *buffer, size_t size, int type) { + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c_xattr xattr; @@ -128,8 +129,8 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, bch2_trans_init(&trans, c, 0, 0); - iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, - &inode->ei_str_hash, inode->v.i_ino, + iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash, + inode->v.i_ino, &X_SEARCH(type, name, strlen(name)), 0); if (IS_ERR(iter)) { @@ -239,7 +240,7 @@ static int bch2_xattr_emit(struct dentry *dentry, } static int bch2_xattr_list_bcachefs(struct bch_fs *c, - struct bch_inode_info *inode, + struct bch_inode_unpacked *inode, struct xattr_buf *buf, bool all) { @@ -249,12 +250,12 @@ static int bch2_xattr_list_bcachefs(struct bch_fs *c, u64 v; for (id = 0; id < Inode_opt_nr; id++) { - v = bch2_inode_opt_get(&inode->ei_inode, id); + v = bch2_inode_opt_get(inode, id); if (!v) continue; if (!all && - !(inode->ei_inode.bi_fields_set & (1 << id))) + !(inode->bi_fields_set & (1 << id))) continue; ret = __bch2_xattr_emit(prefix, bch2_inode_opts[id], @@ -298,11 +299,11 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (ret) return ret; - ret = bch2_xattr_list_bcachefs(c, inode, &buf, false); + ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false); if (ret) return ret; - ret = bch2_xattr_list_bcachefs(c, inode, &buf, true); + ret = bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); if (ret) return ret; @@ -326,10 +327,10 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, - bch2_xattr_set(&trans, inode->v.i_ino, - &inode->ei_str_hash, + bch2_xattr_set(&trans, inode->v.i_ino, &hash, name, value, size, handler->flags, flags)); }