diff --git a/.bcachefs_revision b/.bcachefs_revision index c626bfee..3f01000e 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -6d9ff21de70cf194bd9f783172bd6cac5cbce3eb +6afa1fcb13a8c66b1cafa08027f484a3f846c52d diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index d0c6878b..0cde2638 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -331,8 +331,7 @@ retry: inode_u.bi_mode = mode; ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, 0); + bch2_trans_commit(&trans, NULL, NULL, 0); btree_err: bch2_trans_iter_exit(&trans, &inode_iter); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index a762a31b..c3542d3c 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -1203,11 +1203,15 @@ static int bch2_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct reflink_gc *ref; size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - u64 end_idx = le64_to_cpu(p.v->idx) + p.k->size + - le32_to_cpu(p.v->back_pad); + u64 idx = le64_to_cpu(p.v->idx); + u64 end = le64_to_cpu(p.v->idx) + p.k->size; int ret = 0; + if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) { + idx -= le32_to_cpu(p.v->front_pad); + end += le32_to_cpu(p.v->back_pad); + } + l = 0; r = c->reflink_gc_nr; while (l < r) { @@ -1220,7 +1224,7 @@ static int bch2_mark_reflink_p(struct btree_trans *trans, r = m; } - while (idx < end_idx && !ret) + while (idx < end && !ret) ret = __bch2_mark_reflink_p(c, p, &idx, flags, l++); return ret; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 684488ef..a2367b3e 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1126,7 +1126,6 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, op = &w->io->op; bch2_write_op_init(op, c, w->opts); op->target = w->opts.foreground_target; - op_journal_seq_set(op, &inode->ei_journal_seq); op->nr_replicas = nr_replicas; 
op->res.nr_replicas = nr_replicas; op->write_point = writepoint_hashed(inode->ei_last_dirtied); @@ -1936,7 +1935,6 @@ static long bch2_dio_write_loop(struct dio_write *dio) bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode)); dio->op.end_io = bch2_dio_write_loop_async; dio->op.target = dio->op.opts.foreground_target; - op_journal_seq_set(&dio->op, &inode->ei_journal_seq); dio->op.write_point = writepoint_hashed((unsigned long) current); dio->op.nr_replicas = dio->op.opts.data_replicas; dio->op.subvol = inode->ei_subvol; @@ -2168,27 +2166,33 @@ unlock: /* fsync: */ +/* + * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an + * insert trigger: look up the btree inode instead + */ +static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum) +{ + struct bch_inode_unpacked inode; + int ret; + + if (c->opts.journal_flush_disabled) + return 0; + + ret = bch2_inode_find_by_inum(c, inum, &inode); + if (ret) + return ret; + + return bch2_journal_flush_seq(&c->journal, inode.bi_journal_seq); +} + int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret, ret2; + int ret, ret2 = 0; ret = file_write_and_wait_range(file, start, end); - if (ret) - return ret; - - if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC)) - goto out; - - ret = sync_inode_metadata(&inode->v, 1); - if (ret) - return ret; -out: - if (!c->opts.journal_flush_disabled) - ret = bch2_journal_flush_seq(&c->journal, - inode->ei_journal_seq); - ret2 = file_check_and_advance_wb_err(file); + ret2 = bch2_flush_inode(c, inode_inum(inode)); return ret ?: ret2; } @@ -2452,7 +2456,7 @@ int bch2_truncate(struct user_namespace *mnt_userns, ret = bch2_fpunch(c, inode_inum(inode), round_up(iattr->ia_size, block_bytes(c)) >> 9, - U64_MAX, &inode->ei_journal_seq, &i_sectors_delta); + U64_MAX, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); if 
(unlikely(ret)) @@ -2512,7 +2516,6 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len ret = bch2_fpunch(c, inode_inum(inode), discard_start, discard_end, - &inode->ei_journal_seq, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); } @@ -2591,7 +2594,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ret = bch2_fpunch(c, inode_inum(inode), offset >> 9, (offset + len) >> 9, - &inode->ei_journal_seq, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); @@ -2695,8 +2697,7 @@ reassemble: ret = bch2_btree_iter_traverse(&del) ?: bch2_trans_update(&trans, &del, &delete, trigger_flags) ?: bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?: - bch2_trans_commit(&trans, &disk_res, - &inode->ei_journal_seq, + bch2_trans_commit(&trans, &disk_res, NULL, BTREE_INSERT_NOFAIL); bch2_disk_reservation_put(c, &disk_res); @@ -2807,7 +2808,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, ret = bch2_extent_update(&trans, inode_inum(inode), &iter, &reservation.k_i, - &disk_res, &inode->ei_journal_seq, + &disk_res, NULL, 0, &i_sectors_delta, true); i_sectors_acct(c, inode, "a_res, i_sectors_delta); bkey_err: @@ -3011,7 +3012,6 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, inode_inum(dst), pos_dst >> 9, inode_inum(src), pos_src >> 9, aligned_len >> 9, - &dst->ei_journal_seq, pos_dst + len, &i_sectors_delta); if (ret < 0) goto err; @@ -3029,10 +3029,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, i_size_write(&dst->v, pos_dst + ret); spin_unlock(&dst->v.i_lock); - if (((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || - IS_SYNC(file_inode(file_dst))) && - !c->opts.journal_flush_disabled) - ret = bch2_journal_flush_seq(&c->journal, dst->ei_journal_seq); + if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || + IS_SYNC(file_inode(file_dst))) + ret = bch2_flush_inode(c, inode_inum(dst)); err: 
bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 9c5d16c6..74de7bc1 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -40,25 +40,6 @@ static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum, struct bch_inode_info *, struct bch_inode_unpacked *); -static void journal_seq_copy(struct bch_fs *c, - struct bch_inode_info *dst, - u64 journal_seq) -{ - /* - * atomic64_cmpxchg has a fallback for archs that don't support it, - * cmpxchg does not: - */ - atomic64_t *dst_seq = (void *) &dst->ei_journal_seq; - u64 old, v = READ_ONCE(dst->ei_journal_seq); - - do { - old = v; - - if (old >= journal_seq) - break; - } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old); -} - static void __pagecache_lock_put(struct pagecache_lock *lock, long i) { BUG_ON(atomic_long_read(&lock->v) == 0); @@ -151,9 +132,7 @@ retry: BTREE_ITER_INTENT) ?: (set ? set(inode, &inode_u, p) : 0) ?: bch2_inode_write(&trans, &iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOFAIL); + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); /* * the btree node lock protects inode->ei_inode, not ei_update_lock; @@ -328,7 +307,6 @@ err_before_quota: if (!(flags & BCH_CREATE_TMPFILE)) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } @@ -336,7 +314,6 @@ err_before_quota: inum.inum = inode_u.bi_inum; bch2_vfs_inode_init(c, inum, inode, &inode_u); - journal_seq_copy(c, inode, journal_seq); set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl); @@ -361,7 +338,6 @@ err_before_quota: * We raced, another process pulled the new inode into cache * before us: */ - journal_seq_copy(c, old, journal_seq); make_bad_inode(&inode->v); iput(&inode->v); @@ -445,7 +421,7 @@ static int __bch2_link(struct bch_fs *c, 
mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0, + ret = __bch2_trans_do(&trans, NULL, NULL, 0, bch2_link_trans(&trans, inode_inum(dir), &dir_u, inode_inum(inode), &inode_u, @@ -454,7 +430,6 @@ static int __bch2_link(struct bch_fs *c, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME); @@ -497,7 +472,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, + ret = __bch2_trans_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, inode_inum(dir), &dir_u, @@ -507,7 +482,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, if (likely(!ret)) { BUG_ON(inode_u.bi_inum != inode->v.i_ino); - journal_seq_copy(c, inode, dir->ei_journal_seq); bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); bch2_inode_update_after_write(c, inode, &inode_u, @@ -549,8 +523,6 @@ static int bch2_symlink(struct user_namespace *mnt_userns, if (unlikely(ret)) goto err; - journal_seq_copy(c, dir, inode->ei_journal_seq); - ret = __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) goto err; @@ -585,7 +557,6 @@ static int bch2_rename2(struct user_namespace *mnt_userns, ? BCH_RENAME_EXCHANGE : dst_dentry->d_inode ? 
BCH_RENAME_OVERWRITE : BCH_RENAME; - u64 journal_seq = 0; int ret; if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE)) @@ -625,7 +596,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns, goto err; } - ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0, + ret = __bch2_trans_do(&trans, NULL, NULL, 0, bch2_rename_trans(&trans, inode_inum(src_dir), &src_dir_u, inode_inum(dst_dir), &dst_dir_u, @@ -643,23 +614,17 @@ static int bch2_rename2(struct user_namespace *mnt_userns, bch2_inode_update_after_write(c, src_dir, &src_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, src_dir, journal_seq); - if (src_dir != dst_dir) { + if (src_dir != dst_dir) bch2_inode_update_after_write(c, dst_dir, &dst_dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(c, dst_dir, journal_seq); - } bch2_inode_update_after_write(c, src_inode, &src_inode_u, ATTR_CTIME); - journal_seq_copy(c, src_inode, journal_seq); - if (dst_inode) { + if (dst_inode) bch2_inode_update_after_write(c, dst_inode, &dst_inode_u, ATTR_CTIME); - journal_seq_copy(c, dst_inode, journal_seq); - } err: bch2_trans_exit(&trans); @@ -766,8 +731,7 @@ retry: } ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, + bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL); btree_err: bch2_trans_iter_exit(&trans, &inode_iter); @@ -1201,7 +1165,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum, inode->v.i_size = bi->bi_size; inode->ei_flags = 0; - inode->ei_journal_seq = bi->bi_journal_seq; inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); inode->ei_subvol = inum.subvol; @@ -1240,7 +1203,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb) mutex_init(&inode->ei_update_lock); pagecache_lock_init(&inode->ei_pagecache_lock); mutex_init(&inode->ei_quota_lock); - inode->ei_journal_seq = 0; return &inode->v; } diff --git a/libbcachefs/fs.h b/libbcachefs/fs.h index bf62e80f..40212b3d 100644 --- a/libbcachefs/fs.h +++ 
b/libbcachefs/fs.h @@ -36,7 +36,6 @@ struct bch_inode_info { unsigned long ei_flags; struct mutex ei_update_lock; - u64 ei_journal_seq; u64 ei_quota_reserved; unsigned long ei_last_dirtied; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 701e9d0e..7c9ea91d 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -376,7 +376,7 @@ int bch2_extent_update(struct btree_trans *trans, */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, - u64 *journal_seq, s64 *i_sectors_delta) + s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); @@ -414,7 +414,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, - &disk_res, journal_seq, + &disk_res, NULL, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); btree_err: @@ -433,7 +433,7 @@ btree_err: } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, - u64 *journal_seq, s64 *i_sectors_delta) + s64 *i_sectors_delta) { struct btree_trans trans; struct btree_iter iter; @@ -444,8 +444,7 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, POS(inum.inum, start), BTREE_ITER_INTENT); - ret = bch2_fpunch_at(&trans, &iter, inum, end, - journal_seq, i_sectors_delta); + ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); diff --git a/libbcachefs/io.h b/libbcachefs/io.h index 38efd39c..fbe46660 100644 --- a/libbcachefs/io.h +++ b/libbcachefs/io.h @@ -48,12 +48,6 @@ static inline u64 *op_journal_seq(struct bch_write_op *op) ? 
op->journal_seq_p : &op->journal_seq; } -static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq) -{ - op->journal_seq_p = journal_seq; - op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR; -} - static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) { return op->alloc_reserve == RESERVE_MOVINGGC @@ -68,8 +62,8 @@ int bch2_extent_update(struct btree_trans *, subvol_inum, struct disk_reservation *, u64 *, u64, s64 *, bool); int bch2_fpunch_at(struct btree_trans *, struct btree_iter *, - subvol_inum, u64, u64 *, s64 *); -int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, u64 *, s64 *); + subvol_inum, u64, s64 *); +int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *); int bch2_write_index_default(struct bch_write_op *); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 8e66e639..d003f408 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -210,7 +210,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) s64 bch2_remap_range(struct bch_fs *c, subvol_inum dst_inum, u64 dst_offset, subvol_inum src_inum, u64 src_offset, - u64 remap_sectors, u64 *journal_seq, + u64 remap_sectors, u64 new_i_size, s64 *i_sectors_delta) { struct btree_trans trans; @@ -281,7 +281,7 @@ s64 bch2_remap_range(struct bch_fs *c, min(dst_end.offset, dst_iter.pos.offset + src_iter.pos.offset - src_want.offset), - journal_seq, i_sectors_delta); + i_sectors_delta); continue; } @@ -320,7 +320,7 @@ s64 bch2_remap_range(struct bch_fs *c, dst_end.offset - dst_iter.pos.offset)); ret = bch2_extent_update(&trans, dst_inum, &dst_iter, - new_dst.k, &disk_res, journal_seq, + new_dst.k, &disk_res, NULL, new_i_size, i_sectors_delta, true); bch2_disk_reservation_put(c, &disk_res); @@ -347,7 +347,7 @@ s64 bch2_remap_range(struct bch_fs *c, inode_u.bi_size < new_i_size) { inode_u.bi_size = new_i_size; ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?: - bch2_trans_commit(&trans, NULL, journal_seq, 0); 
+ bch2_trans_commit(&trans, NULL, NULL, 0); } bch2_trans_iter_exit(&trans, &inode_iter); diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h index 4c1b8286..3745873f 100644 --- a/libbcachefs/reflink.h +++ b/libbcachefs/reflink.h @@ -58,6 +58,6 @@ static inline __le64 *bkey_refcount(struct bkey_i *k) } s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64, - subvol_inum, u64, u64, u64 *, u64, s64 *); + subvol_inum, u64, u64, u64, s64 *); #endif /* _BCACHEFS_REFLINK_H */ diff --git a/libbcachefs/super.c b/libbcachefs/super.c index dc8f6415..3744b6d5 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -588,48 +588,53 @@ void bch2_fs_stop(struct bch_fs *c) bch2_fs_free(c); } -static const char *bch2_fs_online(struct bch_fs *c) +static int bch2_fs_online(struct bch_fs *c) { struct bch_dev *ca; - const char *err = NULL; unsigned i; - int ret; + int ret = 0; lockdep_assert_held(&bch_fs_list_lock); - if (!list_empty(&c->list)) - return NULL; - - if (__bch2_uuid_to_fs(c->sb.uuid)) - return "filesystem UUID already open"; + if (__bch2_uuid_to_fs(c->sb.uuid)) { + bch_err(c, "filesystem UUID already open"); + return -EINVAL; + } ret = bch2_fs_chardev_init(c); - if (ret) - return "error creating character device"; + if (ret) { + bch_err(c, "error creating character device"); + return ret; + } bch2_fs_debug_init(c); - if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) || - kobject_add(&c->internal, &c->kobj, "internal") || - kobject_add(&c->opts_dir, &c->kobj, "options") || - kobject_add(&c->time_stats, &c->kobj, "time_stats") || - bch2_opts_create_sysfs_files(&c->opts_dir)) - return "error creating sysfs objects"; + ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?: + kobject_add(&c->internal, &c->kobj, "internal") ?: + kobject_add(&c->opts_dir, &c->kobj, "options") ?: + kobject_add(&c->time_stats, &c->kobj, "time_stats") ?: + bch2_opts_create_sysfs_files(&c->opts_dir); + if (ret) { + bch_err(c, "error creating sysfs objects"); + return ret; 
+ } down_write(&c->state_lock); - err = "error creating sysfs objects"; - for_each_member_device(ca, c, i) - if (bch2_dev_sysfs_online(c, ca)) { + for_each_member_device(ca, c, i) { + ret = bch2_dev_sysfs_online(c, ca); + if (ret) { + bch_err(c, "error creating sysfs objects"); percpu_ref_put(&ca->ref); goto err; } + } + BUG_ON(!list_empty(&c->list)); list_add(&c->list, &bch_fs_list); - err = NULL; err: up_write(&c->state_lock); - return err; + return ret; } static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) @@ -637,13 +642,15 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) struct bch_sb_field_members *mi; struct bch_fs *c; unsigned i, iter_size; - const char *err; + int ret = 0; pr_verbose_init(opts, ""); c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); - if (!c) + if (!c) { + c = ERR_PTR(-ENOMEM); goto out; + } __module_get(THIS_MODULE); @@ -724,18 +731,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->sectors_available_lock); - if (percpu_init_rwsem(&c->mark_lock)) + ret = percpu_init_rwsem(&c->mark_lock); + if (ret) goto err; mutex_lock(&c->sb_lock); - - if (bch2_sb_to_fs(c, sb)) { - mutex_unlock(&c->sb_lock); - goto err; - } - + ret = bch2_sb_to_fs(c, sb); mutex_unlock(&c->sb_lock); + if (ret) + goto err; + scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid); c->opts = bch2_opts_default; @@ -745,8 +751,11 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->block_bits = ilog2(c->opts.block_size); c->btree_foreground_merge_threshold = BTREE_FOREGROUND_MERGE_THRESHOLD(c); - if (bch2_fs_init_fault("fs_alloc")) + if (bch2_fs_init_fault("fs_alloc")) { + bch_err(c, "fs_alloc fault injected"); + ret = -EFAULT; goto err; + } iter_size = sizeof(struct sort_iter) + (btree_blocks(c) + 1) * 2 * @@ -776,21 +785,26 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) btree_bytes(c)) || 
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) || !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits, - sizeof(u64), GFP_KERNEL)) || - bch2_io_clock_init(&c->io_clock[READ]) || - bch2_io_clock_init(&c->io_clock[WRITE]) || - bch2_fs_journal_init(&c->journal) || - bch2_fs_replicas_init(c) || - bch2_fs_btree_cache_init(c) || - bch2_fs_btree_key_cache_init(&c->btree_key_cache) || - bch2_fs_btree_iter_init(c) || - bch2_fs_btree_interior_update_init(c) || - bch2_fs_subvolumes_init(c) || - bch2_fs_io_init(c) || - bch2_fs_encryption_init(c) || - bch2_fs_compress_init(c) || - bch2_fs_ec_init(c) || - bch2_fs_fsio_init(c)) + sizeof(u64), GFP_KERNEL))) { + ret = -ENOMEM; + goto err; + } + + ret = bch2_io_clock_init(&c->io_clock[READ]) ?: + bch2_io_clock_init(&c->io_clock[WRITE]) ?: + bch2_fs_journal_init(&c->journal) ?: + bch2_fs_replicas_init(c) ?: + bch2_fs_btree_cache_init(c) ?: + bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: + bch2_fs_btree_iter_init(c) ?: + bch2_fs_btree_interior_update_init(c) ?: + bch2_fs_subvolumes_init(c) ?: + bch2_fs_io_init(c) ?: + bch2_fs_encryption_init(c) ?: + bch2_fs_compress_init(c) ?: + bch2_fs_ec_init(c) ?: + bch2_fs_fsio_init(c); + if (ret) goto err; if (c->opts.nochanges) @@ -799,8 +813,10 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mi = bch2_sb_get_members(c->disk_sb.sb); for (i = 0; i < c->sb.nr_devices; i++) if (bch2_dev_exists(c->disk_sb.sb, mi, i) && - bch2_dev_alloc(c, i)) + bch2_dev_alloc(c, i)) { + ret = -EEXIST; goto err; + } bch2_journal_entry_res_resize(&c->journal, &c->btree_root_journal_res, @@ -811,18 +827,17 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2); mutex_lock(&bch_fs_list_lock); - err = bch2_fs_online(c); + ret = bch2_fs_online(c); mutex_unlock(&bch_fs_list_lock); - if (err) { - bch_err(c, "bch2_fs_online() error: %s", err); + + if (ret) goto err; - } out: - 
pr_verbose_init(opts, "ret %i", c ? 0 : -ENOMEM); + pr_verbose_init(opts, "ret %i", PTR_ERR_OR_ZERO(c)); return c; err: bch2_fs_free(c); - c = NULL; + c = ERR_PTR(ret); goto out; } @@ -862,7 +877,6 @@ static void print_mount_opts(struct bch_fs *c) int bch2_fs_start(struct bch_fs *c) { - const char *err = "cannot allocate memory"; struct bch_sb_field_members *mi; struct bch_dev *ca; time64_t now = ktime_get_real_seconds(); @@ -898,10 +912,11 @@ int bch2_fs_start(struct bch_fs *c) if (ret) goto err; - err = "dynamic fault"; ret = -EINVAL; - if (bch2_fs_init_fault("fs_start")) + if (bch2_fs_init_fault("fs_start")) { + bch_err(c, "fs_start fault injected"); goto err; + } set_bit(BCH_FS_STARTED, &c->flags); @@ -922,7 +937,6 @@ int bch2_fs_start(struct bch_fs *c) if (c->opts.read_only || c->opts.nochanges) { bch2_fs_read_only(c); } else { - err = "error going read write"; ret = !test_bit(BCH_FS_RW, &c->flags) ? bch2_fs_read_write(c) : bch2_fs_read_write_late(c); @@ -940,25 +954,22 @@ err: case BCH_FSCK_ERRORS_NOT_FIXED: bch_err(c, "filesystem contains errors: please report this to the developers"); pr_cont("mount with -o fix_errors to repair\n"); - err = "fsck error"; break; case BCH_FSCK_REPAIR_UNIMPLEMENTED: bch_err(c, "filesystem contains errors: please report this to the developers"); pr_cont("repair unimplemented: inform the developers so that it can be added\n"); - err = "fsck error"; break; case BCH_FSCK_REPAIR_IMPOSSIBLE: bch_err(c, "filesystem contains errors, but repair impossible"); - err = "fsck error"; break; case BCH_FSCK_UNKNOWN_VERSION: - err = "unknown metadata version";; + bch_err(c, "unknown metadata version"); break; case -ENOMEM: - err = "cannot allocate memory"; + bch_err(c, "cannot allocate memory"); break; case -EIO: - err = "IO error"; + bch_err(c, "IO error"); break; } @@ -1378,7 +1389,7 @@ static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) bch2_copygc_start(c); } -static const char *__bch2_dev_read_write(struct bch_fs *c, 
struct bch_dev *ca) +static int __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) { lockdep_assert_held(&c->state_lock); @@ -1387,10 +1398,7 @@ static const char *__bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - if (bch2_dev_allocator_start(ca)) - return "error starting allocator thread"; - - return NULL; + return bch2_dev_allocator_start(ca); } int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, @@ -1416,9 +1424,8 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, bch2_write_super(c); mutex_unlock(&c->sb_lock); - if (new_state == BCH_MEMBER_STATE_rw && - __bch2_dev_read_write(c, ca)) - ret = -ENOMEM; + if (new_state == BCH_MEMBER_STATE_rw) + ret = __bch2_dev_read_write(c, ca); rebalance_wakeup(c); @@ -1702,8 +1709,8 @@ have_slot: goto err_late; if (ca->mi.state == BCH_MEMBER_STATE_rw) { - err = __bch2_dev_read_write(c, ca); - if (err) + ret = __bch2_dev_read_write(c, ca); + if (ret) goto err_late; } @@ -1747,24 +1754,27 @@ int bch2_dev_online(struct bch_fs *c, const char *path) dev_idx = sb.sb->dev_idx; err = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); - if (err) - goto err; - - if (bch2_dev_attach_bdev(c, &sb)) { - err = "bch2_dev_attach_bdev() error"; + if (err) { + bch_err(c, "error bringing %s online: %s", path, err); goto err; } + ret = bch2_dev_attach_bdev(c, &sb); + if (ret) + goto err; + ca = bch_dev_locked(c, dev_idx); - if (bch2_trans_mark_dev_sb(c, ca)) { - err = "bch2_trans_mark_dev_sb() error"; + ret = bch2_trans_mark_dev_sb(c, ca); + if (ret) { + bch_err(c, "error bringing %s online: error %i from bch2_trans_mark_dev_sb", + path, ret); goto err; } if (ca->mi.state == BCH_MEMBER_STATE_rw) { - err = __bch2_dev_read_write(c, ca); - if (err) + ret = __bch2_dev_read_write(c, ca); + if (ret) goto err; } @@ -1782,7 +1792,6 @@ int bch2_dev_online(struct bch_fs *c, const char *path) err: up_write(&c->state_lock); bch2_free_super(&sb); - bch_err(c, "error 
bringing %s online: %s", path, err); return -EINVAL; } @@ -1886,7 +1895,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_sb_field_members *mi; unsigned i, best_sb = 0; const char *err; - int ret = -ENOMEM; + int ret = 0; pr_verbose_init(opts, ""); @@ -1901,8 +1910,10 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL); - if (!sb) + if (!sb) { + ret = -ENOMEM; goto err; + } for (i = 0; i < nr_devices; i++) { ret = bch2_read_super(devices[i], &opts, &sb[i]); @@ -1939,18 +1950,20 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, i++; } - ret = -ENOMEM; c = bch2_fs_alloc(sb[best_sb].sb, opts); - if (!c) + if (IS_ERR(c)) { + ret = PTR_ERR(c); goto err; + } - err = "bch2_dev_online() error"; down_write(&c->state_lock); - for (i = 0; i < nr_devices; i++) - if (bch2_dev_attach_bdev(c, &sb[i])) { + for (i = 0; i < nr_devices; i++) { + ret = bch2_dev_attach_bdev(c, &sb[i]); + if (ret) { up_write(&c->state_lock); - goto err_print; + goto err; } + } up_write(&c->state_lock); err = "insufficient devices"; @@ -1973,10 +1986,11 @@ err_print: devices[0], err); ret = -EINVAL; err: - if (c) + if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); - for (i = 0; i < nr_devices; i++) - bch2_free_super(&sb[i]); + if (sb) + for (i = 0; i < nr_devices; i++) + bch2_free_super(&sb[i]); c = ERR_PTR(ret); goto out; } @@ -2002,12 +2016,12 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, if (err) goto err; } else { - c = bch2_fs_alloc(sb->sb, opts); - err = "cannot allocate memory"; - if (!c) - goto err; - allocated_fs = true; + c = bch2_fs_alloc(sb->sb, opts); + + err = "bch2_fs_alloc() error"; + if (IS_ERR(c)) + goto err; } err = "bch2_dev_online() error"; @@ -2033,7 +2047,7 @@ static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb, err: mutex_unlock(&bch_fs_list_lock); - if (allocated_fs) + if (allocated_fs && !IS_ERR(c)) 
bch2_fs_stop(c); else if (c) closure_put(&c->cl); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 51eb19b8..864be860 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -155,11 +155,6 @@ read_attribute(congested); read_attribute(btree_avg_write_size); -read_attribute(bucket_quantiles_last_read); -read_attribute(bucket_quantiles_last_write); -read_attribute(bucket_quantiles_fragmentation); -read_attribute(bucket_quantiles_oldest_gen); - read_attribute(reserve_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); @@ -751,76 +746,6 @@ struct attribute *bch2_fs_time_stats_files[] = { NULL }; -typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *, - size_t, void *); - -static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca, - size_t b, void *private) -{ - int rw = (private ? 1 : 0); - - return atomic64_read(&c->io_clock[rw].now) - bucket(ca, b)->io_time[rw]; -} - -static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca, - size_t b, void *private) -{ - struct bucket *g = bucket(ca, b); - return bucket_sectors_used(g->mark); -} - -static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca, - size_t b, void *private) -{ - return bucket_gc_gen(bucket(ca, b)); -} - -static int unsigned_cmp(const void *_l, const void *_r) -{ - const unsigned *l = _l; - const unsigned *r = _r; - - return cmp_int(*l, *r); -} - -static int quantiles_to_text(struct printbuf *out, - struct bch_fs *c, struct bch_dev *ca, - bucket_map_fn *fn, void *private) -{ - size_t i, n; - /* Compute 31 quantiles */ - unsigned q[31], *p; - - down_read(&ca->bucket_lock); - n = ca->mi.nbuckets; - - p = vzalloc(n * sizeof(unsigned)); - if (!p) { - up_read(&ca->bucket_lock); - return -ENOMEM; - } - - for (i = ca->mi.first_bucket; i < n; i++) - p[i] = fn(c, ca, i, private); - - sort(p, n, sizeof(unsigned), unsigned_cmp, NULL); - up_read(&ca->bucket_lock); - - while (n && - !p[n - 1]) - --n; - - for (i = 0; i 
< ARRAY_SIZE(q); i++) - q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)]; - - vfree(p); - - for (i = 0; i < ARRAY_SIZE(q); i++) - pr_buf(out, "%u ", q[i]); - pr_buf(out, "\n"); - return 0; -} - static void reserve_stats_to_text(struct printbuf *out, struct bch_dev *ca) { enum alloc_reserve i; @@ -982,15 +907,6 @@ SHOW(bch2_dev) clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) * 100 / CONGESTED_MAX); - if (attr == &sysfs_bucket_quantiles_last_read) - return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 0) ?: out.pos - buf; - if (attr == &sysfs_bucket_quantiles_last_write) - return quantiles_to_text(&out, c, ca, bucket_last_io_fn, (void *) 1) ?: out.pos - buf; - if (attr == &sysfs_bucket_quantiles_fragmentation) - return quantiles_to_text(&out, c, ca, bucket_sectors_used_fn, NULL) ?: out.pos - buf; - if (attr == &sysfs_bucket_quantiles_oldest_gen) - return quantiles_to_text(&out, c, ca, bucket_oldest_gen_fn, NULL) ?: out.pos - buf; - if (attr == &sysfs_reserve_stats) { reserve_stats_to_text(&out, ca); return out.pos - buf; @@ -1082,12 +998,6 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_stats_write, &sysfs_congested, - /* alloc info - other stats: */ - &sysfs_bucket_quantiles_last_read, - &sysfs_bucket_quantiles_last_write, - &sysfs_bucket_quantiles_fragmentation, - &sysfs_bucket_quantiles_oldest_gen, - &sysfs_reserve_stats, /* debug: */ diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index bb5da310..464ed683 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -165,8 +165,24 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, const char *name, const void *value, size_t size, int type, int flags) { + struct btree_iter inode_iter = { NULL }; + struct bch_inode_unpacked inode_u; int ret; + /* + * We need to do an inode update so that bi_journal_seq gets updated + * and fsync works: + * + * Perhaps we should be updating bi_mtime too? 
+ */ + + ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, BTREE_ITER_INTENT) ?: + bch2_inode_write(trans, &inode_iter, &inode_u); + bch2_trans_iter_exit(trans, &inode_iter); + + if (ret) + return ret; + if (value) { struct bkey_i_xattr *xattr; unsigned namelen = strlen(name); @@ -352,7 +368,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); - return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0, + return bch2_trans_do(c, NULL, NULL, 0, bch2_xattr_set(&trans, inode_inum(inode), &hash, name, value, size, handler->flags, flags));