diff --git a/.bcachefs_revision b/.bcachefs_revision index ad735e5a..f4cee9aa 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -83667254ddf04f558c90f32439e36d7a04ac3a39 +297c81ae4d608707fdabedc60158ff1f4fbec257 diff --git a/cmd_device.c b/cmd_device.c index ac586ffe..2f7a7bc1 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -167,7 +167,12 @@ int cmd_device_show(int argc, char *argv[]) static void disk_ioctl(const char *fs, const char *dev, int cmd, int flags) { - struct bch_ioctl_disk i = { .flags = flags, .dev = (__u64) dev, }; + struct bch_ioctl_disk i = { .flags = flags, }; + + if (!kstrtoull(dev, 10, &i.dev)) + i.flags |= BCH_BY_INDEX; + else + i.dev = (u64) dev; xioctl(bcache_fs_open(fs).ioctl_fd, cmd, &i); } @@ -435,11 +440,16 @@ int cmd_device_set_state(int argc, char *argv[]) struct bch_ioctl_disk_set_state i = { .flags = flags, - .dev = (__u64) argv[optind + 1], .new_state = read_string_list_or_die(argv[optind + 2], bch2_dev_state, "device state"), }; + const char *dev = argv[optind + 1]; + if (!kstrtoull(dev, 10, &i.dev)) + i.flags |= BCH_BY_INDEX; + else + i.dev = (u64) dev; + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &i); return 0; } diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 2218a00b..22d6845e 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -17,7 +17,7 @@ extern "C" { (BCH_FORCE_IF_DATA_DEGRADED| \ BCH_FORCE_IF_METADATA_DEGRADED) -#define BCH_BY_UUID (1 << 4) +#define BCH_BY_INDEX (1 << 4) /* global control dev: */ diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 46612c10..d827692b 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1288,6 +1288,9 @@ static void btree_node_write_endio(struct bio *bio) bch2_meta_write_fault("btree")) set_btree_node_write_error(b); + if (wbio->have_io_ref) + percpu_ref_put(&ca->io_ref); + if (wbio->bounce) btree_bounce_free(c, wbio->order, @@ -1304,9 +1307,6 @@ static void btree_node_write_endio(struct bio *bio) if (cl) closure_put(cl); } - - if (wbio->have_io_ref) - percpu_ref_put(&ca->io_ref); } void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 24b92a29..d3cfb00b 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -12,6 +12,51 @@ #include #include +/* returns with ref on ca->ref */ +static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, + unsigned flags) +{ + struct bch_dev *ca; + + if (flags & BCH_BY_INDEX) { + if (dev >= c->sb.nr_devices) + return ERR_PTR(-EINVAL); + + rcu_read_lock(); + ca = c->devs[dev]; + if (ca) + percpu_ref_get(&ca->ref); + rcu_read_unlock(); + + if (!ca) + return ERR_PTR(-EINVAL); + } else { + struct block_device *bdev; + char *path; + unsigned i; + + path = strndup_user((const char __user *) + (unsigned long) dev, PATH_MAX); + if (!path) + return ERR_PTR(-ENOMEM); + + bdev = lookup_bdev(strim(path)); + kfree(path); + if (IS_ERR(bdev)) + return ERR_CAST(bdev); + + for_each_member_device(ca, c, i) + if (ca->disk_sb.bdev == bdev) + goto found; + + ca = NULL; +found: + bdput(bdev); + } + + return ca; +} + static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) { struct bch_ioctl_assemble arg; @@ -110,13 +155,8 @@ static long bch2_ioctl_query_uuid(struct bch_fs *c, sizeof(c->sb.user_uuid)); } -static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start __user *user_arg) +static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg) { - struct bch_ioctl_start arg; - - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - if (arg.flags || arg.pad) return -EINVAL; @@ -129,60 +169,11 @@ static long bch2_ioctl_stop(struct bch_fs *c) return 0; } -/* returns with ref on ca->ref */ -static struct bch_dev *bch2_device_lookup(struct bch_fs *c, - const char __user *dev) +static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct block_device *bdev; - struct bch_dev *ca; - char *path; - unsigned i; - - path = strndup_user(dev, PATH_MAX); - if (!path) - return ERR_PTR(-ENOMEM); - - bdev = lookup_bdev(strim(path)); - kfree(path); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); - - for_each_member_device(ca, c, i) - if (ca->disk_sb.bdev == bdev) - goto found; - - ca = NULL; -found: - bdput(bdev); - return ca; -} - -#if 0 -static struct bch_member *bch2_uuid_lookup(struct bch_fs *c, uuid_le uuid) -{ - struct bch_sb_field_members *mi = bch2_sb_get_members(c->disk_sb); - unsigned i; - - lockdep_assert_held(&c->sb_lock); - - for (i = 0; i < c->disk_sb->nr_devices; i++) - if (!memcmp(&mi->members[i].uuid, &uuid, sizeof(uuid))) - return &mi->members[i]; - - return NULL; -} -#endif - -static long bch2_ioctl_disk_add(struct bch_fs *c, - struct bch_ioctl_disk __user *user_arg) -{ - struct bch_ioctl_disk arg; char *path; int ret; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - if (arg.flags || arg.pad) return -EINVAL; @@ -196,32 +187,29 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, return ret; } -static long bch2_ioctl_disk_remove(struct bch_fs *c, - struct bch_ioctl_disk __user *user_arg) +static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_ioctl_disk arg; struct bch_dev *ca; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad) + return -EINVAL; - ca = bch2_device_lookup(c, (const char __user *)(unsigned long) arg.dev); + ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); return bch2_dev_remove(c, ca, arg.flags); } -static long bch2_ioctl_disk_online(struct bch_fs *c, - struct bch_ioctl_disk __user *user_arg) +static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_ioctl_disk arg; char *path; int ret; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - if (arg.flags || arg.pad) return -EINVAL; @@ -234,20 +222,19 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, return ret; } -static long bch2_ioctl_disk_offline(struct bch_fs *c, - struct bch_ioctl_disk __user *user_arg) +static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_ioctl_disk arg; struct bch_dev *ca; int ret; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; - - if (arg.pad) + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, (const char __user *)(unsigned long) arg.dev); + ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); @@ -257,16 +244,19 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, } static long bch2_ioctl_disk_set_state(struct bch_fs *c, - struct bch_ioctl_disk_set_state __user *user_arg) + struct bch_ioctl_disk_set_state arg) { - struct bch_ioctl_disk_set_state arg; struct bch_dev *ca; int ret; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; + if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST| + BCH_FORCE_IF_METADATA_LOST| + BCH_FORCE_IF_DEGRADED| + BCH_BY_INDEX)) || + arg.pad[0] || arg.pad[1] || arg.pad[2]) + return -EINVAL; - ca = bch2_device_lookup(c, (const char __user *)(unsigned long) arg.dev); + ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); @@ -277,16 +267,16 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, } static long bch2_ioctl_disk_evacuate(struct bch_fs *c, - struct bch_ioctl_disk __user *user_arg) + struct bch_ioctl_disk arg) { - struct bch_ioctl_disk arg; struct bch_dev *ca; int ret; - if (copy_from_user(&arg, user_arg, sizeof(arg))) - return -EFAULT; + if ((arg.flags & ~BCH_BY_INDEX) || + arg.pad) + return -EINVAL; - ca = bch2_device_lookup(c, (const char __user *)(unsigned long) arg.dev); + ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); @@ -296,6 +286,15 @@ static long bch2_ioctl_disk_evacuate(struct bch_fs *c, return ret; } +#define BCH_IOCTL(_name, _argtype) \ +do { \ + _argtype i; \ + \ + if (copy_from_user(&i, arg, sizeof(i))) \ + return -EFAULT; \ + return bch2_ioctl_##_name(c, i); \ +} while (0) + long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) { /* ioctls that don't require admin cap: */ @@ -310,22 +309,22 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) /* ioctls that do require admin cap: */ switch (cmd) { case BCH_IOCTL_START: - return bch2_ioctl_start(c, arg); + BCH_IOCTL(start, struct bch_ioctl_start); case BCH_IOCTL_STOP: return bch2_ioctl_stop(c); case BCH_IOCTL_DISK_ADD: - return bch2_ioctl_disk_add(c, arg); + BCH_IOCTL(disk_add, struct bch_ioctl_disk); case BCH_IOCTL_DISK_REMOVE: - return bch2_ioctl_disk_remove(c, arg); + BCH_IOCTL(disk_remove, struct bch_ioctl_disk); case BCH_IOCTL_DISK_ONLINE: - return bch2_ioctl_disk_online(c, arg); + BCH_IOCTL(disk_online, struct bch_ioctl_disk); case BCH_IOCTL_DISK_OFFLINE: - return bch2_ioctl_disk_offline(c, arg); + BCH_IOCTL(disk_offline, struct bch_ioctl_disk); case BCH_IOCTL_DISK_SET_STATE: - return bch2_ioctl_disk_set_state(c, arg); + BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state); case BCH_IOCTL_DISK_EVACUATE: - return bch2_ioctl_disk_evacuate(c, arg); + BCH_IOCTL(disk_evacuate, struct bch_ioctl_disk); default: return -ENOTTY; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 44082a0e..0a64f35d 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -112,7 +112,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, n->bounce = false; n->split = true; n->put_bio = true; - n->have_io_ref = true; n->bio.bi_opf = wbio->bio.bi_opf; __bio_inc_remaining(n->orig); } else { @@ -127,7 +126,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, n->bio.bi_iter.bi_sector = ptr->offset; if (likely(percpu_ref_tryget(&ca->io_ref))) { - n->bio.bi_bdev = ca->disk_sb.bdev; + n->have_io_ref = true; + n->bio.bi_bdev = ca->disk_sb.bdev; generic_make_request(&n->bio); } else { n->have_io_ref = false; @@ -315,9 +315,8 @@ static void bch2_write_endio(struct bio *bio) struct bch_dev *ca = wbio->ca; if (bch2_dev_nonfatal_io_err_on(bio->bi_error, ca, - "data write")) { + "data write")) set_closure_fn(cl, bch2_write_io_error, index_update_wq(op)); - } if (wbio->have_io_ref) percpu_ref_put(&ca->io_ref); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index 7d250df8..0fc680b4 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -170,8 +170,7 @@ redo_peek: /* The node might have already been rewritten: */ - if (b->data->keys.seq == n.seq && - !bkey_cmp(b->key.k.p, n.pos)) { + if (b->data->keys.seq == n.seq) { ret = bch2_btree_node_rewrite(&iter, b, &cl); if (ret) { bch2_btree_iter_unlock(&iter); @@ -255,6 +254,10 @@ bch2_journal_seq_blacklisted_new(struct journal *j, u64 seq) lockdep_assert_held(&j->blacklist_lock); + /* + * When we start the journal, bch2_journal_start() will skip over @seq: + */ + bl = kzalloc(sizeof(*bl), GFP_KERNEL); if (!bl) return NULL; @@ -287,16 +290,6 @@ int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b) BUG_ON(b->level); BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)); - if (seq <= journal_seq) { - if (list_empty_careful(&j->seq_blacklist)) - return 0; - - mutex_lock(&j->blacklist_lock); - ret = journal_seq_blacklist_find(j, seq) != NULL; - mutex_unlock(&j->blacklist_lock); - return ret; - } - /* * Decrease this back to j->seq + 2 when we next rev the on disk format: * increasing it temporarily to work around bug in old kernels @@ -305,22 +298,27 @@ int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b) "bset journal seq too far in the future: %llu > %llu", seq, journal_seq); - bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting", - b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq); - - /* - * When we start the journal, bch2_journal_start() will skip over @seq: - */ + if (seq <= journal_seq && + list_empty_careful(&j->seq_blacklist)) + return 0; mutex_lock(&j->blacklist_lock); - for (i = journal_seq + 1; i <= seq; i++) { - bl = journal_seq_blacklist_find(j, i) ?: - bch2_journal_seq_blacklisted_new(j, i); - - if (!bl) { - ret = -ENOMEM; + if (seq <= journal_seq) { + bl = journal_seq_blacklist_find(j, seq); + if (!bl) goto out; + } else { + bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting", + b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq); + + for (i = journal_seq + 1; i <= seq; i++) { + bl = journal_seq_blacklist_find(j, i) ?: + bch2_journal_seq_blacklisted_new(j, i); + if (!bl) { + ret = -ENOMEM; + goto out; + } } }