Update bcachefs sources to bdf6d7c135 fixup! bcachefs: Kill journal buf bloom filter

This commit is contained in:
Kent Overstreet 2022-05-02 18:39:16 -04:00
parent 88b27bd794
commit 6f5afc0c12
55 changed files with 908 additions and 535 deletions

View File

@ -1 +1 @@
4c2d3669b15475674b750244bb1e096849352bc8 bdf6d7c1350497bc7b0be6027a51d9330645672d

View File

@ -108,7 +108,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data, qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
max_t(unsigned, btree_bytes(c) / 8, block_bytes(c))); max_t(unsigned, btree_bytes(c) / 8, block_bytes(c)));
darray_exit(data); darray_exit(&data);
} }
int cmd_dump(int argc, char *argv[]) int cmd_dump(int argc, char *argv[])

View File

@ -197,9 +197,9 @@ int cmd_format(int argc, char *argv[])
initialize = false; initialize = false;
break; break;
case O_no_opt: case O_no_opt:
darray_push(device_paths, optarg); darray_push(&device_paths, optarg);
dev_opts.path = optarg; dev_opts.path = optarg;
darray_push(devices, dev_opts); darray_push(&devices, dev_opts);
dev_opts.size = 0; dev_opts.size = 0;
break; break;
case O_quiet: case O_quiet:
@ -253,7 +253,7 @@ int cmd_format(int argc, char *argv[])
free(opts.passphrase); free(opts.passphrase);
} }
darray_exit(devices); darray_exit(&devices);
if (initialize) { if (initialize) {
struct bch_opts mount_opts = bch2_opts_empty(); struct bch_opts mount_opts = bch2_opts_empty();
@ -275,7 +275,7 @@ int cmd_format(int argc, char *argv[])
bch2_fs_stop(c); bch2_fs_stop(c);
} }
darray_exit(device_paths); darray_exit(&device_paths);
return 0; return 0;
} }

View File

@ -267,7 +267,7 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
free(dev->dev); free(dev->dev);
free(dev->label); free(dev->label);
} }
darray_exit(dev_names); darray_exit(&dev_names);
bcache_fs_close(fs); bcache_fs_close(fs);
} }

View File

@ -603,7 +603,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
update_inode(c, &root_inode); update_inode(c, &root_inode);
darray_exit(s.extents); darray_exit(&s.extents);
genradix_free(&s.hardlinks); genradix_free(&s.hardlinks);
} }

View File

@ -530,6 +530,8 @@ passed as mount parameters the persistent options are unmodified.
\subsection{File and directory options} \subsection{File and directory options}
<say something here about how attrs must be set via bcachefs attr command>
Options set on inodes (files and directories) are automatically inherited by Options set on inodes (files and directories) are automatically inherited by
their descendants, and inodes also record whether a given option was explicitly their descendants, and inodes also record whether a given option was explicitly
set or inherited from their parent. When renaming a directory would cause set or inherited from their parent. When renaming a directory would cause

View File

@ -142,17 +142,21 @@ DEFINE_EVENT(bio, journal_write,
); );
TRACE_EVENT(journal_reclaim_start, TRACE_EVENT(journal_reclaim_start,
TP_PROTO(struct bch_fs *c, u64 min_nr, TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
u64 min_nr, u64 min_key_cache,
u64 prereserved, u64 prereserved_total, u64 prereserved, u64 prereserved_total,
u64 btree_cache_dirty, u64 btree_cache_total, u64 btree_cache_dirty, u64 btree_cache_total,
u64 btree_key_cache_dirty, u64 btree_key_cache_total), u64 btree_key_cache_dirty, u64 btree_key_cache_total),
TP_ARGS(c, min_nr, prereserved, prereserved_total, TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
btree_cache_dirty, btree_cache_total, btree_cache_dirty, btree_cache_total,
btree_key_cache_dirty, btree_key_cache_total), btree_key_cache_dirty, btree_key_cache_total),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev ) __field(dev_t, dev )
__field(bool, direct )
__field(bool, kicked )
__field(u64, min_nr ) __field(u64, min_nr )
__field(u64, min_key_cache )
__field(u64, prereserved ) __field(u64, prereserved )
__field(u64, prereserved_total ) __field(u64, prereserved_total )
__field(u64, btree_cache_dirty ) __field(u64, btree_cache_dirty )
@ -163,7 +167,10 @@ TRACE_EVENT(journal_reclaim_start,
TP_fast_assign( TP_fast_assign(
__entry->dev = c->dev; __entry->dev = c->dev;
__entry->direct = direct;
__entry->kicked = kicked;
__entry->min_nr = min_nr; __entry->min_nr = min_nr;
__entry->min_key_cache = min_key_cache;
__entry->prereserved = prereserved; __entry->prereserved = prereserved;
__entry->prereserved_total = prereserved_total; __entry->prereserved_total = prereserved_total;
__entry->btree_cache_dirty = btree_cache_dirty; __entry->btree_cache_dirty = btree_cache_dirty;
@ -172,9 +179,12 @@ TRACE_EVENT(journal_reclaim_start,
__entry->btree_key_cache_total = btree_key_cache_total; __entry->btree_key_cache_total = btree_key_cache_total;
), ),
TP_printk("%d,%d min %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu", TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->direct,
__entry->kicked,
__entry->min_nr, __entry->min_nr,
__entry->min_key_cache,
__entry->prereserved, __entry->prereserved,
__entry->prereserved_total, __entry->prereserved_total,
__entry->btree_cache_dirty, __entry->btree_cache_dirty,
@ -197,45 +207,13 @@ TRACE_EVENT(journal_reclaim_finish,
__entry->nr_flushed = nr_flushed; __entry->nr_flushed = nr_flushed;
), ),
TP_printk("%d%d flushed %llu", TP_printk("%d,%d flushed %llu",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nr_flushed) __entry->nr_flushed)
); );
/* allocator: */ /* allocator: */
TRACE_EVENT(do_discards,
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
u64 need_journal_commit, u64 discarded, int ret),
TP_ARGS(c, seen, open, need_journal_commit, discarded, ret),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, seen )
__field(u64, open )
__field(u64, need_journal_commit )
__field(u64, discarded )
__field(int, ret )
),
TP_fast_assign(
__entry->dev = c->dev;
__entry->seen = seen;
__entry->open = open;
__entry->need_journal_commit = need_journal_commit;
__entry->discarded = discarded;
__entry->ret = ret;
),
TP_printk("%d%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->seen,
__entry->open,
__entry->need_journal_commit,
__entry->discarded,
__entry->ret)
);
/* bset.c: */ /* bset.c: */
DEFINE_EVENT(bpos, bkey_pack_pos_fail, DEFINE_EVENT(bpos, bkey_pack_pos_fail,
@ -367,6 +345,11 @@ DEFINE_EVENT(btree_node, btree_merge,
TP_ARGS(c, b) TP_ARGS(c, b)
); );
/*
 * btree_rewrite: trace event instantiated from the btree_node event class.
 * Takes the filesystem (c) and the btree node (b), like the neighbouring
 * btree_merge and btree_set_root events.
 */
DEFINE_EVENT(btree_node, btree_rewrite,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
DEFINE_EVENT(btree_node, btree_set_root, DEFINE_EVENT(btree_node, btree_set_root,
TP_PROTO(struct bch_fs *c, struct btree *b), TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b) TP_ARGS(c, b)
@ -440,79 +423,18 @@ TRACE_EVENT(btree_node_relock_fail,
/* Garbage collection */ /* Garbage collection */
DEFINE_EVENT(btree_node, btree_gc_rewrite_node, DEFINE_EVENT(bch_fs, gc_gens_start,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
DEFINE_EVENT(btree_node, btree_gc_rewrite_node_fail,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b)
);
DEFINE_EVENT(bch_fs, gc_start,
TP_PROTO(struct bch_fs *c), TP_PROTO(struct bch_fs *c),
TP_ARGS(c) TP_ARGS(c)
); );
DEFINE_EVENT(bch_fs, gc_end, DEFINE_EVENT(bch_fs, gc_gens_end,
TP_PROTO(struct bch_fs *c),
TP_ARGS(c)
);
DEFINE_EVENT(bch_fs, gc_cannot_inc_gens,
TP_PROTO(struct bch_fs *c), TP_PROTO(struct bch_fs *c),
TP_ARGS(c) TP_ARGS(c)
); );
/* Allocator */ /* Allocator */
TRACE_EVENT(alloc_scan,
TP_PROTO(struct bch_dev *ca, u64 found, u64 inc_gen, u64 inc_gen_skipped),
TP_ARGS(ca, found, inc_gen, inc_gen_skipped),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u64, found )
__field(u64, inc_gen )
__field(u64, inc_gen_skipped )
),
TP_fast_assign(
__entry->dev = ca->dev;
__entry->found = found;
__entry->inc_gen = inc_gen;
__entry->inc_gen_skipped = inc_gen_skipped;
),
TP_printk("%d,%d found %llu inc_gen %llu inc_gen_skipped %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->found, __entry->inc_gen, __entry->inc_gen_skipped)
);
TRACE_EVENT(invalidate,
TP_PROTO(struct bch_dev *ca, u64 offset, unsigned sectors),
TP_ARGS(ca, offset, sectors),
TP_STRUCT__entry(
__field(unsigned, sectors )
__field(dev_t, dev )
__field(__u64, offset )
),
TP_fast_assign(
__entry->dev = ca->dev;
__entry->offset = offset,
__entry->sectors = sectors;
),
TP_printk("invalidated %u sectors at %d,%d sector=%llu",
__entry->sectors,
MAJOR(__entry->dev),
MINOR(__entry->dev),
__entry->offset)
);
TRACE_EVENT(bucket_alloc, TRACE_EVENT(bucket_alloc,
TP_PROTO(struct bch_dev *ca, const char *alloc_reserve), TP_PROTO(struct bch_dev *ca, const char *alloc_reserve),
TP_ARGS(ca, alloc_reserve), TP_ARGS(ca, alloc_reserve),
@ -579,6 +501,59 @@ TRACE_EVENT(bucket_alloc_fail,
__entry->ret) __entry->ret)
); );
/*
 * discard_buckets: summary emitted by bch2_do_discards_work() once it has
 * finished a pass.
 *
 * @c:				filesystem; its dev_t is recorded so the event
 *				can be attributed to a filesystem
 * @seen, @open,
 * @need_journal_commit,
 * @discarded:			u64 counters supplied by the caller
 *				(presumably bucket counts - confirm at call site)
 * @ret:			return code of the pass
 *
 * Output is prefixed with "major,minor" like every other event in this file.
 */
TRACE_EVENT(discard_buckets,
	TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
		 u64 need_journal_commit, u64 discarded, int ret),
	TP_ARGS(c, seen, open, need_journal_commit, discarded, ret),

	TP_STRUCT__entry(
		__field(dev_t,		dev			)
		__field(u64,		seen			)
		__field(u64,		open			)
		__field(u64,		need_journal_commit	)
		__field(u64,		discarded		)
		__field(int,		ret			)
	),

	TP_fast_assign(
		__entry->dev			= c->dev;
		__entry->seen			= seen;
		__entry->open			= open;
		__entry->need_journal_commit	= need_journal_commit;
		__entry->discarded		= discarded;
		__entry->ret			= ret;
	),

	/* "%d,%d": without the comma, major and minor run together (8:16 -> "816") */
	TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->seen,
		  __entry->open,
		  __entry->need_journal_commit,
		  __entry->discarded,
		  __entry->ret)
);
/*
 * invalidate_bucket: emitted by invalidate_one_bucket() after the alloc key
 * update has been queued successfully.
 *
 * @c:		filesystem; its dev_t (c->dev) identifies the filesystem
 * @dev:	index of the member device owning the bucket (the caller passes
 *		the inode field of the alloc key position)
 * @bucket:	bucket number on that device (the caller passes the offset
 *		field of the alloc key position)
 */
TRACE_EVENT(invalidate_bucket,
TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket),
TP_ARGS(c, dev, bucket),
TP_STRUCT__entry(
__field(dev_t, dev )
__field(u32, dev_idx )
__field(u64, bucket )
),
TP_fast_assign(
/* __entry->dev is the filesystem's dev_t; the "dev" argument is stored as dev_idx */
__entry->dev = c->dev;
__entry->dev_idx = dev;
__entry->bucket = bucket;
),
/* printed as "<major>:<minor> invalidated <dev_idx>:<bucket>" */
TP_printk("%d:%d invalidated %u:%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->dev_idx, __entry->bucket)
);
/* Moving IO */ /* Moving IO */
DEFINE_EVENT(bkey, move_extent, DEFINE_EVENT(bkey, move_extent,
@ -586,7 +561,7 @@ DEFINE_EVENT(bkey, move_extent,
TP_ARGS(k) TP_ARGS(k)
); );
DEFINE_EVENT(bkey, move_alloc_fail, DEFINE_EVENT(bkey, move_alloc_mem_fail,
TP_PROTO(const struct bkey *k), TP_PROTO(const struct bkey *k),
TP_ARGS(k) TP_ARGS(k)
); );
@ -670,7 +645,7 @@ TRACE_EVENT(copygc_wait,
__entry->wait_amount, __entry->until) __entry->wait_amount, __entry->until)
); );
DECLARE_EVENT_CLASS(transaction_restart, DECLARE_EVENT_CLASS(transaction_event,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip), TP_ARGS(trans_fn, caller_ip),
@ -688,55 +663,61 @@ DECLARE_EVENT_CLASS(transaction_restart,
TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip) TP_printk("%s %pS", __entry->trans_fn, (void *) __entry->caller_ip)
); );
DEFINE_EVENT(transaction_restart, transaction_restart_ip, DEFINE_EVENT(transaction_event, transaction_commit,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim, DEFINE_EVENT(transaction_event, transaction_restart_ip,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, DEFINE_EVENT(transaction_event, trans_restart_journal_res_get,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_journal_reclaim, DEFINE_EVENT(transaction_event, trans_restart_journal_preres_get,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, DEFINE_EVENT(transaction_event, trans_restart_journal_reclaim,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_traverse_all, DEFINE_EVENT(transaction_event, trans_restart_fault_inject,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, DEFINE_EVENT(transaction_event, trans_traverse_all,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)
); );
DEFINE_EVENT(transaction_restart, trans_restart_key_cache_raced, DEFINE_EVENT(transaction_event, trans_restart_mark_replicas,
TP_PROTO(const char *trans_fn,
unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip)
);
DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced,
TP_PROTO(const char *trans_fn, TP_PROTO(const char *trans_fn,
unsigned long caller_ip), unsigned long caller_ip),
TP_ARGS(trans_fn, caller_ip) TP_ARGS(trans_fn, caller_ip)

View File

@ -689,7 +689,7 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
struct dirent *d; struct dirent *d;
dev_names devs; dev_names devs;
darray_init(devs); darray_init(&devs);
while ((errno = 0), (d = readdir(dir))) { while ((errno = 0), (d = readdir(dir))) {
struct dev_name n = { 0, NULL, NULL }; struct dev_name n = { 0, NULL, NULL };
@ -713,7 +713,7 @@ dev_names bchu_fs_get_devices(struct bchfs_handle fs)
n.label = read_file_str(fs.sysfs_fd, label_attr); n.label = read_file_str(fs.sysfs_fd, label_attr);
free(label_attr); free(label_attr);
darray_push(devs, n); darray_push(&devs, n);
} }
closedir(dir); closedir(dir);

View File

@ -382,7 +382,8 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
return -EINVAL; return -EINVAL;
} }
if (!a.v->io_time[READ]) { if (!a.v->io_time[READ] &&
test_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags)) {
pr_buf(err, "cached bucket with read_time == 0"); pr_buf(err, "cached bucket with read_time == 0");
return -EINVAL; return -EINVAL;
} }
@ -540,6 +541,7 @@ err:
} }
int bch2_trans_mark_alloc(struct btree_trans *trans, int bch2_trans_mark_alloc(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_i *new,
unsigned flags) unsigned flags)
{ {
@ -587,7 +589,6 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
!new_a->io_time[READ]) !new_a->io_time[READ])
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
old_lru = alloc_lru_idx(old_a); old_lru = alloc_lru_idx(old_a);
new_lru = alloc_lru_idx(*new_a); new_lru = alloc_lru_idx(*new_a);
@ -1065,7 +1066,7 @@ static void bch2_do_discards_work(struct work_struct *work)
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
trace_do_discards(c, seen, open, need_journal_commit, discarded, ret); trace_discard_buckets(c, seen, open, need_journal_commit, discarded, ret);
} }
void bch2_do_discards(struct bch_fs *c) void bch2_do_discards(struct bch_fs *c)
@ -1087,6 +1088,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
POS(ca->dev_idx, 0), 0); POS(ca->dev_idx, 0), 0);
next_lru:
k = bch2_btree_iter_peek(&lru_iter); k = bch2_btree_iter_peek(&lru_iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
@ -1095,9 +1097,20 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
if (!k.k || k.k->p.inode != ca->dev_idx) if (!k.k || k.k->p.inode != ca->dev_idx)
goto out; goto out;
if (bch2_trans_inconsistent_on(k.k->type != KEY_TYPE_lru, trans, if (k.k->type != KEY_TYPE_lru) {
"non lru key in lru btree")) pr_buf(&buf, "non lru key in lru btree:\n ");
bch2_bkey_val_to_text(&buf, c, k);
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
bch_err(c, "%s", buf.buf);
bch2_btree_iter_advance(&lru_iter);
goto next_lru;
} else {
bch2_trans_inconsistent(trans, "%s", buf.buf);
ret = -EINVAL;
goto out; goto out;
}
}
idx = k.k->p.offset; idx = k.k->p.offset;
bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx); bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
@ -1110,14 +1123,20 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
if (idx != alloc_lru_idx(a->v)) { if (idx != alloc_lru_idx(a->v)) {
pr_buf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n "); pr_buf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
pr_buf(&buf, "\n "); pr_buf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k); bch2_bkey_val_to_text(&buf, c, k);
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
bch_err(c, "%s", buf.buf);
bch2_btree_iter_advance(&lru_iter);
goto next_lru;
} else {
bch2_trans_inconsistent(trans, "%s", buf.buf); bch2_trans_inconsistent(trans, "%s", buf.buf);
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
}
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
a->v.gen++; a->v.gen++;
@ -1129,6 +1148,10 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca)
ret = bch2_trans_update(trans, &alloc_iter, &a->k_i, ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
BTREE_TRIGGER_BUCKET_INVALIDATE); BTREE_TRIGGER_BUCKET_INVALIDATE);
if (ret)
goto out;
trace_invalidate_bucket(c, a->k.p.inode, a->k.p.offset);
out: out:
bch2_trans_iter_exit(trans, &alloc_iter); bch2_trans_iter_exit(trans, &alloc_iter);
bch2_trans_iter_exit(trans, &lru_iter); bch2_trans_iter_exit(trans, &lru_iter);

View File

@ -125,8 +125,8 @@ static inline bool bkey_is_alloc(const struct bkey *k)
int bch2_alloc_read(struct bch_fs *); int bch2_alloc_read(struct bch_fs *);
int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
struct bkey_i *, unsigned); struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *); void bch2_do_discards(struct bch_fs *);

View File

@ -276,10 +276,11 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
u64 *skipped_open, u64 *skipped_open,
u64 *skipped_need_journal_commit, u64 *skipped_need_journal_commit,
u64 *skipped_nouse, u64 *skipped_nouse,
struct bkey_s_c freespace_k,
struct closure *cl) struct closure *cl)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter iter; struct btree_iter iter = { NULL };
struct bkey_s_c k; struct bkey_s_c k;
struct open_bucket *ob; struct open_bucket *ob;
struct bch_alloc_v4 a; struct bch_alloc_v4 a;
@ -288,6 +289,16 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret; int ret;
if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
pr_buf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
" freespace key ",
ca->mi.first_bucket, ca->mi.nbuckets);
bch2_bkey_val_to_text(&buf, c, freespace_k);
bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO);
goto err;
}
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&iter); k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k); ret = bkey_err(k);
@ -298,29 +309,26 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
bch2_alloc_to_v4(k, &a); bch2_alloc_to_v4(k, &a);
if (bch2_fs_inconsistent_on(a.data_type != BCH_DATA_free, c, if (genbits != (alloc_freespace_genbits(a) >> 56)) {
"non free bucket in freespace btree (state %s)\n" pr_buf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
" %s\n" " freespace key ",
" at %llu (genbits %u)", genbits, alloc_freespace_genbits(a) >> 56);
bch2_data_types[a.data_type], bch2_bkey_val_to_text(&buf, c, freespace_k);
(bch2_bkey_val_to_text(&buf, c, k), buf.buf), pr_buf(&buf, "\n ");
free_entry, genbits)) { bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO); ob = ERR_PTR(-EIO);
goto err; goto err;
} }
if (bch2_fs_inconsistent_on(genbits != (alloc_freespace_genbits(a) >> 56), c, if (a.data_type != BCH_DATA_free) {
"bucket in freespace btree with wrong genbits (got %u should be %llu)\n" pr_buf(&buf, "non free bucket in freespace btree\n"
" %s", " freespace key ");
genbits, alloc_freespace_genbits(a) >> 56, bch2_bkey_val_to_text(&buf, c, freespace_k);
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { pr_buf(&buf, "\n ");
ob = ERR_PTR(-EIO); bch2_bkey_val_to_text(&buf, c, k);
goto err; bch2_trans_inconsistent(trans, "%s", buf.buf);
}
if (bch2_fs_inconsistent_on(b < ca->mi.first_bucket || b >= ca->mi.nbuckets, c,
"freespace btree has bucket outside allowed range (got %llu, valid %u-%llu)",
b, ca->mi.first_bucket, ca->mi.nbuckets)) {
ob = ERR_PTR(-EIO); ob = ERR_PTR(-EIO);
goto err; goto err;
} }
@ -446,13 +454,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
BUG_ON(ca->new_fs_bucket_idx); BUG_ON(ca->new_fs_bucket_idx);
for_each_btree_key(trans, iter, BTREE_ID_freespace, for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
POS(ca->dev_idx, *cur_bucket), 0, k, ret) { POS(ca->dev_idx, *cur_bucket), 0, k, ret) {
if (k.k->p.inode != ca->dev_idx) if (k.k->p.inode != ca->dev_idx)
break; break;
for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k)); for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
*cur_bucket != k.k->p.offset && !ob; *cur_bucket < k.k->p.offset && !ob;
(*cur_bucket)++) { (*cur_bucket)++) {
if (btree_trans_too_many_iters(trans)) { if (btree_trans_too_many_iters(trans)) {
ob = ERR_PTR(-EINTR); ob = ERR_PTR(-EINTR);
@ -466,7 +474,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
skipped_open, skipped_open,
skipped_need_journal_commit, skipped_need_journal_commit,
skipped_nouse, skipped_nouse,
cl); k, cl);
} }
if (ob) if (ob)
break; break;

View File

@ -494,11 +494,6 @@ struct bch_dev {
enum { enum {
/* startup: */ /* startup: */
BCH_FS_ALLOC_CLEAN,
BCH_FS_INITIAL_GC_DONE,
BCH_FS_INITIAL_GC_UNFIXED,
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED, BCH_FS_STARTED,
BCH_FS_MAY_GO_RW, BCH_FS_MAY_GO_RW,
BCH_FS_RW, BCH_FS_RW,
@ -508,17 +503,22 @@ enum {
BCH_FS_STOPPING, BCH_FS_STOPPING,
BCH_FS_EMERGENCY_RO, BCH_FS_EMERGENCY_RO,
BCH_FS_WRITE_DISABLE_COMPLETE, BCH_FS_WRITE_DISABLE_COMPLETE,
BCH_FS_CLEAN_SHUTDOWN,
/* fsck passes: */
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
BCH_FS_CHECK_LRUS_DONE,
BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */
BCH_FS_NEED_ANOTHER_GC,
/* errors: */ /* errors: */
BCH_FS_ERROR, BCH_FS_ERROR,
BCH_FS_TOPOLOGY_ERROR, BCH_FS_TOPOLOGY_ERROR,
BCH_FS_ERRORS_FIXED, BCH_FS_ERRORS_FIXED,
BCH_FS_ERRORS_NOT_FIXED, BCH_FS_ERRORS_NOT_FIXED,
/* misc: */
BCH_FS_NEED_ANOTHER_GC,
BCH_FS_DELETED_NODES,
BCH_FS_REBUILD_REPLICAS,
}; };
struct btree_debug { struct btree_debug {
@ -585,6 +585,7 @@ struct bch_fs {
struct list_head list; struct list_head list;
struct kobject kobj; struct kobject kobj;
struct kobject counters_kobj;
struct kobject internal; struct kobject internal;
struct kobject opts_dir; struct kobject opts_dir;
struct kobject time_stats; struct kobject time_stats;
@ -901,12 +902,15 @@ struct bch_fs {
u64 last_bucket_seq_cleanup; u64 last_bucket_seq_cleanup;
/* The rest of this all shows up in sysfs */ /* TODO rewrite as counters - The rest of this all shows up in sysfs */
atomic_long_t read_realloc_races; atomic_long_t read_realloc_races;
atomic_long_t extent_migrate_done; atomic_long_t extent_migrate_done;
atomic_long_t extent_migrate_raced; atomic_long_t extent_migrate_raced;
atomic_long_t bucket_alloc_fail; atomic_long_t bucket_alloc_fail;
u64 counters_on_mount[BCH_COUNTER_NR];
u64 __percpu *counters;
unsigned btree_gc_periodic:1; unsigned btree_gc_periodic:1;
unsigned copy_gc_enabled:1; unsigned copy_gc_enabled:1;
bool promote_whole_extents; bool promote_whole_extents;

View File

@ -1086,7 +1086,8 @@ struct bch_sb_field {
x(clean, 6) \ x(clean, 6) \
x(replicas, 7) \ x(replicas, 7) \
x(journal_seq_blacklist, 8) \ x(journal_seq_blacklist, 8) \
x(journal_v2, 9) x(journal_v2, 9) \
x(counters, 10)
enum bch_sb_field_type { enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr, #define x(f, nr) BCH_SB_FIELD_##f = nr,
@ -1319,6 +1320,25 @@ struct bch_sb_field_disk_groups {
struct bch_disk_group entries[0]; struct bch_disk_group entries[0];
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
/* BCH_SB_FIELD_counters */
/*
 * X-macro list of persistent counters: x(name, number).
 * Expand it with a caller-supplied definition of x().
 */
#define BCH_PERSISTENT_COUNTERS() \
x(io_read, 0) \
x(io_write, 1) \
x(io_move, 2)
/*
 * BCH_COUNTER_<name> enumerators, followed by BCH_COUNTER_NR (the number of
 * counters).
 *
 * Note that the x() used here discards the explicit number argument, so the
 * enumerator values are assigned by position in the list: entries must stay
 * in ascending, gap-free order for BCH_COUNTER_<name> to equal its number.
 */
enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
BCH_PERSISTENT_COUNTERS()
#undef x
BCH_COUNTER_NR
};
/*
 * Superblock field holding counters: the common superblock field header
 * followed by a variable-length array of little-endian 64-bit values.
 * NOTE(review): presumably one entry per BCH_COUNTER_* index - confirm
 * against the code that reads/writes this field.
 */
struct bch_sb_field_counters {
struct bch_sb_field field;
__le64 d[0]; /* zero-length trailing array; length is not stored in this struct */
};
/* /*
* On clean shutdown, store btree roots and current journal sequence number in * On clean shutdown, store btree roots and current journal sequence number in
* the superblock: * the superblock:

View File

@ -27,8 +27,8 @@ struct bkey_ops {
void (*swab)(struct bkey_s); void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trans_trigger)(struct btree_trans *, struct bkey_s_c, int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_i *, unsigned); struct bkey_s_c, struct bkey_i *, unsigned);
int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c, int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c,
struct bkey_s_c, unsigned); struct bkey_s_c, unsigned);
void (*compat)(enum btree_id id, unsigned version, void (*compat)(enum btree_id id, unsigned version,
@ -80,16 +80,80 @@ static inline int bch2_mark_key(struct btree_trans *trans,
: 0; : 0;
} }
static inline int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, enum btree_update_flags {
struct bkey_i *new, unsigned flags) __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
__BTREE_TRIGGER_INSERT,
__BTREE_TRIGGER_OVERWRITE,
__BTREE_TRIGGER_GC,
__BTREE_TRIGGER_BUCKET_INVALIDATE,
__BTREE_TRIGGER_NOATOMIC,
};
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
/*
 * Bitmask indexed by key type: bit (1U << KEY_TYPE_x) is set for each key
 * type listed below (alloc variants, stripe, inode variants, snapshot).
 * NOTE(review): the name suggests these are the key types whose triggers want
 * the old and new key in a single call rather than separate overwrite/insert
 * calls - confirm against the code that tests this mask.
 */
#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_alloc_v4)| \
(1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \
(1U << KEY_TYPE_snapshot))
static inline int bch2_trans_mark_key(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
{ {
const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type]; const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type];
return ops->trans_trigger return ops->trans_trigger
? ops->trans_trigger(trans, old, new, flags) ? ops->trans_trigger(trans, btree_id, level, old, new, flags)
: 0; : 0;
} }
static inline int bch2_trans_mark_old(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
deleted.k.p = old.k->p;
return bch2_trans_mark_key(trans, btree_id, level, old, &deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
static inline int bch2_trans_mark_new(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_i *new, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
deleted.k.p = new->k.p;
return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
BTREE_TRIGGER_INSERT|flags);
}
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,

View File

@ -1745,18 +1745,14 @@ static void bch2_gc_stripes_reset(struct bch_fs *c, bool metadata_only)
*/ */
int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
{ {
u64 start_time = local_clock();
unsigned iter = 0; unsigned iter = 0;
int ret; int ret;
lockdep_assert_held(&c->state_lock); lockdep_assert_held(&c->state_lock);
trace_gc_start(c);
down_write(&c->gc_lock); down_write(&c->gc_lock);
/* flush interior btree updates: */ bch2_btree_interior_updates_flush(c);
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_gc_start(c, metadata_only) ?: ret = bch2_gc_start(c, metadata_only) ?:
bch2_gc_alloc_start(c, metadata_only) ?: bch2_gc_alloc_start(c, metadata_only) ?:
@ -1845,9 +1841,6 @@ out:
up_write(&c->gc_lock); up_write(&c->gc_lock);
trace_gc_end(c);
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
/* /*
* At startup, allocations can happen directly instead of via the * At startup, allocations can happen directly instead of via the
* allocator thread - issue wakeup in case they blocked on gc_lock: * allocator thread - issue wakeup in case they blocked on gc_lock:
@ -1984,6 +1977,7 @@ int bch2_gc_gens(struct bch_fs *c)
if (!mutex_trylock(&c->gc_gens_lock)) if (!mutex_trylock(&c->gc_gens_lock))
return 0; return 0;
trace_gc_gens_start(c);
down_read(&c->gc_lock); down_read(&c->gc_lock);
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
@ -2035,6 +2029,7 @@ int bch2_gc_gens(struct bch_fs *c)
c->gc_count++; c->gc_count++;
bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time); bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
trace_gc_gens_end(c);
err: err:
for_each_member_device(ca, c, i) { for_each_member_device(ca, c, i) {
kvfree(ca->oldest_gen); kvfree(ca->oldest_gen);

View File

@ -820,10 +820,10 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
printbuf_reset(&buf); printbuf_reset(&buf);
if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) {
printbuf_reset(&buf); printbuf_reset(&buf);
pr_buf(&buf, "invalid bkey:\n "); pr_buf(&buf, "invalid bkey: ");
bch2_bkey_val_to_text(&buf, c, u.s_c);
pr_buf(&buf, " \n");
bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); bset_key_invalid(c, b, u.s_c, updated_range, write, &buf);
pr_buf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "%s", buf.buf); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "%s", buf.buf);
@ -1081,10 +1081,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
!bversion_cmp(u.k->version, MAX_VERSION))) { !bversion_cmp(u.k->version, MAX_VERSION))) {
printbuf_reset(&buf); printbuf_reset(&buf);
pr_buf(&buf, "invalid bkey\n "); pr_buf(&buf, "invalid bkey: ");
bch2_bkey_val_to_text(&buf, c, u.s_c);
pr_buf(&buf, "\n ");
bch2_bkey_val_invalid(c, u.s_c, READ, &buf); bch2_bkey_val_invalid(c, u.s_c, READ, &buf);
pr_buf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "%s", buf.buf); btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i, "%s", buf.buf);
@ -2102,29 +2102,33 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
} }
} }
static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag) static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
{ {
struct bucket_table *tbl; struct bucket_table *tbl;
struct rhash_head *pos; struct rhash_head *pos;
struct btree *b; struct btree *b;
unsigned i; unsigned i;
bool ret = false;
restart: restart:
rcu_read_lock(); rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos) for_each_cached_btree(b, c, tbl, i, pos)
if (test_bit(flag, &b->flags)) { if (test_bit(flag, &b->flags)) {
rcu_read_unlock(); rcu_read_unlock();
wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
ret = true;
goto restart; goto restart;
} }
rcu_read_unlock(); rcu_read_unlock();
return ret;
} }
void bch2_btree_flush_all_reads(struct bch_fs *c) bool bch2_btree_flush_all_reads(struct bch_fs *c)
{ {
__bch2_btree_flush_all(c, BTREE_NODE_read_in_flight); return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
} }
void bch2_btree_flush_all_writes(struct bch_fs *c) bool bch2_btree_flush_all_writes(struct bch_fs *c)
{ {
__bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
} }

View File

@ -152,8 +152,8 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
} }
void bch2_btree_flush_all_reads(struct bch_fs *); bool bch2_btree_flush_all_reads(struct bch_fs *);
void bch2_btree_flush_all_writes(struct bch_fs *); bool bch2_btree_flush_all_writes(struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id, static inline void compat_bformat(unsigned level, enum btree_id btree_id,
unsigned version, unsigned big_endian, unsigned version, unsigned big_endian,

View File

@ -1527,6 +1527,30 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
return true; return true;
} }
/*
 * Move @path one level up the btree.
 *
 * The node at the current level is unlocked and its slot is replaced with the
 * BTREE_ITER_NO_NODE_UP sentinel, then path->level is incremented and the path
 * is flagged BTREE_ITER_NEED_TRAVERSE so that it gets traversed again before
 * it is used.
 */
static void btree_path_set_level_up(struct btree_path *path)
{
/* Unlock first: the slot is overwritten with a sentinel on the next line. */
btree_node_unlock(path, path->level);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++;
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
}
/*
 * Point @path at @new_level and flag it as needing to be traversed again.
 *
 * Every level above the new one for which btree_lock_want() reports
 * BTREE_NODE_UNLOCKED is unlocked. The path is then marked
 * BTREE_ITER_NEED_TRAVERSE and run through bch2_btree_path_verify().
 */
static void btree_path_set_level_down(struct btree_trans *trans,
				      struct btree_path *path,
				      unsigned new_level)
{
	unsigned level;

	/*
	 * The level has to be updated before the scan below: the scan starts
	 * from the value stored in the path, not from the argument.
	 */
	path->level = new_level;
	level = path->level + 1;

	while (level < BTREE_MAX_DEPTH) {
		if (btree_lock_want(path, level) == BTREE_NODE_UNLOCKED)
			btree_node_unlock(path, level);
		level++;
	}

	btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
	bch2_btree_path_verify(trans, path);
}
static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
struct btree_path *path, struct btree_path *path,
int check_pos) int check_pos)
@ -2100,7 +2124,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
struct btree_trans *trans = iter->trans; struct btree_trans *trans = iter->trans;
struct btree_path *path = iter->path; struct btree_path *path = iter->path;
struct btree *b = NULL; struct btree *b = NULL;
unsigned l;
int ret; int ret;
BUG_ON(trans->restarted); BUG_ON(trans->restarted);
@ -2113,10 +2136,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
/* got to end? */ /* got to end? */
if (!btree_path_node(path, path->level + 1)) { if (!btree_path_node(path, path->level + 1)) {
btree_node_unlock(path, path->level); btree_path_set_level_up(path);
path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
path->level++;
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
return NULL; return NULL;
} }
@ -2148,14 +2168,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
iter->flags & BTREE_ITER_INTENT, iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter)); btree_iter_ip_allocated(iter));
path->level = iter->min_depth; btree_path_set_level_down(trans, path, iter->min_depth);
for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
btree_node_unlock(path, l);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_iter_verify(iter);
ret = bch2_btree_path_traverse(trans, path, iter->flags); ret = bch2_btree_path_traverse(trans, path, iter->flags);
if (ret) if (ret)
@ -2186,6 +2199,7 @@ err:
inline bool bch2_btree_iter_advance(struct btree_iter *iter) inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{ {
if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) {
struct bpos pos = iter->k.p; struct bpos pos = iter->k.p;
bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
? bpos_cmp(pos, SPOS_MAX) ? bpos_cmp(pos, SPOS_MAX)
@ -2195,6 +2209,13 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
pos = bkey_successor(iter, pos); pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos); bch2_btree_iter_set_pos(iter, pos);
return ret; return ret;
} else {
if (!btree_path_node(iter->path, iter->path->level))
return true;
iter->advanced = true;
return false;
}
} }
inline bool bch2_btree_iter_rewind(struct btree_iter *iter) inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
@ -2377,6 +2398,8 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
struct bpos iter_pos; struct bpos iter_pos;
int ret; int ret;
EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
if (iter->update_path) { if (iter->update_path) {
bch2_path_put(trans, iter->update_path, bch2_path_put(trans, iter->update_path,
iter->flags & BTREE_ITER_INTENT); iter->flags & BTREE_ITER_INTENT);
@ -2494,6 +2517,100 @@ out:
return k; return k;
} }
/**
 * bch2_btree_iter_peek_all_levels: returns the first key greater than or equal
 * to iterator's current position, returning keys from every level of the btree.
 * For keys at different levels of the btree that compare equal, the key from
 * the lower level (leaf) is returned first.
 *
 * The iterator must have been set up with both BTREE_ITER_ALL_LEVELS and
 * BTREE_ITER_ALL_SNAPSHOTS, and its path must not be a cached path.
 *
 * Returns:
 *  - an error key (bkey_s_c_err()) if traversing the path failed;
 *  - bkey_s_c_null if the path has no node at its current level;
 *  - otherwise the key found, with iter->pos updated to that key's position.
 *
 * In all cases the path is marked should_be_locked before returning.
 */
struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct bkey_s_c k;
int ret;
EBUG_ON(iter->path->cached);
bch2_btree_iter_verify(iter);
BUG_ON(iter->path->level < iter->min_depth);
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS));
/*
 * Each pass re-positions and re-traverses the path at iter->pos, peeks at
 * the path's current level, and then either changes level/position and
 * retries (continue) or accepts the key (break).
 */
while (1) {
iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
k = bkey_s_c_err(ret);
goto out;
}
/* Already at end? */
if (!btree_path_node(iter->path, iter->path->level)) {
k = bkey_s_c_null;
goto out;
}
k = btree_path_level_peek_all(trans->c,
&iter->path->l[iter->path->level], &iter->k);
/*
 * Check if we should go up to the parent node: either this level has
 * no key to return, or the caller already advanced past a key located
 * at this node's own key position (presumably the last position the
 * node covers - TODO confirm). iter->pos becomes the node's key
 * position so that the parent is searched from there.
 */
if (!k.k ||
(iter->advanced &&
!bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) {
iter->pos = path_l(iter->path)->b->key.k.p;
btree_path_set_level_up(iter->path);
iter->advanced = false;
continue;
}
/*
 * Check if we should go back down to a leaf:
 * If we're not in a leaf node, we only return the current key
 * if it exactly matches iter->pos - otherwise we first have to
 * go back to the leaf.
 *
 * k.k is known to be non-NULL here (the check above continues when it
 * is NULL), so the !k.k term cannot be true on this path.
 */
if (iter->path->level != iter->min_depth &&
(iter->advanced ||
!k.k ||
bpos_cmp(iter->pos, k.k->p))) {
btree_path_set_level_down(trans, iter->path, iter->min_depth);
iter->pos = bpos_successor(iter->pos);
iter->advanced = false;
continue;
}
/*
 * Check if we should go to the next key: at min_depth, if the caller
 * advanced and the key found sits exactly at iter->pos, step iter->pos
 * to its successor and look again.
 */
if (iter->path->level == iter->min_depth &&
iter->advanced &&
k.k &&
!bpos_cmp(iter->pos, k.k->p)) {
iter->pos = bpos_successor(iter->pos);
iter->advanced = false;
continue;
}
/*
 * At min_depth with a key at a position other than iter->pos: the
 * pending advance is satisfied by this key, so just clear the flag.
 */
if (iter->advanced &&
iter->path->level == iter->min_depth &&
bpos_cmp(k.k->p, iter->pos))
iter->advanced = false;
BUG_ON(iter->advanced);
BUG_ON(!k.k);
break;
}
/* Only reached via break, i.e. with a valid key: record its position. */
iter->pos = k.k->p;
out:
iter->path->should_be_locked = true;
bch2_btree_iter_verify(iter);
return k;
}
/** /**
* bch2_btree_iter_next: returns first key greater than iterator's current * bch2_btree_iter_next: returns first key greater than iterator's current
* position * position
@ -2650,9 +2767,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
EBUG_ON(iter->path->level);
bch2_btree_iter_verify(iter); bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
/* extents can't span inode numbers: */ /* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_IS_EXTENTS) && if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
@ -2687,7 +2805,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) && if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
(next_update = bch2_journal_keys_peek_slot(trans->c, (next_update = bch2_journal_keys_peek_slot(trans->c,
iter->btree_id, 0, iter->pos))) { iter->btree_id,
iter->path->level,
iter->pos))) {
iter->k = next_update->k; iter->k = next_update->k;
k = bkey_i_to_s_c(next_update); k = bkey_i_to_s_c(next_update);
goto out; goto out;
@ -2704,6 +2824,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
} else { } else {
struct bpos next; struct bpos next;
EBUG_ON(iter->path->level);
if (iter->flags & BTREE_ITER_INTENT) { if (iter->flags & BTREE_ITER_INTENT) {
struct btree_iter iter2; struct btree_iter iter2;
struct bpos end = iter->pos; struct bpos end = iter->pos;
@ -2802,6 +2924,9 @@ static void btree_trans_verify_sorted(struct btree_trans *trans)
struct btree_path *path, *prev = NULL; struct btree_path *path, *prev = NULL;
unsigned i; unsigned i;
if (!bch2_debug_check_iterators)
return;
trans_for_each_path_inorder(trans, path, i) { trans_for_each_path_inorder(trans, path, i) {
if (prev && btree_path_cmp(prev, path) > 0) { if (prev && btree_path_cmp(prev, path) > 0) {
bch2_dump_trans_paths_updates(trans); bch2_dump_trans_paths_updates(trans);
@ -2919,6 +3044,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
{ {
EBUG_ON(trans->restarted); EBUG_ON(trans->restarted);
if (flags & BTREE_ITER_ALL_LEVELS)
flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS;
if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
btree_node_type_is_extents(btree_id)) btree_node_type_is_extents(btree_id))
flags |= BTREE_ITER_IS_EXTENTS; flags |= BTREE_ITER_IS_EXTENTS;
@ -2934,12 +3062,6 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags)) if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags))
flags |= BTREE_ITER_WITH_JOURNAL; flags |= BTREE_ITER_WITH_JOURNAL;
if (!btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_CACHED;
flags &= ~BTREE_ITER_WITH_KEY_CACHE;
} else if (!(flags & BTREE_ITER_CACHED))
flags |= BTREE_ITER_WITH_KEY_CACHE;
iter->trans = trans; iter->trans = trans;
iter->path = NULL; iter->path = NULL;
iter->update_path = NULL; iter->update_path = NULL;
@ -2965,6 +3087,12 @@ void bch2_trans_iter_init(struct btree_trans *trans,
unsigned btree_id, struct bpos pos, unsigned btree_id, struct bpos pos,
unsigned flags) unsigned flags)
{ {
if (!btree_id_cached(trans->c, btree_id)) {
flags &= ~BTREE_ITER_CACHED;
flags &= ~BTREE_ITER_WITH_KEY_CACHE;
} else if (!(flags & BTREE_ITER_CACHED))
flags |= BTREE_ITER_WITH_KEY_CACHE;
__bch2_trans_iter_init(trans, iter, btree_id, pos, __bch2_trans_iter_init(trans, iter, btree_id, pos,
0, 0, flags, _RET_IP_); 0, 0, flags, _RET_IP_);
} }

View File

@ -212,6 +212,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *);
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{ {
return bch2_btree_iter_peek_upto(iter, SPOS_MAX); return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
@ -313,9 +315,9 @@ static inline int bkey_err(struct bkey_s_c k)
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
unsigned flags) unsigned flags)
{ {
return flags & BTREE_ITER_SLOTS return flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) :
? bch2_btree_iter_peek_slot(iter) flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) :
: bch2_btree_iter_peek(iter); bch2_btree_iter_peek(iter);
} }
static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,

View File

@ -236,6 +236,13 @@ static int btree_key_cache_fill(struct btree_trans *trans,
*/ */
new_u64s = k.k->u64s + 1; new_u64s = k.k->u64s + 1;
/*
* Allocate some extra space so that the transaction commit path is less
* likely to have to reallocate, since that requires a transaction
* restart:
*/
new_u64s = min(256U, (new_u64s * 3) / 2);
if (new_u64s > ck->u64s) { if (new_u64s > ck->u64s) {
new_u64s = roundup_pow_of_two(new_u64s); new_u64s = roundup_pow_of_two(new_u64s);
new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS); new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOFS);

View File

@ -182,22 +182,16 @@ struct btree_node_iter {
* Iterate over all possible positions, synthesizing deleted keys for holes: * Iterate over all possible positions, synthesizing deleted keys for holes:
*/ */
#define BTREE_ITER_SLOTS (1 << 0) #define BTREE_ITER_SLOTS (1 << 0)
#define BTREE_ITER_ALL_LEVELS (1 << 1)
/* /*
* Indicates that intent locks should be taken on leaf nodes, because we expect * Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates: * to be doing updates:
*/ */
#define BTREE_ITER_INTENT (1 << 1) #define BTREE_ITER_INTENT (1 << 2)
/* /*
* Causes the btree iterator code to prefetch additional btree nodes from disk: * Causes the btree iterator code to prefetch additional btree nodes from disk:
*/ */
#define BTREE_ITER_PREFETCH (1 << 2) #define BTREE_ITER_PREFETCH (1 << 3)
/*
* Indicates that this iterator should not be reused until transaction commit,
* either because a pending update references it or because the update depends
* on that particular key being locked (e.g. by the str_hash code, for hash
* table consistency)
*/
#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 3)
/* /*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos * @pos or the first key strictly greater than @pos
@ -282,7 +276,8 @@ struct btree_iter {
struct btree_path *key_cache_path; struct btree_path *key_cache_path;
enum btree_id btree_id:4; enum btree_id btree_id:4;
unsigned min_depth:4; unsigned min_depth:3;
unsigned advanced:1;
/* btree_iter_copy starts here: */ /* btree_iter_copy starts here: */
u16 flags; u16 flags;
@ -639,42 +634,6 @@ static inline bool btree_type_has_snapshots(enum btree_id id)
return (1 << id) & BTREE_ID_HAS_SNAPSHOTS; return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
} }
/*
 * Bit numbers for btree update and trigger flags. Each enumerator has a
 * matching (1U << n) mask macro directly below, named without the leading
 * double underscore.
 */
enum btree_update_flags {
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
__BTREE_TRIGGER_INSERT,
__BTREE_TRIGGER_OVERWRITE,
__BTREE_TRIGGER_GC,
__BTREE_TRIGGER_BUCKET_INVALIDATE,
__BTREE_TRIGGER_NOATOMIC,
};
/* Mask form of each bit number above: */
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
/*
 * Bitmask of key types, one bit per KEY_TYPE_* value; meant to be tested
 * against (1U << key type).
 * NOTE(review): the name suggests that triggers for these key types want the
 * old and the new key in a single call - confirm against the trigger runner.
 */
#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
((1U << KEY_TYPE_alloc)| \
(1U << KEY_TYPE_alloc_v2)| \
(1U << KEY_TYPE_alloc_v3)| \
(1U << KEY_TYPE_alloc_v4)| \
(1U << KEY_TYPE_stripe)| \
(1U << KEY_TYPE_inode)| \
(1U << KEY_TYPE_inode_v2)| \
(1U << KEY_TYPE_snapshot))
static inline bool btree_node_type_needs_gc(enum btree_node_type type) static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{ {
return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type); return BTREE_NODE_TYPE_HAS_TRIGGERS & (1U << type);

View File

@ -381,16 +381,13 @@ static void bch2_btree_reserve_put(struct btree_update *as)
struct bch_fs *c = as->c; struct bch_fs *c = as->c;
struct prealloc_nodes *p; struct prealloc_nodes *p;
mutex_lock(&c->btree_reserve_cache_lock);
for (p = as->prealloc_nodes; for (p = as->prealloc_nodes;
p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes); p < as->prealloc_nodes + ARRAY_SIZE(as->prealloc_nodes);
p++) { p++) {
while (p->nr) { while (p->nr) {
struct btree *b = p->b[--p->nr]; struct btree *b = p->b[--p->nr];
six_lock_intent(&b->c.lock, NULL, NULL); mutex_lock(&c->btree_reserve_cache_lock);
six_lock_write(&b->c.lock, NULL, NULL);
if (c->btree_reserve_cache_nr < if (c->btree_reserve_cache_nr <
ARRAY_SIZE(c->btree_reserve_cache)) { ARRAY_SIZE(c->btree_reserve_cache)) {
@ -404,13 +401,15 @@ static void bch2_btree_reserve_put(struct btree_update *as)
bch2_open_buckets_put(c, &b->ob); bch2_open_buckets_put(c, &b->ob);
} }
mutex_unlock(&c->btree_reserve_cache_lock);
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
__btree_node_free(c, b); __btree_node_free(c, b);
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock); six_unlock_intent(&b->c.lock);
} }
} }
mutex_unlock(&c->btree_reserve_cache_lock);
} }
static int bch2_btree_reserve_get(struct btree_update *as, static int bch2_btree_reserve_get(struct btree_update *as,
@ -506,20 +505,18 @@ static void bch2_btree_update_free(struct btree_update *as)
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
} }
static void btree_update_will_delete_key(struct btree_update *as, static void btree_update_add_key(struct btree_update *as,
struct bkey_i *k) struct keylist *keys, struct btree *b)
{ {
BUG_ON(bch2_keylist_u64s(&as->old_keys) + k->k.u64s > struct bkey_i *k = &b->key;
ARRAY_SIZE(as->_old_keys));
bch2_keylist_add(&as->old_keys, k);
}
static void btree_update_will_add_key(struct btree_update *as, BUG_ON(bch2_keylist_u64s(keys) + k->k.u64s >
struct bkey_i *k) ARRAY_SIZE(as->_old_keys));
{
BUG_ON(bch2_keylist_u64s(&as->new_keys) + k->k.u64s > bkey_copy(keys->top, k);
ARRAY_SIZE(as->_new_keys)); bkey_i_to_btree_ptr_v2(keys->top)->v.mem_ptr = b->c.level + 1;
bch2_keylist_add(&as->new_keys, k);
bch2_keylist_push(keys);
} }
/* /*
@ -532,7 +529,7 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
struct bkey_i *k; struct bkey_i *k;
int ret; int ret;
ret = darray_make_room(trans->extra_journal_entries, as->journal_u64s); ret = darray_make_room(&trans->extra_journal_entries, as->journal_u64s);
if (ret) if (ret)
return ret; return ret;
@ -543,14 +540,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
trans->journal_pin = &as->journal; trans->journal_pin = &as->journal;
for_each_keylist_key(&as->new_keys, k) { for_each_keylist_key(&as->old_keys, k) {
ret = bch2_trans_mark_new(trans, k, 0); unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0);
if (ret) if (ret)
return ret; return ret;
} }
for_each_keylist_key(&as->old_keys, k) { for_each_keylist_key(&as->new_keys, k) {
ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(k), 0); unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0);
if (ret) if (ret)
return ret; return ret;
} }
@ -822,7 +823,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
btree_update_will_add_key(as, &b->key); btree_update_add_key(as, &as->new_keys, b);
} }
/* /*
@ -939,7 +940,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
*/ */
btree_update_drop_new_node(c, b); btree_update_drop_new_node(c, b);
btree_update_will_delete_key(as, &b->key); btree_update_add_key(as, &as->old_keys, b);
as->old_nodes[as->nr_old_nodes] = b; as->old_nodes[as->nr_old_nodes] = b;
as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq; as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
@ -1095,11 +1096,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
list_del_init(&b->list); list_del_init(&b->list);
mutex_unlock(&c->btree_cache.lock); mutex_unlock(&c->btree_cache.lock);
if (b->c.level)
six_lock_pcpu_alloc(&b->c.lock);
else
six_lock_pcpu_free(&b->c.lock);
mutex_lock(&c->btree_root_lock); mutex_lock(&c->btree_root_lock);
BUG_ON(btree_node_root(c, b) && BUG_ON(btree_node_root(c, b) &&
(b->c.level < btree_node_root(c, b)->c.level || (b->c.level < btree_node_root(c, b)->c.level ||
@ -1249,13 +1245,14 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct bpos n1_pos; struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level); n2 = bch2_btree_node_alloc(as, n1->c.level);
bch2_btree_update_add_new_node(as, n2);
n2->data->max_key = n1->data->max_key; n2->data->max_key = n1->data->max_key;
n2->data->format = n1->format; n2->data->format = n1->format;
SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data)); SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
n2->key.k.p = n1->key.k.p; n2->key.k.p = n1->key.k.p;
bch2_btree_update_add_new_node(as, n2);
set1 = btree_bset_first(n1); set1 = btree_bset_first(n1);
set2 = btree_bset_first(n2); set2 = btree_bset_first(n2);
@ -1412,7 +1409,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b); bch2_btree_interior_update_will_free_node(as, b);
n1 = bch2_btree_node_alloc_replacement(as, b); n1 = bch2_btree_node_alloc_replacement(as, b);
bch2_btree_update_add_new_node(as, n1);
if (keys) if (keys)
btree_split_insert_keys(as, trans, path, n1, keys); btree_split_insert_keys(as, trans, path, n1, keys);
@ -1427,6 +1423,8 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
six_unlock_write(&n2->c.lock); six_unlock_write(&n2->c.lock);
six_unlock_write(&n1->c.lock); six_unlock_write(&n1->c.lock);
bch2_btree_update_add_new_node(as, n1);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0); bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
@ -1455,6 +1453,8 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_build_aux_trees(n1); bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock); six_unlock_write(&n1->c.lock);
bch2_btree_update_add_new_node(as, n1);
bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
if (parent) if (parent)
@ -1723,7 +1723,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, m); bch2_btree_interior_update_will_free_node(as, m);
n = bch2_btree_node_alloc(as, b->c.level); n = bch2_btree_node_alloc(as, b->c.level);
bch2_btree_update_add_new_node(as, n);
SET_BTREE_NODE_SEQ(n->data, SET_BTREE_NODE_SEQ(n->data,
max(BTREE_NODE_SEQ(b->data), max(BTREE_NODE_SEQ(b->data),
@ -1731,8 +1730,10 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
btree_set_min(n, prev->data->min_key); btree_set_min(n, prev->data->min_key);
btree_set_max(n, next->data->max_key); btree_set_max(n, next->data->max_key);
n->data->format = new_f;
bch2_btree_update_add_new_node(as, n);
n->data->format = new_f;
btree_node_set_format(n, new_f); btree_node_set_format(n, new_f);
bch2_btree_sort_into(c, n, prev); bch2_btree_sort_into(c, n, prev);
@ -1797,10 +1798,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
as = bch2_btree_update_start(trans, iter->path, b->c.level, as = bch2_btree_update_start(trans, iter->path, b->c.level,
false, flags); false, flags);
ret = PTR_ERR_OR_ZERO(as); ret = PTR_ERR_OR_ZERO(as);
if (ret) { if (ret)
trace_btree_gc_rewrite_node_fail(c, b);
goto out; goto out;
}
bch2_btree_interior_update_will_free_node(as, b); bch2_btree_interior_update_will_free_node(as, b);
@ -1810,7 +1809,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_build_aux_trees(n); bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock); six_unlock_write(&n->c.lock);
trace_btree_gc_rewrite_node(c, b); trace_btree_rewrite(c, b);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
@ -1915,11 +1914,13 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
int ret; int ret;
if (!skip_triggers) { if (!skip_triggers) {
ret = bch2_trans_mark_new(trans, new_key, 0); ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
bkey_i_to_s_c(&b->key), 0);
if (ret) if (ret)
return ret; return ret;
ret = bch2_trans_mark_old(trans, bkey_i_to_s_c(&b->key), 0); ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
new_key, 0);
if (ret) if (ret)
return ret; return ret;
} }
@ -1956,7 +1957,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
} else { } else {
BUG_ON(btree_node_root(c, b) != b); BUG_ON(btree_node_root(c, b) != b);
ret = darray_make_room(trans->extra_journal_entries, ret = darray_make_room(&trans->extra_journal_entries,
jset_u64s(new_key->k.u64s)); jset_u64s(new_key->k.u64s));
if (ret) if (ret)
return ret; return ret;
@ -2158,19 +2159,27 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
} }
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c) static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
{ {
size_t ret = 0; bool ret;
struct list_head *i;
mutex_lock(&c->btree_interior_update_lock); mutex_lock(&c->btree_interior_update_lock);
list_for_each(i, &c->btree_interior_update_list) ret = !list_empty(&c->btree_interior_update_list);
ret++;
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
return ret; return ret;
} }
bool bch2_btree_interior_updates_flush(struct bch_fs *c)
{
bool ret = bch2_btree_interior_updates_pending(c);
if (ret)
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_pending(c));
return ret;
}
void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset) void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
{ {
struct btree_root *r; struct btree_root *r;

View File

@ -309,7 +309,7 @@ static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *); void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *); bool bch2_btree_interior_updates_flush(struct bch_fs *);
void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *); void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,

View File

@ -478,16 +478,16 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
i->overwrite_trigger_run = true; i->overwrite_trigger_run = true;
i->insert_trigger_run = true; i->insert_trigger_run = true;
return bch2_trans_mark_key(trans, old, i->k, return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
BTREE_TRIGGER_INSERT| BTREE_TRIGGER_INSERT|
BTREE_TRIGGER_OVERWRITE| BTREE_TRIGGER_OVERWRITE|
i->flags) ?: 1; i->flags) ?: 1;
} else if (overwrite && !i->overwrite_trigger_run) { } else if (overwrite && !i->overwrite_trigger_run) {
i->overwrite_trigger_run = true; i->overwrite_trigger_run = true;
return bch2_trans_mark_old(trans, old, i->flags) ?: 1; return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1;
} else if (!overwrite && !i->insert_trigger_run) { } else if (!overwrite && !i->insert_trigger_run) {
i->insert_trigger_run = true; i->insert_trigger_run = true;
return bch2_trans_mark_new(trans, i->k, i->flags) ?: 1; return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1;
} else { } else {
return 0; return 0;
} }
@ -1111,6 +1111,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto out_reset; goto out_reset;
} }
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres)); memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
trans->journal_u64s = trans->extra_journal_entries.nr; trans->journal_u64s = trans->extra_journal_entries.nr;
@ -1159,6 +1161,8 @@ retry:
if (ret) if (ret)
goto err; goto err;
trace_transaction_commit(trans->fn, _RET_IP_);
out: out:
bch2_journal_preres_put(&c->journal, &trans->journal_preres); bch2_journal_preres_put(&c->journal, &trans->journal_preres);
@ -1753,7 +1757,7 @@ int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
struct jset_entry_log *l; struct jset_entry_log *l;
int ret; int ret;
ret = darray_make_room(trans->extra_journal_entries, jset_u64s(u64s)); ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s));
if (ret) if (ret)
return ret; return ret;

View File

@ -378,10 +378,9 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
idx = bch2_replicas_entry_idx(c, r); idx = bch2_replicas_entry_idx(c, r);
if (idx < 0 && if (idx < 0 &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err(c, "no replicas entry\n" fsck_err(c, "no replicas entry\n"
" while marking %s", " while marking %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
percpu_up_read(&c->mark_lock); percpu_up_read(&c->mark_lock);
ret = bch2_mark_replicas(c, r); ret = bch2_mark_replicas(c, r);
percpu_down_read(&c->mark_lock); percpu_down_read(&c->mark_lock);
@ -596,9 +595,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors"); bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
return ret; return ret;
} }
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
old_a.cached_sectors);
} }
return 0; return 0;
@ -1447,6 +1443,7 @@ err:
} }
int bch2_trans_mark_extent(struct btree_trans *trans, int bch2_trans_mark_extent(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_i *new,
unsigned flags) unsigned flags)
{ {
@ -1585,6 +1582,7 @@ err:
} }
int bch2_trans_mark_stripe(struct btree_trans *trans, int bch2_trans_mark_stripe(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_i *new,
unsigned flags) unsigned flags)
{ {
@ -1655,6 +1653,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
} }
int bch2_trans_mark_inode(struct btree_trans *trans, int bch2_trans_mark_inode(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c old,
struct bkey_i *new, struct bkey_i *new,
unsigned flags) unsigned flags)
@ -1671,6 +1670,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
} }
int bch2_trans_mark_reservation(struct btree_trans *trans, int bch2_trans_mark_reservation(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c old,
struct bkey_i *new, struct bkey_i *new,
unsigned flags) unsigned flags)
@ -1772,6 +1772,7 @@ err:
} }
int bch2_trans_mark_reflink_p(struct btree_trans *trans, int bch2_trans_mark_reflink_p(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c old,
struct bkey_i *new, struct bkey_i *new,
unsigned flags) unsigned flags)

View File

@ -202,41 +202,14 @@ int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsi
int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
struct bkey_i *, unsigned);
static inline int bch2_trans_mark_old(struct btree_trans *trans,
struct bkey_s_c old, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
deleted.k.p = old.k->p;
return bch2_trans_mark_key(trans, old, &deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
static inline int bch2_trans_mark_new(struct btree_trans *trans,
struct bkey_i *new, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
deleted.k.p = new->k.p;
return bch2_trans_mark_key(trans, bkey_i_to_s_c(&deleted), new,
BTREE_TRIGGER_INSERT|flags);
}
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,

107
libbcachefs/counters.c Normal file
View File

@ -0,0 +1,107 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "super-io.h"
#include "counters.h"
/* BCH_SB_FIELD_counters */
/*
 * Names of the persistent counters, in the order the entries appear in
 * BCH_PERSISTENT_COUNTERS().  Each entry's first argument is stringified;
 * the table ends with a NULL sentinel.
 */
const char * const bch2_counter_names[] = {
#define x(_name, _n, ...)	#_name,
	BCH_PERSISTENT_COUNTERS()
#undef x
	NULL
};
/*
 * Number of __le64 slots held by a superblock counters field.
 *
 * @ctrs: the counters field, or NULL when there is none
 *
 * Returns 0 for a NULL field.  Otherwise the count is the pointer distance,
 * in __le64 units, between the start of d[] and vstruct_end() of the field
 * (presumably one past the field's last byte - confirm against vstruct_end()).
 */
static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs)
{
	if (!ctrs)
		return 0;

	return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0];
}
/*
 * Validation hook for the superblock counters field.
 *
 * Performs no checks: none of the arguments are inspected, @err is never
 * written to, and the result is always 0.
 */
static int bch2_sb_counters_validate(struct bch_sb *sb,
				     struct bch_sb_field *f,
				     struct printbuf *err)
{
	return 0;
}
/*
 * Print every counter slot of superblock field @f to @out, one per line:
 * the counter name, a tab stop, then the value converted with le64_to_cpu().
 *
 * Slots at index BCH_COUNTER_NR or above are not looked up in
 * bch2_counter_names[] and are labelled "(unknown)" instead.  @sb is unused.
 */
void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb,
			      struct bch_sb_field *f)
{
	struct bch_sb_field_counters *ctrs = field_to_type(f, counters);
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
	unsigned int i;

	for (i = 0; i < nr; i++) {
		if (i < BCH_COUNTER_NR)
			pr_buf(out, "%s", bch2_counter_names[i]);
		else
			pr_buf(out, "(unknown)");

		pr_tab(out);
		pr_buf(out, "%llu", le64_to_cpu(ctrs->d[i]));
		pr_newline(out);
	}
}
/*
 * Load the counters stored in the superblock into the in-memory counters.
 *
 * c->counters_on_mount[] is zeroed for all BCH_COUNTER_NR slots first, so
 * slots the superblock field does not hold end up as 0 there.  Each slot that
 * is present in the field (up to BCH_COUNTER_NR) is converted from little
 * endian and stored in both c->counters[] and c->counters_on_mount[].
 *
 * Always returns 0.
 */
int bch2_sb_counters_to_cpu(struct bch_fs *c)
{
	struct bch_sb_field_counters *ctrs = bch2_sb_get_counters(c->disk_sb.sb);
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
	/* never read past either the on-disk field or the in-memory arrays */
	unsigned int limit = min_t(unsigned int, nr, BCH_COUNTER_NR);
	unsigned int i;

	for (i = 0; i < BCH_COUNTER_NR; i++)
		c->counters_on_mount[i] = 0;

	for (i = 0; i < limit; i++) {
		u64 val = le64_to_cpu(ctrs->d[i]);

		percpu_u64_set(&c->counters[i], val);
		c->counters_on_mount[i] = val;
	}

	return 0;
}
/*
 * Store the current in-memory counter values in the superblock counters field.
 *
 * If the field holds fewer than BCH_COUNTER_NR slots (or does not exist), a
 * resize is attempted.  When bch2_sb_resize_counters() returns NULL the
 * original pointer and slot count are used as they were, so only the slots
 * that already existed are written.
 * NOTE(review): this relies on the old field pointer remaining usable after
 * a failed resize - confirm against bch2_sb_resize_counters().
 *
 * Always returns 0.
 */
int bch2_sb_counters_from_cpu(struct bch_fs *c)
{
	struct bch_sb_field_counters *ctrs = bch2_sb_get_counters(c->disk_sb.sb);
	unsigned int nr = bch2_sb_counter_nr_entries(ctrs);
	unsigned int limit;
	unsigned int i;

	if (nr < BCH_COUNTER_NR) {
		struct bch_sb_field_counters *grown =
			bch2_sb_resize_counters(&c->disk_sb,
				sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR);

		if (grown) {
			ctrs = grown;
			nr = bch2_sb_counter_nr_entries(grown);
		}
	}

	limit = min_t(unsigned int, nr, BCH_COUNTER_NR);

	for (i = 0; i < limit; i++)
		ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i]));

	return 0;
}
/*
 * Allocate the per-cpu counter array (BCH_COUNTER_NR u64 slots) and fill it
 * from the superblock.
 *
 * Returns -ENOMEM when the allocation fails, otherwise the result of
 * bch2_sb_counters_to_cpu().
 */
int bch2_fs_counters_init(struct bch_fs *c)
{
	c->counters = __alloc_percpu(sizeof(u64) * BCH_COUNTER_NR, sizeof(u64));

	return c->counters
		? bch2_sb_counters_to_cpu(c)
		: -ENOMEM;
}
/* Superblock field operations for the counters field: validation and text-dump hooks. */
const struct bch_sb_field_ops bch_sb_field_ops_counters = {
.validate = bch2_sb_counters_validate,
.to_text = bch2_sb_counters_to_text,
};

17
libbcachefs/counters.h Normal file
View File

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_COUNTERS_H
#define _BCACHEFS_COUNTERS_H
#include "bcachefs.h"
#include "super-io.h"
/* Load the counters stored in the superblock into c->counters and c->counters_on_mount; returns 0. */
int bch2_sb_counters_to_cpu(struct bch_fs *c);
/* Write the current c->counters values into the superblock counters field; returns 0. */
int bch2_sb_counters_from_cpu(struct bch_fs *c);
/* Allocate the per-cpu counters and load them from the superblock; -ENOMEM if allocation fails. */
int bch2_fs_counters_init(struct bch_fs *c);
/* Operations table (validate / to_text) for the superblock counters field. */
extern const struct bch_sb_field_ops bch_sb_field_ops_counters;
#endif // _BCACHEFS_COUNTERS_H

View File

@ -36,7 +36,7 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
} }
#define darray_make_room(_d, _more) \ #define darray_make_room(_d, _more) \
__darray_make_room((darray_void *) &(_d), sizeof((_d).data[0]), (_more)) __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more))
#define darray_top(_d) ((_d).data[(_d).nr]) #define darray_top(_d) ((_d).data[(_d).nr])
@ -45,7 +45,7 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
int _ret = darray_make_room((_d), 1); \ int _ret = darray_make_room((_d), 1); \
\ \
if (!_ret) \ if (!_ret) \
(_d).data[(_d).nr++] = (_item); \ (_d)->data[(_d)->nr++] = (_item); \
_ret; \ _ret; \
}) })
@ -54,7 +54,7 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
int _ret = darray_make_room((_d), 1); \ int _ret = darray_make_room((_d), 1); \
\ \
if (!_ret) \ if (!_ret) \
array_insert_item((_d).data, (_d).nr, (_pos), (_item)); \ array_insert_item((_d)->data, (_d)->nr, (_pos), (_item));\
_ret; \ _ret; \
}) })
@ -63,13 +63,13 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more)
#define darray_init(_d) \ #define darray_init(_d) \
do { \ do { \
(_d).data = NULL; \ (_d)->data = NULL; \
(_d).nr = (_d).size = 0; \ (_d)->nr = (_d)->size = 0; \
} while (0) } while (0)
#define darray_exit(_d) \ #define darray_exit(_d) \
do { \ do { \
kfree((_d).data); \ kfree((_d)->data); \
darray_init(_d); \ darray_init(_d); \
} while (0) } while (0)

View File

@ -443,6 +443,11 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *
bch2_flags_to_text(out, bch2_btree_node_flags, b->flags); bch2_flags_to_text(out, bch2_btree_node_flags, b->flags);
pr_newline(out); pr_newline(out);
pr_buf(out, "pcpu read locks: ");
pr_tab(out);
pr_buf(out, "%u", b->c.lock.readers != NULL);
pr_newline(out);
pr_buf(out, "written:"); pr_buf(out, "written:");
pr_tab(out); pr_tab(out);
pr_buf(out, "%u", b->written); pr_buf(out, "%u", b->written);

View File

@ -308,8 +308,20 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
lp.crc.uncompressed_size + lp.crc.uncompressed_size +
rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9)) rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9))
return false; return false;
}
if (lp.crc.uncompressed_size + rp.crc.uncompressed_size > en_l = extent_entry_next(en_l);
en_r = extent_entry_next(en_r);
}
en_l = l_ptrs.start;
en_r = r_ptrs.start;
while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
if (extent_entry_is_crc(en_l)) {
struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
if (crc_l.uncompressed_size + crc_r.uncompressed_size >
bch2_crc_field_size_max[extent_entry_type(en_l)]) bch2_crc_field_size_max[extent_entry_type(en_l)])
return false; return false;
} }

View File

@ -232,7 +232,10 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
return; return;
mutex_lock(&inode->ei_quota_lock); mutex_lock(&inode->ei_quota_lock);
BUG_ON((s64) inode->v.i_blocks + sectors < 0); bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c,
"inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks, sectors,
inode->ei_inode.bi_sectors);
inode->v.i_blocks += sectors; inode->v.i_blocks += sectors;
#ifdef CONFIG_BCACHEFS_QUOTA #ifdef CONFIG_BCACHEFS_QUOTA
@ -2710,9 +2713,11 @@ int bch2_truncate(struct user_namespace *mnt_userns,
U64_MAX, &i_sectors_delta); U64_MAX, &i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta);
WARN_ON(!inode->v.i_size && inode->v.i_blocks && bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
!bch2_journal_error(&c->journal)); !bch2_journal_error(&c->journal), c,
"inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
inode->v.i_ino, (u64) inode->v.i_blocks,
inode->ei_inode.bi_sectors);
if (unlikely(ret)) if (unlikely(ret))
goto err; goto err;

View File

@ -560,7 +560,7 @@ struct inode_walker {
static void inode_walker_exit(struct inode_walker *w) static void inode_walker_exit(struct inode_walker *w)
{ {
darray_exit(w->inodes); darray_exit(&w->inodes);
} }
static struct inode_walker inode_walker_init(void) static struct inode_walker inode_walker_init(void)
@ -575,7 +575,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
BUG_ON(bch2_inode_unpack(inode, &u)); BUG_ON(bch2_inode_unpack(inode, &u));
return darray_push(w->inodes, ((struct inode_walker_entry) { return darray_push(&w->inodes, ((struct inode_walker_entry) {
.inode = u, .inode = u,
.snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv, .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv,
})); }));
@ -628,7 +628,7 @@ found:
while (i && w->inodes.data[i - 1].snapshot > pos.snapshot) while (i && w->inodes.data[i - 1].snapshot > pos.snapshot)
--i; --i;
ret = darray_insert_item(w->inodes, i, w->inodes.data[ancestor_pos]); ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]);
if (ret) if (ret)
return ret; return ret;
@ -740,7 +740,8 @@ static int hash_check_key(struct btree_trans *trans,
if (hash_k.k->p.offset < hash) if (hash_k.k->p.offset < hash)
goto bad_hash; goto bad_hash;
for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash), for_each_btree_key_norestart(trans, iter, desc.btree_id,
POS(hash_k.k->p.inode, hash),
BTREE_ITER_SLOTS, k, ret) { BTREE_ITER_SLOTS, k, ret) {
if (!bkey_cmp(k.k->p, hash_k.k->p)) if (!bkey_cmp(k.k->p, hash_k.k->p))
break; break;
@ -759,16 +760,15 @@ static int hash_check_key(struct btree_trans *trans,
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
goto bad_hash; goto bad_hash;
} }
} }
out: out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf); printbuf_exit(&buf);
return ret; return ret;
bad_hash: bad_hash:
if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, " if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, "
"hashed to %llu\n%s", "hashed to %llu\n%s",
desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset, hash, bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE) bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE)
return 0; return 0;
@ -1405,8 +1405,8 @@ static int check_dirent_target(struct btree_trans *trans,
if (fsck_err_on(backpointer_exists && if (fsck_err_on(backpointer_exists &&
!target->bi_nlink, c, !target->bi_nlink, c,
"inode %llu has multiple links but i_nlink 0", "inode %llu type %s has multiple links but i_nlink 0",
target->bi_inum)) { target->bi_inum, bch2_d_types[d.v->d_type])) {
target->bi_nlink++; target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_UNLINKED; target->bi_flags &= ~BCH_INODE_UNLINKED;
@ -1879,7 +1879,7 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
static int path_down(struct bch_fs *c, pathbuf *p, static int path_down(struct bch_fs *c, pathbuf *p,
u64 inum, u32 snapshot) u64 inum, u32 snapshot)
{ {
int ret = darray_push(*p, ((struct pathbuf_entry) { int ret = darray_push(p, ((struct pathbuf_entry) {
.inum = inum, .inum = inum,
.snapshot = snapshot, .snapshot = snapshot,
})); }));
@ -2037,7 +2037,7 @@ static int check_directory_structure(struct bch_fs *c)
BUG_ON(ret == -EINTR); BUG_ON(ret == -EINTR);
darray_exit(path); darray_exit(&path);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
@ -2254,8 +2254,8 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
} }
if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c, if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
"inode %llu has wrong i_nlink (type %u i_nlink %u, should be %u)", "inode %llu type %s has wrong i_nlink (%u, should be %u)",
u.bi_inum, mode_to_type(u.bi_mode), u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
bch2_inode_nlink_get(&u), link->count)) { bch2_inode_nlink_get(&u), link->count)) {
bch2_inode_nlink_set(&u, link->count); bch2_inode_nlink_set(&u, link->count);

View File

@ -1288,6 +1288,7 @@ void bch2_write(struct closure *cl)
goto err; goto err;
} }
this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
bch2_increment_clock(c, bio_sectors(bio), WRITE); bch2_increment_clock(c, bio_sectors(bio), WRITE);
data_len = min_t(u64, bio->bi_iter.bi_size, data_len = min_t(u64, bio->bi_iter.bi_size,
@ -2200,6 +2201,7 @@ get_bio:
if (rbio->bounce) if (rbio->bounce)
trace_read_bounce(&rbio->bio); trace_read_bounce(&rbio->bio);
this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
/* /*

View File

@ -792,8 +792,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
int ret = 0; int ret = 0;
if (c) { if (c) {
bch2_journal_block(&c->journal);
bch2_journal_flush_all_pins(&c->journal); bch2_journal_flush_all_pins(&c->journal);
bch2_journal_block(&c->journal);
} }
bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL);

View File

@ -146,8 +146,6 @@ static inline u64 journal_last_unwritten_seq(struct journal *j)
return j->seq_ondisk + 1; return j->seq_ondisk + 1;
} }
void bch2_journal_set_has_inum(struct journal *, u64, u64);
static inline int journal_state_count(union journal_res_state s, int idx) static inline int journal_state_count(union journal_res_state s, int idx)
{ {
switch (idx) { switch (idx) {

View File

@ -1055,7 +1055,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
jlist.ret = 0; jlist.ret = 0;
for_each_member_device(ca, c, iter) { for_each_member_device(ca, c, iter) {
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && if (!c->opts.fsck &&
!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal))) !(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_journal)))
continue; continue;
@ -1212,10 +1212,9 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
bch2_replicas_entry_to_text(&buf, &replicas.e); bch2_replicas_entry_to_text(&buf, &replicas.e);
if (!degraded && if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c, fsck_err_on(!bch2_replicas_marked(c, &replicas.e), c,
"superblock not marked as containing replicas %s", "superblock not marked as containing replicas %s",
buf.buf))) { buf.buf)) {
ret = bch2_mark_replicas(c, &replicas.e); ret = bch2_mark_replicas(c, &replicas.e);
if (ret) if (ret)
goto err; goto err;
@ -1442,6 +1441,7 @@ static void journal_write_done(struct closure *cl)
* Must come before signaling write completion, for * Must come before signaling write completion, for
* bch2_fs_journal_stop(): * bch2_fs_journal_stop():
*/ */
if (j->watermark)
journal_reclaim_kick(&c->journal); journal_reclaim_kick(&c->journal);
/* also must come before signalling write completion: */ /* also must come before signalling write completion: */

View File

@ -589,7 +589,7 @@ static u64 journal_seq_to_flush(struct journal *j)
* 512 journal entries or 25% of all journal buckets, then * 512 journal entries or 25% of all journal buckets, then
* journal_next_bucket() should not stall. * journal_next_bucket() should not stall.
*/ */
static int __bch2_journal_reclaim(struct journal *j, bool direct) static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool kthread = (current->flags & PF_KTHREAD) != 0; bool kthread = (current->flags & PF_KTHREAD) != 0;
@ -638,8 +638,10 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used) if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
min_nr = 1; min_nr = 1;
trace_journal_reclaim_start(c, min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
min_nr,
trace_journal_reclaim_start(c, direct, kicked,
min_nr, min_key_cache,
j->prereserved.reserved, j->prereserved.reserved,
j->prereserved.remaining, j->prereserved.remaining,
atomic_read(&c->btree_cache.dirty), atomic_read(&c->btree_cache.dirty),
@ -647,8 +649,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
atomic_long_read(&c->btree_key_cache.nr_dirty), atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys)); atomic_long_read(&c->btree_key_cache.nr_keys));
min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
nr_flushed = journal_flush_pins(j, seq_to_flush, nr_flushed = journal_flush_pins(j, seq_to_flush,
min_nr, min_key_cache); min_nr, min_key_cache);
@ -669,7 +669,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
int bch2_journal_reclaim(struct journal *j) int bch2_journal_reclaim(struct journal *j)
{ {
return __bch2_journal_reclaim(j, true); return __bch2_journal_reclaim(j, true, true);
} }
static int bch2_journal_reclaim_thread(void *arg) static int bch2_journal_reclaim_thread(void *arg)
@ -685,10 +685,12 @@ static int bch2_journal_reclaim_thread(void *arg)
j->last_flushed = jiffies; j->last_flushed = jiffies;
while (!ret && !kthread_should_stop()) { while (!ret && !kthread_should_stop()) {
bool kicked = j->reclaim_kicked;
j->reclaim_kicked = false; j->reclaim_kicked = false;
mutex_lock(&j->reclaim_lock); mutex_lock(&j->reclaim_lock);
ret = __bch2_journal_reclaim(j, false); ret = __bch2_journal_reclaim(j, false, kicked);
mutex_unlock(&j->reclaim_lock); mutex_unlock(&j->reclaim_lock);
now = jiffies; now = jiffies;

View File

@ -2,6 +2,7 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "journal_sb.h" #include "journal_sb.h"
#include "darray.h"
#include <linux/sort.h> #include <linux/sort.h>
@ -142,12 +143,6 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
} }
for (i = 0; i + 1 < nr; i++) { for (i = 0; i + 1 < nr; i++) {
if (b[i].end == b[i + 1].start) {
pr_buf(err, "contiguous journal buckets ranges %llu-%llu, %llu-%llu",
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
goto err;
}
if (b[i].end > b[i + 1].start) { if (b[i].end > b[i + 1].start) {
pr_buf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu", pr_buf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end); b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
@ -219,5 +214,7 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca)
} }
} }
BUG_ON(dst + 1 != nr);
return 0; return 0;
} }

View File

@ -204,7 +204,9 @@ int bch2_check_lrus(struct bch_fs *c, bool initial)
for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN, for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) { BTREE_ITER_PREFETCH, k, ret) {
ret = __bch2_trans_do(&trans, NULL, NULL, 0, ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW,
bch2_check_lru_key(&trans, &iter, initial)); bch2_check_lru_key(&trans, &iter, initial));
if (ret) if (ret)
break; break;

View File

@ -175,10 +175,7 @@ next:
goto err; goto err;
} }
/* flush relevant btree updates */ bch2_btree_interior_updates_flush(c);
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = 0; ret = 0;
err: err:
bch2_trans_exit(&trans); bch2_trans_exit(&trans);

View File

@ -125,7 +125,7 @@ next:
} }
} }
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
darray_exit(s.ids); darray_exit(&s.ids);
return ret; return ret;
} }
@ -574,6 +574,7 @@ static int bch2_move_extent(struct btree_trans *trans,
atomic64_inc(&ctxt->stats->keys_moved); atomic64_inc(&ctxt->stats->keys_moved);
atomic64_add(k.k->size, &ctxt->stats->sectors_moved); atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
this_cpu_add(c->counters[BCH_COUNTER_io_move], k.k->size);
trace_move_extent(k.k); trace_move_extent(k.k);
@ -596,7 +597,7 @@ err_free_pages:
err_free: err_free:
kfree(io); kfree(io);
err: err:
trace_move_alloc_fail(k.k); trace_move_alloc_mem_fail(k.k);
return ret; return ret;
} }
@ -941,9 +942,7 @@ next:
if (ret) if (ret)
bch_err(c, "error %i in bch2_move_btree", ret); bch_err(c, "error %i in bch2_move_btree", ret);
/* flush relevant btree updates */ bch2_btree_interior_updates_flush(c);
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
progress_list_del(c, stats); progress_list_del(c, stats);
return ret; return ret;

View File

@ -316,11 +316,6 @@ enum opt_type {
OPT_BOOL(), \ OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \ BCH2_NO_SB_OPT, false, \
NULL, "Don't replay the journal") \ NULL, "Don't replay the journal") \
x(rebuild_replicas, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Rebuild the superblock replicas section") \
x(keep_journal, u8, \ x(keep_journal, u8, \
0, \ 0, \
OPT_BOOL(), \ OPT_BOOL(), \

View File

@ -147,7 +147,7 @@ static void journal_iters_fix(struct bch_fs *c)
/* /*
* If an iterator points one after the key we just inserted, * If an iterator points one after the key we just inserted,
* and the key we just inserted compares >= the iterator's position, * and the key we just inserted compares > the iterator's position,
* decrement the iterator so it points at the key we just inserted: * decrement the iterator so it points at the key we just inserted:
*/ */
list_for_each_entry(iter, &c->journal_iters, journal.list) list_for_each_entry(iter, &c->journal_iters, journal.list)
@ -155,7 +155,7 @@ static void journal_iters_fix(struct bch_fs *c)
iter->last && iter->last &&
iter->b->c.btree_id == n->btree_id && iter->b->c.btree_id == n->btree_id &&
iter->b->c.level == n->level && iter->b->c.level == n->level &&
bpos_cmp(n->k->k.p, iter->unpacked.p) >= 0) bpos_cmp(n->k->k.p, iter->unpacked.p) > 0)
iter->journal.idx = keys->gap - 1; iter->journal.idx = keys->gap - 1;
} }
@ -994,7 +994,6 @@ static int bch2_fs_initialize_subvolumes(struct bch_fs *c)
if (ret) if (ret)
return ret; return ret;
bkey_subvolume_init(&root_volume.k_i); bkey_subvolume_init(&root_volume.k_i);
root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
root_volume.v.flags = 0; root_volume.v.flags = 0;
@ -1087,12 +1086,6 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.fix_errors = FSCK_OPT_YES; c->opts.fix_errors = FSCK_OPT_YES;
} }
if (!c->replicas.entries ||
c->opts.rebuild_replicas) {
bch_info(c, "building replicas info");
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}
if (!c->opts.nochanges) { if (!c->opts.nochanges) {
if (c->sb.version < bcachefs_metadata_version_new_data_types) { if (c->sb.version < bcachefs_metadata_version_new_data_types) {
bch_info(c, "version prior to new_data_types, upgrade and fsck required"); bch_info(c, "version prior to new_data_types, upgrade and fsck required");
@ -1102,6 +1095,12 @@ int bch2_fs_recovery(struct bch_fs *c)
} }
} }
if (c->opts.fsck && c->opts.norecovery) {
bch_err(c, "cannot select both norecovery and fsck");
ret = -EINVAL;
goto err;
}
ret = bch2_blacklist_table_initialize(c); ret = bch2_blacklist_table_initialize(c);
if (ret) { if (ret) {
bch_err(c, "error initializing blacklist table"); bch_err(c, "error initializing blacklist table");
@ -1195,6 +1194,13 @@ use_clean:
if (ret) if (ret)
goto err; goto err;
/*
* Skip past versions that might have possibly been used (as nonces),
* but hadn't had their pointers written:
*/
if (c->sb.encryption_type && !c->sb.clean)
atomic64_add(1 << 16, &c->key_version);
ret = read_btree_roots(c); ret = read_btree_roots(c);
if (ret) if (ret)
goto err; goto err;
@ -1217,17 +1223,9 @@ use_clean:
goto err; goto err;
bch_verbose(c, "stripes_read done"); bch_verbose(c, "stripes_read done");
/* bch2_stripes_heap_start(c);
* If we're not running fsck, this ensures bch2_fsck_err() calls are
* instead interpreted as bch2_inconsistent_err() calls:
*/
if (!c->opts.fsck)
set_bit(BCH_FS_FSCK_DONE, &c->flags);
if (c->opts.fsck || if (c->opts.fsck) {
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_info)) ||
!(c->sb.compat & (1ULL << BCH_COMPAT_alloc_metadata)) ||
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bool metadata_only = c->opts.norecovery; bool metadata_only = c->opts.norecovery;
bch_info(c, "checking allocations"); bch_info(c, "checking allocations");
@ -1236,33 +1234,52 @@ use_clean:
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "done checking allocations"); bch_verbose(c, "done checking allocations");
}
if (c->opts.fsck) { set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
bch_info(c, "checking need_discard and freespace btrees"); bch_info(c, "checking need_discard and freespace btrees");
err = "error checking need_discard and freespace btrees"; err = "error checking need_discard and freespace btrees";
ret = bch2_check_alloc_info(c); ret = bch2_check_alloc_info(c);
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "done checking need_discard and freespace btrees");
set_bit(BCH_FS_MAY_GO_RW, &c->flags);
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
err = "journal replay failed";
ret = bch2_journal_replay(c);
if (ret)
goto err;
if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done");
bch_info(c, "checking lrus");
err = "error checking lrus";
ret = bch2_check_lrus(c, true);
if (ret)
goto err;
bch_verbose(c, "done checking lrus");
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
bch_info(c, "checking alloc to lru refs");
err = "error checking alloc to lru refs";
ret = bch2_check_alloc_to_lru_refs(c);
if (ret)
goto err;
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
ret = bch2_check_lrus(c, true); ret = bch2_check_lrus(c, true);
if (ret) if (ret)
goto err; goto err;
bch_verbose(c, "done checking need_discard and freespace btrees"); bch_verbose(c, "done checking alloc to lru refs");
} } else {
bch2_stripes_heap_start(c);
clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
set_bit(BCH_FS_MAY_GO_RW, &c->flags); set_bit(BCH_FS_MAY_GO_RW, &c->flags);
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
/* set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
* Skip past versions that might have possibly been used (as nonces), set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
* but hadn't had their pointers written: set_bit(BCH_FS_FSCK_DONE, &c->flags);
*/
if (c->sb.encryption_type && !c->sb.clean)
atomic64_add(1 << 16, &c->key_version);
if (c->opts.norecovery) if (c->opts.norecovery)
goto out; goto out;
@ -1274,25 +1291,13 @@ use_clean:
goto err; goto err;
if (c->opts.verbose || !c->sb.clean) if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done"); bch_info(c, "journal replay done");
}
err = "error initializing freespace"; err = "error initializing freespace";
ret = bch2_fs_freespace_init(c); ret = bch2_fs_freespace_init(c);
if (ret) if (ret)
goto err; goto err;
if (c->opts.fsck) {
bch_info(c, "checking alloc to lru refs");
err = "error checking alloc to lru refs";
ret = bch2_check_alloc_to_lru_refs(c);
if (ret)
goto err;
ret = bch2_check_lrus(c, true);
if (ret)
goto err;
bch_verbose(c, "done checking alloc to lru refs");
}
if (c->sb.version < bcachefs_metadata_version_snapshot_2) { if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
bch2_fs_lazy_rw(c); bch2_fs_lazy_rw(c);

View File

@ -110,6 +110,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
} }
int bch2_trans_mark_reflink_v(struct btree_trans *trans, int bch2_trans_mark_reflink_v(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_i *new,
unsigned flags) unsigned flags)
{ {
@ -124,7 +125,7 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans,
} }
} }
return bch2_trans_mark_extent(trans, old, new, flags); return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
} }
/* indirect inline data */ /* indirect inline data */
@ -153,6 +154,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out,
} }
int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new, struct bkey_s_c old, struct bkey_i *new,
unsigned flags) unsigned flags)
{ {

View File

@ -20,8 +20,8 @@ int bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c,
int, struct printbuf *); int, struct printbuf *);
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c); struct bkey_s_c);
int bch2_trans_mark_reflink_v(struct btree_trans *, struct bkey_s_c, int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
struct bkey_i *, unsigned); struct bkey_s_c, struct bkey_i *, unsigned);
#define bch2_bkey_ops_reflink_v (struct bkey_ops) { \ #define bch2_bkey_ops_reflink_v (struct bkey_ops) { \
.key_invalid = bch2_reflink_v_invalid, \ .key_invalid = bch2_reflink_v_invalid, \
@ -36,6 +36,7 @@ int bch2_indirect_inline_data_invalid(const struct bch_fs *, struct bkey_s_c,
void bch2_indirect_inline_data_to_text(struct printbuf *, void bch2_indirect_inline_data_to_text(struct printbuf *,
struct bch_fs *, struct bkey_s_c); struct bch_fs *, struct bkey_s_c);
int bch2_trans_mark_indirect_inline_data(struct btree_trans *, int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
enum btree_id, unsigned,
struct bkey_s_c, struct bkey_i *, struct bkey_s_c, struct bkey_i *,
unsigned); unsigned);

View File

@ -565,7 +565,7 @@ static int snapshot_id_add(snapshot_id_list *s, u32 id)
{ {
BUG_ON(snapshot_list_has_id(s, id)); BUG_ON(snapshot_list_has_id(s, id));
return darray_push(*s, id); return darray_push(s, id);
} }
static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans, static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
@ -622,7 +622,7 @@ static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
} }
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
darray_exit(equiv_seen); darray_exit(&equiv_seen);
return ret; return ret;
} }
@ -722,7 +722,7 @@ static void bch2_delete_dead_snapshots_work(struct work_struct *work)
} }
} }
err: err:
darray_exit(deleted); darray_exit(&deleted);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
} }
@ -888,7 +888,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
while (!ret) { while (!ret) {
mutex_lock(&c->snapshots_unlinked_lock); mutex_lock(&c->snapshots_unlinked_lock);
s = c->snapshots_unlinked; s = c->snapshots_unlinked;
darray_init(c->snapshots_unlinked); darray_init(&c->snapshots_unlinked);
mutex_unlock(&c->snapshots_unlinked_lock); mutex_unlock(&c->snapshots_unlinked_lock);
if (!s.nr) if (!s.nr)
@ -905,7 +905,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
} }
} }
darray_exit(s); darray_exit(&s);
} }
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);

View File

@ -76,7 +76,7 @@ static inline void snapshots_seen_init(struct snapshots_seen *s)
static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{ {
int ret = darray_push(s->ids, id); int ret = darray_push(&s->ids, id);
if (ret) if (ret)
bch_err(c, "error reallocating snapshots_seen table (size %zu)", bch_err(c, "error reallocating snapshots_seen table (size %zu)",
s->ids.size); s->ids.size);

View File

@ -17,6 +17,7 @@
#include "super-io.h" #include "super-io.h"
#include "super.h" #include "super.h"
#include "vstructs.h" #include "vstructs.h"
#include "counters.h"
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/sort.h> #include <linux/sort.h>
@ -818,6 +819,8 @@ int bch2_write_super(struct bch_fs *c)
SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);
bch2_sb_counters_from_cpu(c);
for_each_online_member(ca, c, i) for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca); bch2_sb_from_fs(c, ca);

View File

@ -44,6 +44,7 @@
#include "super.h" #include "super.h"
#include "super-io.h" #include "super-io.h"
#include "sysfs.h" #include "sysfs.h"
#include "counters.h"
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
@ -71,6 +72,9 @@ struct kobj_type type ## _ktype = { \
static void bch2_fs_release(struct kobject *); static void bch2_fs_release(struct kobject *);
static void bch2_dev_release(struct kobject *); static void bch2_dev_release(struct kobject *);
/*
 * kobject release callback for the counters kobject, paired with
 * KTYPE(bch2_fs_counters).
 *
 * Intentionally empty: counters_kobj is a member of struct bch_fs (the show
 * handler recovers the filesystem from it with container_of()), so there is
 * no separate allocation for this callback to free.
 */
static void bch2_fs_counters_release(struct kobject *k)
{
}
static void bch2_fs_internal_release(struct kobject *k) static void bch2_fs_internal_release(struct kobject *k)
{ {
@ -85,6 +89,7 @@ static void bch2_fs_time_stats_release(struct kobject *k)
} }
static KTYPE(bch2_fs); static KTYPE(bch2_fs);
static KTYPE(bch2_fs_counters);
static KTYPE(bch2_fs_internal); static KTYPE(bch2_fs_internal);
static KTYPE(bch2_fs_opts_dir); static KTYPE(bch2_fs_opts_dir);
static KTYPE(bch2_fs_time_stats); static KTYPE(bch2_fs_time_stats);
@ -188,57 +193,33 @@ static void __bch2_fs_read_only(struct bch_fs *c)
{ {
struct bch_dev *ca; struct bch_dev *ca;
unsigned i, clean_passes = 0; unsigned i, clean_passes = 0;
u64 seq = 0;
bch2_rebalance_stop(c); bch2_rebalance_stop(c);
bch2_copygc_stop(c); bch2_copygc_stop(c);
bch2_gc_thread_stop(c); bch2_gc_thread_stop(c);
/*
* Flush journal before stopping allocators, because flushing journal
* blacklist entries involves allocating new btree nodes:
*/
bch2_journal_flush_all_pins(&c->journal);
bch_verbose(c, "flushing journal and stopping allocators"); bch_verbose(c, "flushing journal and stopping allocators");
bch2_journal_flush_all_pins(&c->journal);
do { do {
clean_passes++; clean_passes++;
if (bch2_journal_flush_all_pins(&c->journal)) if (bch2_btree_interior_updates_flush(c) ||
clean_passes = 0; bch2_journal_flush_all_pins(&c->journal) ||
bch2_btree_flush_all_writes(c) ||
/* seq != atomic64_read(&c->journal.seq)) {
* In flight interior btree updates will generate more journal seq = atomic64_read(&c->journal.seq);
* updates and btree updates (alloc btree):
*/
if (bch2_btree_interior_updates_nr_pending(c)) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
clean_passes = 0; clean_passes = 0;
} }
flush_work(&c->btree_interior_update_work);
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
} while (clean_passes < 2); } while (clean_passes < 2);
bch_verbose(c, "flushing journal and stopping allocators complete"); bch_verbose(c, "flushing journal and stopping allocators complete");
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
closure_wait_event(&c->btree_interior_update_wait, set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
!bch2_btree_interior_updates_nr_pending(c));
flush_work(&c->btree_interior_update_work);
bch2_fs_journal_stop(&c->journal); bch2_fs_journal_stop(&c->journal);
/*
* the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal:
*/
bch2_btree_flush_all_writes(c);
/* /*
* After stopping journal: * After stopping journal:
*/ */
@ -297,7 +278,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags) && test_bit(BCH_FS_STARTED, &c->flags) &&
test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) && test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
!c->opts.norecovery) { !c->opts.norecovery) {
bch_verbose(c, "marking filesystem clean"); bch_verbose(c, "marking filesystem clean");
bch2_fs_mark_clean(c); bch2_fs_mark_clean(c);
@ -388,7 +369,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
if (ret) if (ret)
goto err; goto err;
clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags); clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);
for_each_rw_member(ca, c, i) for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca); bch2_dev_allocator_add(c, ca);
@ -517,6 +498,7 @@ void __bch2_fs_stop(struct bch_fs *c)
bch2_fs_debug_exit(c); bch2_fs_debug_exit(c);
bch2_fs_chardev_exit(c); bch2_fs_chardev_exit(c);
kobject_put(&c->counters_kobj);
kobject_put(&c->time_stats); kobject_put(&c->time_stats);
kobject_put(&c->opts_dir); kobject_put(&c->opts_dir);
kobject_put(&c->internal); kobject_put(&c->internal);
@ -585,6 +567,7 @@ static int bch2_fs_online(struct bch_fs *c)
kobject_add(&c->internal, &c->kobj, "internal") ?: kobject_add(&c->internal, &c->kobj, "internal") ?:
kobject_add(&c->opts_dir, &c->kobj, "options") ?: kobject_add(&c->opts_dir, &c->kobj, "options") ?:
kobject_add(&c->time_stats, &c->kobj, "time_stats") ?: kobject_add(&c->time_stats, &c->kobj, "time_stats") ?:
kobject_add(&c->counters_kobj, &c->kobj, "counters") ?:
bch2_opts_create_sysfs_files(&c->opts_dir); bch2_opts_create_sysfs_files(&c->opts_dir);
if (ret) { if (ret) {
bch_err(c, "error creating sysfs objects"); bch_err(c, "error creating sysfs objects");
@ -633,6 +616,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
kobject_init(&c->internal, &bch2_fs_internal_ktype); kobject_init(&c->internal, &bch2_fs_internal_ktype);
kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype); kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype); kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
kobject_init(&c->counters_kobj, &bch2_fs_counters_ktype);
c->minor = -1; c->minor = -1;
c->disk_sb.fs_sb = true; c->disk_sb.fs_sb = true;
@ -796,7 +780,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_encryption_init(c) ?: bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?: bch2_fs_compress_init(c) ?:
bch2_fs_ec_init(c) ?: bch2_fs_ec_init(c) ?:
bch2_fs_fsio_init(c); bch2_fs_fsio_init(c) ?:
bch2_fs_counters_init(c);
if (ret) if (ret)
goto err; goto err;

View File

@ -40,7 +40,7 @@
#include "util.h" #include "util.h"
#define SYSFS_OPS(type) \ #define SYSFS_OPS(type) \
struct sysfs_ops type ## _sysfs_ops = { \ const struct sysfs_ops type ## _sysfs_ops = { \
.show = type ## _show, \ .show = type ## _show, \
.store = type ## _store \ .store = type ## _store \
} }
@ -55,6 +55,9 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
struct printbuf out = PRINTBUF; \ struct printbuf out = PRINTBUF; \
ssize_t ret = fn ## _to_text(&out, kobj, attr); \ ssize_t ret = fn ## _to_text(&out, kobj, attr); \
\ \
if (out.pos && out.buf[out.pos - 1] != '\n') \
pr_newline(&out); \
\
if (!ret && out.allocation_failure) \ if (!ret && out.allocation_failure) \
ret = -ENOMEM; \ ret = -ENOMEM; \
\ \
@ -191,6 +194,10 @@ read_attribute(extent_migrate_done);
read_attribute(extent_migrate_raced); read_attribute(extent_migrate_raced);
read_attribute(bucket_alloc_fail); read_attribute(bucket_alloc_fail);
/*
 * Expand read_attribute() once per entry of BCH_PERSISTENT_COUNTERS().
 * Only the first x-macro argument (the counter name) is used; the remaining
 * arguments are ignored here.
 * NOTE(review): presumably read_attribute(t) defines the sysfs_<t> attribute
 * object referenced by the counters show handler - confirm against the macro.
 */
#define x(t, n, ...) read_attribute(t);
BCH_PERSISTENT_COUNTERS()
#undef x
rw_attribute(discard); rw_attribute(discard);
rw_attribute(label); rw_attribute(label);
@ -544,6 +551,47 @@ struct attribute *bch2_fs_files[] = {
NULL NULL
}; };
/* counters dir */
/*
 * Show handler for the attributes of the counters kobject.
 *
 * For the persistent counter whose attribute matches @attr, two lines are
 * written to @out:
 *   "since mount:"               - current total minus counters_on_mount[]
 *   "since filesystem creation:" - current total from percpu_u64_get()
 *
 * Both values are shifted left by 9 before being passed to bch2_hprint().
 * NOTE(review): presumably this converts 512-byte sectors to bytes - confirm
 * that every persistent counter is sector based.
 *
 * Nothing is printed if @attr matches no counter. Always returns 0.
 */
SHOW(bch2_fs_counters)
{
	struct bch_fs *c = container_of(kobj, struct bch_fs, counters_kobj);

	/* First tab stop, used by the pr_tab() call after each label. */
	out->tabstops[0] = 32;

#define x(t, ...)							\
	if (attr == &sysfs_##t) {					\
		u64 total = percpu_u64_get(&c->counters[BCH_COUNTER_##t]); \
		u64 since_mount =					\
			total - c->counters_on_mount[BCH_COUNTER_##t];	\
									\
		pr_buf(out, "since mount:");				\
		pr_tab(out);						\
		bch2_hprint(out, since_mount << 9);			\
		pr_newline(out);					\
									\
		pr_buf(out, "since filesystem creation:");		\
		pr_tab(out);						\
		bch2_hprint(out, total << 9);				\
		pr_newline(out);					\
	}
	BCH_PERSISTENT_COUNTERS()
#undef x

	return 0;
}
/*
 * Store handler for the attributes of the counters kobject.
 *
 * Ignores whatever is written and returns 0. It exists so that
 * SYSFS_OPS(bch2_fs_counters) has a store function to reference.
 */
STORE(bch2_fs_counters)
{
	return 0;
}
/*
 * Define bch2_fs_counters_sysfs_ops, wiring .show/.store to the
 * bch2_fs_counters_show/_store handlers (see the SYSFS_OPS() macro).
 */
SYSFS_OPS(bch2_fs_counters);
/*
 * NULL-terminated attribute list for the counters kobject: one &sysfs_<name>
 * entry per counter listed in BCH_PERSISTENT_COUNTERS().
 */
struct attribute *bch2_fs_counters_files[] = {
#define x(t, ...)	&sysfs_##t,
	BCH_PERSISTENT_COUNTERS()
#undef x
	NULL
};
/* internal dir - just a wrapper */ /* internal dir - just a wrapper */
SHOW(bch2_fs_internal) SHOW(bch2_fs_internal)
@ -614,7 +662,7 @@ STORE(bch2_fs_opts_dir)
{ {
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
const struct bch_option *opt = container_of(attr, struct bch_option, attr); const struct bch_option *opt = container_of(attr, struct bch_option, attr);
int ret = size, id = opt - bch2_opt_table; int ret, id = opt - bch2_opt_table;
char *tmp; char *tmp;
u64 v; u64 v;
@ -649,6 +697,8 @@ STORE(bch2_fs_opts_dir)
bch2_rebalance_add_work(c, S64_MAX); bch2_rebalance_add_work(c, S64_MAX);
rebalance_wakeup(c); rebalance_wakeup(c);
} }
ret = size;
err: err:
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
return ret; return ret;

View File

@ -10,28 +10,32 @@ struct attribute;
struct sysfs_ops; struct sysfs_ops;
extern struct attribute *bch2_fs_files[]; extern struct attribute *bch2_fs_files[];
extern struct attribute *bch2_fs_counters_files[];
extern struct attribute *bch2_fs_internal_files[]; extern struct attribute *bch2_fs_internal_files[];
extern struct attribute *bch2_fs_opts_dir_files[]; extern struct attribute *bch2_fs_opts_dir_files[];
extern struct attribute *bch2_fs_time_stats_files[]; extern struct attribute *bch2_fs_time_stats_files[];
extern struct attribute *bch2_dev_files[]; extern struct attribute *bch2_dev_files[];
extern struct sysfs_ops bch2_fs_sysfs_ops; extern const struct sysfs_ops bch2_fs_sysfs_ops;
extern struct sysfs_ops bch2_fs_internal_sysfs_ops; extern const struct sysfs_ops bch2_fs_counters_sysfs_ops;
extern struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; extern const struct sysfs_ops bch2_fs_internal_sysfs_ops;
extern struct sysfs_ops bch2_fs_time_stats_sysfs_ops; extern const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
extern struct sysfs_ops bch2_dev_sysfs_ops; extern const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;
extern const struct sysfs_ops bch2_dev_sysfs_ops;
int bch2_opts_create_sysfs_files(struct kobject *); int bch2_opts_create_sysfs_files(struct kobject *);
#else #else
static struct attribute *bch2_fs_files[] = {}; static struct attribute *bch2_fs_files[] = {};
static struct attribute *bch2_fs_counters_files[] = {};
static struct attribute *bch2_fs_internal_files[] = {}; static struct attribute *bch2_fs_internal_files[] = {};
static struct attribute *bch2_fs_opts_dir_files[] = {}; static struct attribute *bch2_fs_opts_dir_files[] = {};
static struct attribute *bch2_fs_time_stats_files[] = {}; static struct attribute *bch2_fs_time_stats_files[] = {};
static struct attribute *bch2_dev_files[] = {}; static struct attribute *bch2_dev_files[] = {};
static const struct sysfs_ops bch2_fs_sysfs_ops; static const struct sysfs_ops bch2_fs_sysfs_ops;
static const struct sysfs_ops bch2_fs_counters_sysfs_ops;
static const struct sysfs_ops bch2_fs_internal_sysfs_ops; static const struct sysfs_ops bch2_fs_internal_sysfs_ops;
static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops; static const struct sysfs_ops bch2_fs_opts_dir_sysfs_ops;
static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops; static const struct sysfs_ops bch2_fs_time_stats_sysfs_ops;

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/math.h>
#include <linux/string.h> #include <linux/string.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>

View File

@ -306,10 +306,10 @@ void ranges_sort_merge(ranges *r)
if (t && t->end >= i->start) if (t && t->end >= i->start)
t->end = max(t->end, i->end); t->end = max(t->end, i->end);
else else
darray_push(tmp, *i); darray_push(&tmp, *i);
} }
darray_exit(*r); darray_exit(r);
*r = tmp; *r = tmp;
} }

View File

@ -76,7 +76,7 @@ typedef DARRAY(struct range) ranges;
static inline void range_add(ranges *data, u64 offset, u64 size) static inline void range_add(ranges *data, u64 offset, u64 size)
{ {
darray_push(*data, ((struct range) { darray_push(data, ((struct range) {
.start = offset, .start = offset,
.end = offset + size .end = offset + size
})); }));