Update bcachefs sources to a5c0e1bb30 bcachefs: Clean up bch2_btree_and_journal_walk()

Kent Overstreet 2021-04-30 16:48:21 -04:00
parent a14d39d7ac
commit bb74624daa
34 changed files with 297 additions and 188 deletions

View File

@ -1 +1 @@
8d3093bd9b9254957badce4a4ff178baeb3632ed
a5c0e1bb306e79b40b2432a22f164697c8b22110

View File

@ -8,12 +8,43 @@
#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/kobject.h>
struct bio_set;
struct bio;
struct block_device;
typedef void (bio_end_io_t) (struct bio *);
#define BDEVNAME_SIZE 32
struct request_queue {
struct backing_dev_info *backing_dev_info;
};
struct gendisk {
};
struct hd_struct {
struct kobject kobj;
};
struct block_device {
struct kobject kobj;
dev_t bd_dev;
char name[BDEVNAME_SIZE];
struct inode *bd_inode;
struct request_queue queue;
void *bd_holder;
struct gendisk * bd_disk;
struct gendisk __bd_disk;
int bd_fd;
int bd_sync_fd;
struct backing_dev_info *bd_bdi;
struct backing_dev_info __bd_bdi;
};
#define bdev_kobj(_bdev) (&((_bdev)->kobj))
/*
* Block error status values. See block/blk-core:blk_errors for the details.
*/

View File

@ -59,36 +59,8 @@ static inline struct inode *file_inode(const struct file *f)
return f->f_inode;
}
#define BDEVNAME_SIZE 32
struct request_queue {
struct backing_dev_info *backing_dev_info;
};
struct gendisk {
};
struct hd_struct {
struct kobject kobj;
};
#define part_to_dev(part) (part)
struct block_device {
char name[BDEVNAME_SIZE];
struct inode *bd_inode;
struct request_queue queue;
void *bd_holder;
struct hd_struct *bd_part;
struct gendisk *bd_disk;
struct gendisk __bd_disk;
int bd_fd;
int bd_sync_fd;
struct backing_dev_info *bd_bdi;
struct backing_dev_info __bd_bdi;
};
void generic_make_request(struct bio *);
int submit_bio_wait(struct bio *);
@ -111,7 +83,7 @@ sector_t get_capacity(struct gendisk *disk);
void blkdev_put(struct block_device *bdev, fmode_t mode);
void bdput(struct block_device *bdev);
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder);
struct block_device *lookup_bdev(const char *path);
int lookup_bdev(const char *path, dev_t *);
struct super_block {
void *s_fs_info;

View File

@ -4,5 +4,6 @@
#define try_to_freeze()
#define set_freezable()
#define freezing(task) false
#define freezable_schedule_timeout(_t) schedule_timeout(_t);
#endif /* __TOOLS_LINUX_FREEZER_H */

View File

@ -27,11 +27,7 @@
#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
/********** mm/page_poison.c **********/
#ifdef CONFIG_PAGE_POISONING_ZERO
#define PAGE_POISON 0x00
#else
#define PAGE_POISON 0xaa
#endif
/********** mm/page_alloc.c ************/

View File

@ -528,6 +528,62 @@ TRACE_EVENT(copygc,
__entry->buckets_moved, __entry->buckets_not_moved)
);
TRACE_EVENT(trans_get_iter,
TP_PROTO(unsigned long caller, unsigned long ip,
enum btree_id btree_id,
struct bpos *pos_want,
unsigned locks_want,
struct bpos *pos_found,
unsigned locks_found,
unsigned uptodate),
TP_ARGS(caller, ip, btree_id,
pos_want, locks_want,
pos_found, locks_found,
uptodate),
TP_STRUCT__entry(
__field(unsigned long, caller )
__field(unsigned long, ip )
__field(u8, btree_id )
__field(u8, uptodate )
__field(u8, locks_want )
__field(u8, locks_found )
__field(u64, pos_want_inode )
__field(u64, pos_want_offset )
__field(u32, pos_want_snapshot )
__field(u64, pos_found_inode )
__field(u64, pos_found_offset )
__field(u32, pos_found_snapshot )
),
TP_fast_assign(
__entry->caller = caller;
__entry->ip = ip;
__entry->btree_id = btree_id;
__entry->uptodate = uptodate;
__entry->pos_want_inode = pos_want->inode;
__entry->pos_want_offset = pos_want->offset;
__entry->pos_want_snapshot = pos_want->snapshot;
__entry->pos_found_inode = pos_found->inode;
__entry->pos_found_offset = pos_found->offset;
__entry->pos_found_snapshot = pos_found->snapshot;
),
TP_printk("%ps %pS btree %u uptodate %u want %llu:%llu:%u locks %u found %llu:%llu:%u locks %u",
(void *) __entry->caller,
(void *) __entry->ip,
__entry->btree_id,
__entry->uptodate,
__entry->pos_want_inode,
__entry->pos_want_offset,
__entry->pos_want_snapshot,
__entry->locks_want,
__entry->pos_found_inode,
__entry->pos_found_offset,
__entry->pos_found_snapshot,
__entry->locks_found)
);
TRACE_EVENT(transaction_restart_ip,
TP_PROTO(unsigned long caller, unsigned long ip),
TP_ARGS(caller, ip),
@ -565,6 +621,11 @@ DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused,
TP_ARGS(ip)
);
DEFINE_EVENT(transaction_restart, trans_blocked_journal_reclaim,
TP_PROTO(unsigned long ip),
TP_ARGS(ip)
);
TRACE_EVENT(trans_restart_would_deadlock,
TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip,

View File

@ -261,16 +261,14 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
#undef x
}
static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
static int bch2_alloc_read_fn(struct bch_fs *c, struct bkey_s_c k)
{
struct bch_dev *ca;
struct bucket *g;
struct bkey_alloc_unpacked u;
if (level ||
(k.k->type != KEY_TYPE_alloc &&
k.k->type != KEY_TYPE_alloc_v2))
if (k.k->type != KEY_TYPE_alloc &&
k.k->type != KEY_TYPE_alloc_v2)
return 0;
ca = bch_dev_bkey_exists(c, k.k->p.inode);
@ -289,13 +287,12 @@ static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
return 0;
}
int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
int bch2_alloc_read(struct bch_fs *c)
{
int ret;
down_read(&c->gc_lock);
ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_alloc,
NULL, bch2_alloc_read_fn);
ret = bch2_btree_and_journal_walk(c, BTREE_ID_alloc, bch2_alloc_read_fn);
up_read(&c->gc_lock);
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);

View File

@ -91,8 +91,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.val_to_text = bch2_alloc_to_text, \
}
struct journal_keys;
int bch2_alloc_read(struct bch_fs *, struct journal_keys *);
int bch2_alloc_read(struct bch_fs *);
static inline void bch2_wake_allocator(struct bch_dev *ca)
{

View File

@ -605,11 +605,13 @@ struct bch_fs {
u64 time_base_lo;
u32 time_base_hi;
u32 time_precision;
unsigned time_units_per_sec;
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
} sb;
struct bch_sb_handle disk_sb;
unsigned short block_bits; /* ilog2(block_size) */
@ -873,19 +875,22 @@ static inline unsigned block_bytes(const struct bch_fs *c)
return c->opts.block_size << 9;
}
static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, u64 time)
static inline struct timespec64 bch2_time_to_timespec(struct bch_fs *c, s64 time)
{
return ns_to_timespec64(time * c->sb.time_precision + c->sb.time_base_lo);
struct timespec64 t;
s32 rem;
time += c->sb.time_base_lo;
t.tv_sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem);
t.tv_nsec = rem * c->sb.nsec_per_time_unit;
return t;
}
static inline s64 timespec_to_bch2_time(struct bch_fs *c, struct timespec64 ts)
{
s64 ns = timespec64_to_ns(&ts) - c->sb.time_base_lo;
if (c->sb.time_precision == 1)
return ns;
return div_s64(ns, c->sb.time_precision);
return (ts.tv_sec * c->sb.time_units_per_sec +
(int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo;
}
static inline s64 bch2_current_time(struct bch_fs *c)
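The two helpers above are the core of the new time representation: on-disk times are kept in units of sb.time_precision nanoseconds, and time_base_lo is now converted into those units when the superblock is read (see the bch2_sb_update() hunk further down). As a rough, self-contained model of that conversion — not the commit's code; the struct, helper names and the 1us precision below are made up for illustration:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

/* hypothetical mirror of the c->sb fields set up by bch2_sb_update() */
struct example_sb_times {
	int64_t  time_base_lo;		/* now stored in time units, not ns */
	uint32_t time_units_per_sec;	/* NSEC_PER_SEC / time_precision */
	uint32_t nsec_per_time_unit;	/* time_precision from the superblock */
};

/* cf. timespec_to_bch2_time() above */
static int64_t to_fs_time(const struct example_sb_times *sb, int64_t sec, long nsec)
{
	return (sec * sb->time_units_per_sec +
		(int) (nsec / sb->nsec_per_time_unit)) - sb->time_base_lo;
}

/* cf. bch2_time_to_timespec() above; the kernel version uses div_s64_rem()
 * so that negative (pre-epoch) times divide correctly -- skipped here */
static void to_timespec(const struct example_sb_times *sb, int64_t time,
			int64_t *sec, long *nsec)
{
	time += sb->time_base_lo;
	*sec  = time / sb->time_units_per_sec;
	*nsec = (long) (time % sb->time_units_per_sec) * sb->nsec_per_time_unit;
}

int main(void)
{
	struct example_sb_times sb = {
		.nsec_per_time_unit	= 1000,			/* 1us precision */
		.time_units_per_sec	= NSEC_PER_SEC / 1000,
		.time_base_lo		= 0,
	};
	int64_t sec, t = to_fs_time(&sb, 1619816901, 123456);
	long nsec;

	to_timespec(&sb, t, &sec, &nsec);
	/* prints 1619816901000123 -> 1619816901.000123000:
	 * sub-microsecond digits are lost at 1us precision */
	printf("%lld -> %lld.%09ld\n", (long long) t, (long long) sec, nsec);
	return 0;
}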

View File

@ -581,6 +581,7 @@ out:
b->sib_u64s[1] = 0;
b->whiteout_u64s = 0;
bch2_btree_keys_init(b);
set_btree_node_accessed(b);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
start_time);
@ -653,9 +654,13 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return NULL;
}
/* Unlock before doing IO: */
if (iter && sync)
bch2_trans_unlock(iter->trans);
/*
* Unlock before doing IO:
*
* XXX: ideally should be dropping all btree node locks here
*/
if (iter && btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
bch2_btree_node_read(c, b, sync);
@ -666,16 +671,6 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return NULL;
}
/*
* XXX: this will probably always fail because btree_iter_relock()
* currently fails for iterators that aren't pointed at a valid btree
* node
*/
if (iter && !bch2_trans_relock(iter->trans)) {
six_unlock_intent(&b->c.lock);
return ERR_PTR(-EINTR);
}
if (lock_type == SIX_LOCK_read)
six_lock_downgrade(&b->c.lock);
@ -817,22 +812,9 @@ lock_node:
}
}
if (unlikely(btree_node_read_in_flight(b))) {
six_unlock_type(&b->c.lock, lock_type);
bch2_trans_unlock(iter->trans);
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
TASK_UNINTERRUPTIBLE);
/*
* XXX: check if this always fails - btree_iter_relock()
* currently fails for iterators that aren't pointed at a valid
* btree node
*/
if (iter && !bch2_trans_relock(iter->trans))
return ERR_PTR(-EINTR);
goto retry;
}
/* XXX: waiting on IO with btree locks held: */
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
TASK_UNINTERRUPTIBLE);
prefetch(b->aux_data);

View File

@ -2013,6 +2013,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
unsigned flags)
{
struct btree_iter *iter, *best = NULL;
struct bpos real_pos, pos_min = POS_MIN;
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS) &&
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_IS_EXTENTS;
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
!btree_type_has_snapshots(btree_id))
@ -2022,6 +2029,12 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
pos.snapshot = btree_type_has_snapshots(btree_id)
? U32_MAX : 0;
real_pos = pos;
if ((flags & BTREE_ITER_IS_EXTENTS) &&
bkey_cmp(pos, POS_MAX))
real_pos = bpos_nosnap_successor(pos);
trans_for_each_iter(trans, iter) {
if (btree_iter_type(iter) != (flags & BTREE_ITER_TYPE))
continue;
@ -2030,8 +2043,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
continue;
if (best) {
int cmp = bkey_cmp(bpos_diff(best->real_pos, pos),
bpos_diff(iter->real_pos, pos));
int cmp = bkey_cmp(bpos_diff(best->real_pos, real_pos),
bpos_diff(iter->real_pos, real_pos));
if (cmp < 0 ||
((cmp == 0 && btree_iter_keep(trans, iter))))
@ -2041,6 +2054,13 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
best = iter;
}
trace_trans_get_iter(_RET_IP_, trans->ip,
btree_id,
&real_pos, locks_want,
best ? &best->real_pos : &pos_min,
best ? best->locks_want : 0,
best ? best->uptodate : BTREE_ITER_NEED_TRAVERSE);
if (!best) {
iter = btree_trans_iter_alloc(trans);
bch2_btree_iter_init(trans, iter, btree_id);
@ -2054,12 +2074,6 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
trans->iters_live |= 1ULL << iter->idx;
trans->iters_touched |= 1ULL << iter->idx;
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS) &&
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_IS_EXTENTS;
iter->flags = flags;
iter->snapshot = pos.snapshot;
@ -2078,19 +2092,20 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
btree_iter_get_locks(iter, true, false);
}
while (iter->level < depth) {
while (iter->level != depth) {
btree_node_unlock(iter, iter->level);
iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
iter->level++;
iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
if (iter->level < depth)
iter->level++;
else
iter->level--;
}
while (iter->level > depth)
iter->l[--iter->level].b = BTREE_ITER_NO_NODE_INIT;
iter->min_depth = depth;
bch2_btree_iter_set_pos(iter, pos);
btree_iter_set_search_pos(iter, btree_iter_search_key(iter));
btree_iter_set_search_pos(iter, real_pos);
return iter;
}

View File

@ -645,8 +645,10 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_key_cache.shrink);
struct btree_key_cache *bc = &c->btree_key_cache;
long nr = atomic_long_read(&bc->nr_keys) -
atomic_long_read(&bc->nr_dirty);
return atomic_long_read(&bc->nr_keys);
return max(0L, nr);
}
void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)

View File

@ -725,6 +725,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
bch2_trans_unlock(trans);
trace_trans_blocked_journal_reclaim(trans->ip);
wait_event_freezable(c->journal.reclaim_wait,
(ret = journal_reclaim_wait_done(c)));
if (ret < 0)

View File

@ -898,6 +898,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
bch2_inconsistent_error(c);
return -EIO;
}
@ -1015,6 +1016,7 @@ static int bch2_mark_stripe(struct bch_fs *c,
if (!m || (old_s && !m->alive)) {
bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
idx);
bch2_inconsistent_error(c);
return -1;
}
@ -1499,6 +1501,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
bch2_fs_inconsistent(c,
"pointer to nonexistent stripe %llu",
(u64) p.ec.idx);
bch2_inconsistent_error(c);
ret = -EIO;
goto out;
}
@ -1739,6 +1742,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}

View File

@ -1630,26 +1630,22 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags)
return ret;
}
static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
static int bch2_stripes_read_fn(struct bch_fs *c, struct bkey_s_c k)
{
int ret = 0;
if (k.k->type == KEY_TYPE_stripe) {
if (k.k->type == KEY_TYPE_stripe)
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
BTREE_TRIGGER_NOATOMIC);
if (ret)
return ret;
}
return ret;
}
int bch2_stripes_read(struct bch_fs *c, struct journal_keys *journal_keys)
int bch2_stripes_read(struct bch_fs *c)
{
int ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_stripes,
NULL, bch2_stripes_read_fn);
int ret = bch2_btree_and_journal_walk(c, BTREE_ID_stripes,
bch2_stripes_read_fn);
if (ret)
bch_err(c, "error reading stripes: %i", ret);

View File

@ -215,8 +215,7 @@ void bch2_ec_flush_new_stripes(struct bch_fs *);
void bch2_stripes_heap_start(struct bch_fs *);
struct journal_keys;
int bch2_stripes_read(struct bch_fs *, struct journal_keys *);
int bch2_stripes_read(struct bch_fs *);
int bch2_stripes_write(struct bch_fs *, unsigned);
int bch2_ec_mem_alloc(struct bch_fs *, bool);

View File

@ -1931,8 +1931,9 @@ loop:
i_size_write(&inode->v, req->ki_pos);
spin_unlock(&inode->v.i_lock);
bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
if (likely(!bio_flagged(bio, BIO_NO_PAGE_REF)))
bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
if (dio->op.error) {
set_bit(EI_INODE_ERROR, &inode->ei_flags);
@ -2393,6 +2394,15 @@ err:
/* fallocate: */
static int inode_update_times_fn(struct bch_inode_info *inode,
struct bch_inode_unpacked *bi, void *p)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
bi->bi_mtime = bi->bi_ctime = bch2_current_time(c);
return 0;
}
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
@ -2430,6 +2440,11 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
&i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME) ?: ret;
mutex_unlock(&inode->ei_update_lock);
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);

View File

@ -143,7 +143,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
struct bch_inode_unpacked inode_u;
int ret;
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_init(&trans, c, 0, 256);
retry:
bch2_trans_begin(&trans);
@ -998,10 +998,7 @@ static const struct file_operations bch_file_operations = {
.open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = generic_file_splice_read,
#if 0
/* Busted: */
.splice_write = iter_file_splice_write,
#endif
.fallocate = bch2_fallocate_dispatch,
.unlocked_ioctl = bch2_fs_file_ioctl,
#ifdef CONFIG_COMPAT
@ -1293,16 +1290,17 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
return bch2_journal_flush(&c->journal);
}
static struct bch_fs *bch2_path_to_fs(const char *dev)
static struct bch_fs *bch2_path_to_fs(const char *path)
{
struct bch_fs *c;
struct block_device *bdev = lookup_bdev(dev);
dev_t dev;
int ret;
if (IS_ERR(bdev))
return ERR_CAST(bdev);
ret = lookup_bdev(path, &dev);
if (ret)
return ERR_PTR(ret);
c = bch2_bdev_to_fs(bdev);
bdput(bdev);
c = bch2_dev_to_fs(dev);
if (c)
closure_put(&c->cl);
return c ?: ERR_PTR(-ENOENT);
@ -1554,7 +1552,9 @@ got_sb:
#endif
sb->s_xattr = bch2_xattr_handlers;
sb->s_magic = BCACHEFS_STATFS_MAGIC;
sb->s_time_gran = c->sb.time_precision;
sb->s_time_gran = c->sb.nsec_per_time_unit;
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
c->vfs_sb = sb;
strlcpy(sb->s_id, c->name, sizeof(sb->s_id));

View File

@ -1968,6 +1968,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
k.k->type != KEY_TYPE_indirect_inline_data) {
bch_err_inum_ratelimited(trans->c, orig_k->k->k.p.inode,
"pointer to nonexistent indirect extent");
bch2_inconsistent_error(trans->c);
ret = -EIO;
goto err;
}

View File

@ -1187,6 +1187,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"nr noflush writes:\t%llu\n"
"nr direct reclaim:\t%llu\n"
"nr background reclaim:\t%llu\n"
"reclaim kicked:\t\t%u\n"
"reclaim runs in:\t%u ms\n"
"current entry sectors:\t%u\n"
"current entry error:\t%u\n"
"current entry:\t\t",
@ -1202,6 +1204,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->nr_noflush_writes,
j->nr_direct_reclaim,
j->nr_background_reclaim,
j->reclaim_kicked,
jiffies_to_msecs(j->next_reclaim - jiffies),
j->cur_entry_sectors,
j->cur_entry_error);

View File

@ -677,13 +677,15 @@ int bch2_journal_reclaim(struct journal *j)
static int bch2_journal_reclaim_thread(void *arg)
{
struct journal *j = arg;
unsigned long next;
unsigned long delay, now;
int ret = 0;
set_freezable();
kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
j->last_flushed = jiffies;
while (!ret && !kthread_should_stop()) {
j->reclaim_kicked = false;
@ -691,7 +693,12 @@ static int bch2_journal_reclaim_thread(void *arg)
ret = __bch2_journal_reclaim(j, false);
mutex_unlock(&j->reclaim_lock);
next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
now = jiffies;
delay = msecs_to_jiffies(j->reclaim_delay_ms);
j->next_reclaim = j->last_flushed + delay;
if (!time_in_range(j->next_reclaim, now, now + delay))
j->next_reclaim = now + delay;
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
@ -699,10 +706,9 @@ static int bch2_journal_reclaim_thread(void *arg)
break;
if (j->reclaim_kicked)
break;
if (time_after_eq(jiffies, next))
if (time_after_eq(jiffies, j->next_reclaim))
break;
schedule_timeout(next - jiffies);
try_to_freeze();
freezable_schedule_timeout(j->next_reclaim - jiffies);
}
__set_current_state(TASK_RUNNING);
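For orientation, the scheduling change above amounts to the following restatement (illustrative helper, not code from the commit): reclaim aims to run at last_flushed + reclaim_delay_ms, but if that target falls outside the next delay window — for example because last_flushed is stale — it is clamped so the thread neither spins on an already-expired timeout nor oversleeps:

#include <linux/jiffies.h>

/* illustrative only -- the commit computes this inline in the thread loop */
static unsigned long example_next_reclaim(unsigned long last_flushed,
					  unsigned long now,
					  unsigned long delay)
{
	unsigned long next = last_flushed + delay;

	/* clamp a stale or bogus target into [now, now + delay] */
	if (!time_in_range(next, now, now + delay))
		next = now + delay;

	return next;
}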

View File

@ -8,11 +8,9 @@ static inline void journal_reclaim_kick(struct journal *j)
{
struct task_struct *p = READ_ONCE(j->reclaim_thread);
if (p && !j->reclaim_kicked) {
j->reclaim_kicked = true;
if (p)
wake_up_process(p);
}
j->reclaim_kicked = true;
if (p)
wake_up_process(p);
}
unsigned bch2_journal_dev_buckets_available(struct journal *,

View File

@ -248,6 +248,7 @@ struct journal {
wait_queue_head_t reclaim_wait;
struct task_struct *reclaim_thread;
bool reclaim_kicked;
unsigned long next_reclaim;
u64 nr_direct_reclaim;
u64 nr_background_reclaim;

View File

@ -293,17 +293,19 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned dev_idx;
u64 fragmented_allowed = 0, fragmented = 0;
s64 wait = S64_MAX, fragmented_allowed, fragmented;
for_each_rw_member(ca, c, dev_idx) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
fragmented_allowed += ((__dev_buckets_reclaimable(ca, usage) *
fragmented_allowed = ((__dev_buckets_reclaimable(ca, usage) *
ca->mi.bucket_size) >> 1);
fragmented += usage.d[BCH_DATA_user].fragmented;
fragmented = usage.d[BCH_DATA_user].fragmented;
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
}
return max_t(s64, 0, fragmented_allowed - fragmented);
return wait;
}
static int bch2_copygc_thread(void *arg)
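The copygc wait calculation above switches from one fs-wide sum to a per-device minimum. A made-up two-device example (not from the commit) shows why that matters: a device that is already badly fragmented should trigger copygc even if another, larger device still has plenty of headroom:

#include <stdint.h>
#include <stdio.h>

static int64_t max64(int64_t a, int64_t b) { return a > b ? a : b; }
static int64_t min64(int64_t a, int64_t b) { return a < b ? a : b; }

int main(void)
{
	/* hypothetical per-device numbers, in sectors */
	int64_t allowed[2]    = { 100 << 20, 10 << 20 };	/* reclaimable/2 */
	int64_t fragmented[2] = {  10 << 20, 60 << 20 };
	int64_t sum_allowed = 0, sum_fragmented = 0, wait = INT64_MAX;

	for (int i = 0; i < 2; i++) {
		/* old: one global budget across all devices */
		sum_allowed	+= allowed[i];
		sum_fragmented	+= fragmented[i];
		/* new: wait only until *some* device exceeds its own budget */
		wait = min64(wait, max64(0, allowed[i] - fragmented[i]));
	}

	printf("old wait: %lld sectors\n",
	       (long long) max64(0, sum_allowed - sum_fragmented));	/* 40M */
	printf("new wait: %lld sectors\n", (long long) wait);		/* 0 */
	return 0;
}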

View File

@ -372,6 +372,7 @@ static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
if (ret)
break;
}
bch2_trans_iter_put(&trans, iter);
return bch2_trans_exit(&trans) ?: ret;
}
@ -449,6 +450,8 @@ int bch2_fs_quota_read(struct bch_fs *c)
KEY_TYPE_QUOTA_NOCHECK);
}
}
bch2_trans_iter_put(&trans, iter);
return bch2_trans_exit(&trans) ?: ret;
}

View File

@ -323,9 +323,7 @@ static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
}
static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b,
struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
btree_walk_key_fn key_fn)
{
struct btree_and_journal_iter iter;
@ -338,15 +336,9 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
ret = key_fn(c, btree_id, b->c.level, k);
if (ret)
break;
if (b->c.level) {
bch2_bkey_buf_reassemble(&tmp, c, k);
bch2_btree_and_journal_iter_advance(&iter);
child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1,
false);
@ -357,16 +349,17 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
btree_and_journal_iter_prefetch(c, b, iter);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, child,
journal_keys, btree_id, node_fn, key_fn);
ret = bch2_btree_and_journal_walk_recurse(c, child,
btree_id, key_fn);
six_unlock_read(&child->c.lock);
if (ret)
break;
} else {
bch2_btree_and_journal_iter_advance(&iter);
ret = key_fn(c, k);
}
if (ret)
break;
bch2_btree_and_journal_iter_advance(&iter);
}
bch2_btree_and_journal_iter_exit(&iter);
@ -374,9 +367,7 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
return ret;
}
int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_keys,
enum btree_id btree_id,
btree_walk_node_fn node_fn,
int bch2_btree_and_journal_walk(struct bch_fs *c, enum btree_id btree_id,
btree_walk_key_fn key_fn)
{
struct btree *b = c->btree_roots[btree_id].b;
@ -386,10 +377,7 @@ int bch2_btree_and_journal_walk(struct bch_fs *c, struct journal_keys *journal_k
return 0;
six_lock_read(&b->c.lock, NULL, NULL);
ret = (node_fn ? node_fn(c, b) : 0) ?:
bch2_btree_and_journal_walk_recurse(c, b, journal_keys, btree_id,
node_fn, key_fn) ?:
key_fn(c, btree_id, b->c.level + 1, bkey_i_to_s_c(&b->key));
ret = bch2_btree_and_journal_walk_recurse(c, b, btree_id, key_fn);
six_unlock_read(&b->c.lock);
return ret;
@ -1120,14 +1108,14 @@ use_clean:
bch_verbose(c, "starting alloc read");
err = "error reading allocation information";
ret = bch2_alloc_read(c, &c->journal_keys);
ret = bch2_alloc_read(c);
if (ret)
goto err;
bch_verbose(c, "alloc read done");
bch_verbose(c, "starting stripes_read");
err = "error reading stripes";
ret = bch2_stripes_read(c, &c->journal_keys);
ret = bch2_stripes_read(c);
if (ret)
goto err;
bch_verbose(c, "stripes_read done");

View File

@ -45,12 +45,9 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
struct bch_fs *,
struct btree *);
typedef int (*btree_walk_node_fn)(struct bch_fs *c, struct btree *b);
typedef int (*btree_walk_key_fn)(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k);
typedef int (*btree_walk_key_fn)(struct bch_fs *c, struct bkey_s_c k);
int bch2_btree_and_journal_walk(struct bch_fs *, struct journal_keys *, enum btree_id,
btree_walk_node_fn, btree_walk_key_fn);
int bch2_btree_and_journal_walk(struct bch_fs *, enum btree_id, btree_walk_key_fn);
void bch2_journal_keys_free(struct journal_keys *);
void bch2_journal_entries_free(struct list_head *);
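With the journal_keys argument, the level parameter and the node_fn callback gone, a walker now consists of a single key callback that only ever sees leaf keys. A minimal hypothetical example — the counter and function names below are not from the commit:

static atomic64_t example_nr_alloc_keys = ATOMIC64_INIT(0);	/* hypothetical */

static int example_count_alloc_keys(struct bch_fs *c, struct bkey_s_c k)
{
	/* no level check needed any more -- only leaf keys are passed in */
	if (k.k->type == KEY_TYPE_alloc ||
	    k.k->type == KEY_TYPE_alloc_v2)
		atomic64_inc(&example_nr_alloc_keys);
	return 0;
}

/* caller, following the pattern of bch2_alloc_read()/bch2_stripes_read(): */
static int example_count_all_alloc_keys(struct bch_fs *c)
{
	return bch2_btree_and_journal_walk(c, BTREE_ID_alloc,
					   example_count_alloc_keys);
}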

libbcachefs/s128.h (new file, 29 lines added)
View File

@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_S128_H
#define _BCACHEFS_S128_H
#include <linux/math64.h>
typedef struct {
s64 lo;
s64 hi;
} s128;
typedef struct {
s64 lo;
s32 hi;
} s96;
static inline s128 s128_mul(s128 a, s128 b)
{
return a.lo
}
static inline s96 s96_mul(s96 a, s96 b)
{
return a.lo
}
#endif /* _BCACHEFS_S128_H */
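The bodies of s128_mul() and s96_mul() are cut short above. Purely as a sketch of how a widening multiply could populate the s128 type this header defines — not the commit's code, and assuming the compiler provides __int128:

/* not from the commit: a widening 64x64 -> 128 bit multiply */
static inline s128 s64_widening_mul(s64 a, s64 b)
{
	__int128 p = (__int128) a * b;

	return (s128) {
		.lo = (s64) p,		/* low 64 bits */
		.hi = (s64) (p >> 64),	/* high 64 bits */
	};
}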

View File

@ -12,7 +12,7 @@
#include <linux/crc32c.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <crypto/sha2.h>
static inline enum bch_str_hash_type
bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)

View File

@ -367,9 +367,15 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.clean = BCH_SB_CLEAN(src);
c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
c->sb.encoded_extent_max= 1 << BCH_SB_ENCODED_EXTENT_MAX_BITS(src);
c->sb.time_base_lo = le64_to_cpu(src->time_base_lo);
c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;
/* XXX this is wrong, we need a 96 or 128 bit integer type */
c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
c->sb.nsec_per_time_unit);
c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
c->sb.time_precision = le32_to_cpu(src->time_precision);
c->sb.features = le64_to_cpu(src->features[0]);
c->sb.compat = le64_to_cpu(src->compat[0]);

View File

@ -99,7 +99,7 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned);
static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
struct bch_fs *c;
struct bch_dev *ca;
@ -110,7 +110,7 @@ struct bch_fs *bch2_bdev_to_fs(struct block_device *bdev)
list_for_each_entry(c, &bch_fs_list, list)
for_each_member_device_rcu(ca, c, i, NULL)
if (ca->disk_sb.bdev == bdev) {
if (ca->disk_sb.bdev->bd_dev == dev) {
closure_get(&c->cl);
goto found;
}
@ -544,8 +544,7 @@ void __bch2_fs_stop(struct bch_fs *c)
for_each_member_device(ca, c, i)
if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev)
sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
"bcachefs");
sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
if (c->kobj.state_in_sysfs)
kobject_del(&c->kobj);
@ -1017,8 +1016,7 @@ static void bch2_dev_free(struct bch_dev *ca)
if (ca->kobj.state_in_sysfs &&
ca->disk_sb.bdev)
sysfs_remove_link(&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
"bcachefs");
sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
@ -1054,10 +1052,7 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
wait_for_completion(&ca->io_ref_completion);
if (ca->kobj.state_in_sysfs) {
struct kobject *block =
&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
sysfs_remove_link(block, "bcachefs");
sysfs_remove_link(bdev_kobj(ca->disk_sb.bdev), "bcachefs");
sysfs_remove_link(&ca->kobj, "block");
}
@ -1094,12 +1089,12 @@ static int bch2_dev_sysfs_online(struct bch_fs *c, struct bch_dev *ca)
}
if (ca->disk_sb.bdev) {
struct kobject *block =
&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj;
struct kobject *block = bdev_kobj(ca->disk_sb.bdev);
ret = sysfs_create_link(block, &ca->kobj, "bcachefs");
if (ret)
return ret;
ret = sysfs_create_link(&ca->kobj, block, "block");
if (ret)
return ret;
@ -1837,20 +1832,21 @@ err:
/* return with ref on ca->ref: */
struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
{
struct block_device *bdev = lookup_bdev(path);
struct bch_dev *ca;
dev_t dev;
unsigned i;
int ret;
if (IS_ERR(bdev))
return ERR_CAST(bdev);
ret = lookup_bdev(path, &dev);
if (ret)
return ERR_PTR(ret);
for_each_member_device(ca, c, i)
if (ca->disk_sb.bdev == bdev)
if (ca->disk_sb.bdev->bd_dev == dev)
goto found;
ca = ERR_PTR(-ENOENT);
found:
bdput(bdev);
return ca;
}

View File

@ -197,7 +197,7 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
return devs;
}
struct bch_fs *bch2_bdev_to_fs(struct block_device *);
struct bch_fs *bch2_dev_to_fs(dev_t);
struct bch_fs *bch2_uuid_to_fs(uuid_le);
bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,

View File

@ -215,6 +215,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
strncpy(bdev->name, path, sizeof(bdev->name));
bdev->name[sizeof(bdev->name) - 1] = '\0';
bdev->bd_dev = xfstat(fd).st_rdev;
bdev->bd_fd = fd;
bdev->bd_sync_fd = sync_fd;
bdev->bd_holder = holder;
@ -230,9 +231,9 @@ void bdput(struct block_device *bdev)
BUG();
}
struct block_device *lookup_bdev(const char *path)
int lookup_bdev(const char *path, dev_t *dev)
{
return ERR_PTR(-EINVAL);
return -EINVAL;
}
static int aio_completion_thread(void *arg)
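The kernel's lookup_bdev() now takes a dev_t out-parameter and returns an error code, and the userspace shim above is updated to match; the earlier fs.c and super.c hunks switch to comparing bd_dev numbers instead of block_device pointers. A minimal hypothetical helper in that style (not from the commit):

/* hypothetical: does @path refer to the block device backing @ca? */
static bool example_dev_matches_path(struct bch_dev *ca, const char *path)
{
	dev_t dev;

	/* lookup_bdev() fills in a dev_t and returns an error code, so
	 * there is no block_device reference to bdput() afterwards */
	if (lookup_bdev(path, &dev))
		return false;

	return ca->disk_sb.bdev->bd_dev == dev;
}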