Update bcachefs sources to cd779e0cc5 bcachefs: Skip inode unpack/pack in bch2_extent_update()

Kent Overstreet 2022-10-22 13:25:25 -04:00
parent 494421ee6e
commit 188b6d0c8e
60 changed files with 1033 additions and 522 deletions

View File

@ -1 +1 @@
44be8c1da2e1d4edb23d5dcf3b522971c245c3f6 cd779e0cc51cb232d17eec4537cb4769af202b5f

View File

@ -122,7 +122,7 @@ static void update_inode(struct bch_fs *c,
struct bkey_inode_buf packed; struct bkey_inode_buf packed;
int ret; int ret;
bch2_inode_pack(c, &packed, inode); bch2_inode_pack(&packed, inode);
packed.inode.k.p.snapshot = U32_MAX; packed.inode.k.p.snapshot = U32_MAX;
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
NULL, NULL, 0); NULL, NULL, 0);

View File

@ -2,6 +2,7 @@
#define __TOOLS_LINUX_BUG_H #define __TOOLS_LINUX_BUG_H
#include <assert.h> #include <assert.h>
#include <stdio.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#ifdef CONFIG_VALGRIND #ifdef CONFIG_VALGRIND

View File

@ -4,4 +4,7 @@
#define prefetch(p) \ #define prefetch(p) \
({ __maybe_unused typeof(p) __var = (p); }) ({ __maybe_unused typeof(p) __var = (p); })
#define prefetchw(p) \
({ __maybe_unused typeof(p) __var = (p); })
#endif /* _LINUX_PREFETCH_H */ #endif /* _LINUX_PREFETCH_H */

View File

@ -6,6 +6,7 @@
#include <linux/types.h> /* for size_t */ #include <linux/types.h> /* for size_t */
extern size_t strlcpy(char *dest, const char *src, size_t size); extern size_t strlcpy(char *dest, const char *src, size_t size);
extern ssize_t strscpy(char *dest, const char *src, size_t count);
extern char *strim(char *); extern char *strim(char *);
extern void memzero_explicit(void *, size_t); extern void memzero_explicit(void *, size_t);
int match_string(const char * const *, size_t, const char *); int match_string(const char * const *, size_t, const char *);

View File

@ -173,7 +173,7 @@ bch2_acl_to_xattr(struct btree_trans *trans,
bkey_xattr_init(&xattr->k_i); bkey_xattr_init(&xattr->k_i);
xattr->k.u64s = u64s; xattr->k.u64s = u64s;
xattr->v.x_type = acl_to_xattr_type(type); xattr->v.x_type = acl_to_xattr_type(type);
xattr->v.x_name_len = 0, xattr->v.x_name_len = 0;
xattr->v.x_val_len = cpu_to_le16(acl_len); xattr->v.x_val_len = cpu_to_le16(acl_len);
acl_header = xattr_val(&xattr->v); acl_header = xattr_val(&xattr->v);

View File

@ -210,31 +210,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
return ret; return ret;
} }
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
int ret;
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ERR_PTR(ret);
}
a = bch2_alloc_to_v4_mut(trans, k);
if (IS_ERR(a))
bch2_trans_iter_exit(trans, iter);
return a;
}
static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
{ {
unsigned i, bytes = offsetof(struct bch_alloc, data); unsigned i, bytes = offsetof(struct bch_alloc, data);
@ -475,12 +450,13 @@ void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
} }
} }
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{ {
struct bkey_i_alloc_v4 *ret;
unsigned bytes = k.k->type == KEY_TYPE_alloc_v4 unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
? bkey_bytes(k.k) ? bkey_bytes(k.k)
: sizeof(struct bkey_i_alloc_v4); : sizeof(struct bkey_i_alloc_v4);
struct bkey_i_alloc_v4 *ret;
/* /*
* Reserve space for one more backpointer here: * Reserve space for one more backpointer here:
@ -491,20 +467,18 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
return ret; return ret;
if (k.k->type == KEY_TYPE_alloc_v4) { if (k.k->type == KEY_TYPE_alloc_v4) {
struct bch_backpointer *src, *dst;
bkey_reassemble(&ret->k_i, k); bkey_reassemble(&ret->k_i, k);
if (BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v) < BCH_ALLOC_V4_U64s) { src = alloc_v4_backpointers(&ret->v);
struct bch_backpointer *src, *dst; SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
dst = alloc_v4_backpointers(&ret->v);
src = alloc_v4_backpointers(&ret->v); memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s); sizeof(struct bch_backpointer));
dst = alloc_v4_backpointers(&ret->v); memset(src, 0, dst - src);
set_alloc_v4_u64s(ret);
memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
sizeof(struct bch_backpointer));
memset(src, 0, dst - src);
set_alloc_v4_u64s(ret);
}
} else { } else {
bkey_alloc_v4_init(&ret->k_i); bkey_alloc_v4_init(&ret->k_i);
ret->k.p = k.k->p; ret->k.p = k.k->p;
@ -513,6 +487,54 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct b
return ret; return ret;
} }
static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
{
if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
/*
* Reserve space for one more backpointer here:
* Not sketchy at doing it this way, nope...
*/
struct bkey_i_alloc_v4 *ret =
bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer));
if (!IS_ERR(ret))
bkey_reassemble(&ret->k_i, k);
return ret;
}
return __bch2_alloc_to_v4_mut(trans, k);
}
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
return bch2_alloc_to_v4_mut_inlined(trans, k);
}
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
int ret;
bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
BTREE_ITER_WITH_UPDATES|
BTREE_ITER_CACHED|
BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
return ERR_PTR(ret);
}
a = bch2_alloc_to_v4_mut_inlined(trans, k);
if (IS_ERR(a))
bch2_trans_iter_exit(trans, iter);
return a;
}
int bch2_alloc_read(struct bch_fs *c) int bch2_alloc_read(struct bch_fs *c)
{ {
struct btree_trans trans; struct btree_trans trans;
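
For reference, a hedged sketch of the caller pattern bch2_trans_start_alloc_update supports (the bucket position, the placeholder mutation, and the surrounding transaction are illustrative, not part of this commit; on success the iterator is left initialized and must be exited by the caller):

	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	int ret;

	a = bch2_trans_start_alloc_update(trans, &iter, POS(dev, bucket));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	/* ... modify fields of a->v (the unpacked alloc key) here ... */

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	bch2_trans_iter_exit(trans, &iter);
	return ret;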

View File

@ -489,16 +489,16 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
* bch_bucket_alloc - allocate a single bucket from a specific device * bch_bucket_alloc - allocate a single bucket from a specific device
* *
* Returns index of bucket on success, 0 on failure * Returns index of bucket on success, 0 on failure
* */ */
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct bch_dev *ca, struct bch_dev *ca,
enum alloc_reserve reserve, enum alloc_reserve reserve,
bool may_alloc_partial, bool may_alloc_partial,
struct closure *cl) struct closure *cl,
struct bch_dev_usage *usage)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL; struct open_bucket *ob = NULL;
struct bch_dev_usage usage;
bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor; u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
u64 avail; u64 avail;
@ -509,16 +509,16 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
u64 skipped_nouse = 0; u64 skipped_nouse = 0;
bool waiting = false; bool waiting = false;
again: again:
usage = bch2_dev_usage_read(ca); bch2_dev_usage_read_fast(ca, usage);
avail = dev_buckets_free(ca, usage, reserve); avail = dev_buckets_free(ca, *usage, reserve);
if (usage.d[BCH_DATA_need_discard].buckets > avail) if (usage->d[BCH_DATA_need_discard].buckets > avail)
bch2_do_discards(c); bch2_do_discards(c);
if (usage.d[BCH_DATA_need_gc_gens].buckets > avail) if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
bch2_do_gc_gens(c); bch2_do_gc_gens(c);
if (should_invalidate_buckets(ca, usage)) if (should_invalidate_buckets(ca, *usage))
bch2_do_invalidates(c); bch2_do_invalidates(c);
if (!avail) { if (!avail) {
@ -577,10 +577,10 @@ err:
if (!IS_ERR(ob)) if (!IS_ERR(ob))
trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve], trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
may_alloc_partial, ob->bucket); may_alloc_partial, ob->bucket);
else else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
trace_and_count(c, bucket_alloc_fail, trace_and_count(c, bucket_alloc_fail,
ca, bch2_alloc_reserves[reserve], ca, bch2_alloc_reserves[reserve],
usage.d[BCH_DATA_free].buckets, usage->d[BCH_DATA_free].buckets,
avail, avail,
bch2_copygc_wait_amount(c), bch2_copygc_wait_amount(c),
c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
@ -599,11 +599,12 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
bool may_alloc_partial, bool may_alloc_partial,
struct closure *cl) struct closure *cl)
{ {
struct bch_dev_usage usage;
struct open_bucket *ob; struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0, bch2_trans_do(c, NULL, NULL, 0,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
may_alloc_partial, cl))); may_alloc_partial, cl, &usage)));
return ob; return ob;
} }
@ -630,8 +631,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
return ret; return ret;
} }
void bch2_dev_stripe_increment(struct bch_dev *ca, static inline void bch2_dev_stripe_increment_inlined(struct bch_dev *ca,
struct dev_stripe_state *stripe) struct dev_stripe_state *stripe,
struct bch_dev_usage *usage)
{ {
u64 *v = stripe->next_alloc + ca->dev_idx; u64 *v = stripe->next_alloc + ca->dev_idx;
u64 free_space = dev_buckets_available(ca, RESERVE_none); u64 free_space = dev_buckets_available(ca, RESERVE_none);
@ -650,6 +652,15 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
*v = *v < scale ? 0 : *v - scale; *v = *v < scale ? 0 : *v - scale;
} }
void bch2_dev_stripe_increment(struct bch_dev *ca,
struct dev_stripe_state *stripe)
{
struct bch_dev_usage usage;
bch2_dev_usage_read_fast(ca, &usage);
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
}
#define BUCKET_MAY_ALLOC_PARTIAL (1 << 0) #define BUCKET_MAY_ALLOC_PARTIAL (1 << 0)
#define BUCKET_ALLOC_USE_DURABILITY (1 << 1) #define BUCKET_ALLOC_USE_DURABILITY (1 << 1)
@ -694,6 +705,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
BUG_ON(*nr_effective >= nr_replicas); BUG_ON(*nr_effective >= nr_replicas);
for (i = 0; i < devs_sorted.nr; i++) { for (i = 0; i < devs_sorted.nr; i++) {
struct bch_dev_usage usage;
struct open_bucket *ob; struct open_bucket *ob;
dev = devs_sorted.devs[i]; dev = devs_sorted.devs[i];
@ -713,9 +725,9 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
} }
ob = bch2_bucket_alloc_trans(trans, ca, reserve, ob = bch2_bucket_alloc_trans(trans, ca, reserve,
flags & BUCKET_MAY_ALLOC_PARTIAL, cl); flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
if (!IS_ERR(ob)) if (!IS_ERR(ob))
bch2_dev_stripe_increment(ca, stripe); bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
percpu_ref_put(&ca->ref); percpu_ref_put(&ca->ref);
if (IS_ERR(ob)) { if (IS_ERR(ob)) {
@ -1110,7 +1122,7 @@ restart_find_oldest:
hlist_add_head_rcu(&wp->node, head); hlist_add_head_rcu(&wp->node, head);
mutex_unlock(&c->write_points_hash_lock); mutex_unlock(&c->write_points_hash_lock);
out: out:
wp->last_used = sched_clock(); wp->last_used = local_clock();
return wp; return wp;
} }
@ -1356,7 +1368,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
wp < c->write_points + c->write_points_nr; wp++) { wp < c->write_points + c->write_points_nr; wp++) {
writepoint_init(wp, BCH_DATA_user); writepoint_init(wp, BCH_DATA_user);
wp->last_used = sched_clock(); wp->last_used = local_clock();
wp->write_point = (unsigned long) wp; wp->write_point = (unsigned long) wp;
hlist_add_head_rcu(&wp->node, hlist_add_head_rcu(&wp->node,
writepoint_hash(c, wp->write_point)); writepoint_hash(c, wp->write_point));

View File

@ -535,7 +535,7 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
if (bp.level == c->btree_roots[bp.btree_id].level + 1) if (bp.level == c->btree_roots[bp.btree_id].level + 1)
k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key); k = bkey_i_to_s_c(&c->btree_roots[bp.btree_id].key);
if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k; return k;
bch2_trans_iter_exit(trans, iter); bch2_trans_iter_exit(trans, iter);
@ -585,12 +585,12 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
if (IS_ERR(b)) if (IS_ERR(b))
goto err; goto err;
if (extent_matches_bp(c, bp.btree_id, bp.level, if (b && extent_matches_bp(c, bp.btree_id, bp.level,
bkey_i_to_s_c(&b->key), bkey_i_to_s_c(&b->key),
bucket, bp)) bucket, bp))
return b; return b;
if (btree_node_will_make_reachable(b)) { if (b && btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else { } else {
backpointer_not_found(trans, bucket, bp_offset, bp, backpointer_not_found(trans, bucket, bp_offset, bp,

View File

@ -2,6 +2,8 @@
#ifndef _BCACHEFS_BBPOS_H #ifndef _BCACHEFS_BBPOS_H
#define _BCACHEFS_BBPOS_H #define _BCACHEFS_BBPOS_H
#include "bkey_methods.h"
struct bbpos { struct bbpos {
enum btree_id btree; enum btree_id btree;
struct bpos pos; struct bpos pos;

View File

@ -107,7 +107,7 @@
* *
* BTREE NODES: * BTREE NODES:
* *
* Our unit of allocation is a bucket, and we we can't arbitrarily allocate and * Our unit of allocation is a bucket, and we can't arbitrarily allocate and
* free smaller than a bucket - so, that's how big our btree nodes are. * free smaller than a bucket - so, that's how big our btree nodes are.
* *
* (If buckets are really big we'll only use part of the bucket for a btree node * (If buckets are really big we'll only use part of the bucket for a btree node
@ -930,7 +930,6 @@ struct bch_fs {
struct time_stats times[BCH_TIME_STAT_NR]; struct time_stats times[BCH_TIME_STAT_NR];
const char *btree_transaction_fns[BCH_TRANSACTIONS_NR];
struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
}; };

View File

@ -336,7 +336,7 @@ static inline void bkey_init(struct bkey *k)
* number. * number.
* *
* - WHITEOUT: for hash table btrees * - WHITEOUT: for hash table btrees
*/ */
#define BCH_BKEY_TYPES() \ #define BCH_BKEY_TYPES() \
x(deleted, 0) \ x(deleted, 0) \
x(whiteout, 1) \ x(whiteout, 1) \
@ -366,7 +366,8 @@ static inline void bkey_init(struct bkey *k)
x(set, 25) \ x(set, 25) \
x(lru, 26) \ x(lru, 26) \
x(alloc_v4, 27) \ x(alloc_v4, 27) \
x(backpointer, 28) x(backpointer, 28) \
x(inode_v3, 29)
enum bch_bkey_type { enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name = nr, #define x(name, nr) KEY_TYPE_##name = nr,
@ -717,6 +718,21 @@ struct bch_inode_v2 {
__u8 fields[0]; __u8 fields[0];
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
struct bch_inode_v3 {
struct bch_val v;
__le64 bi_journal_seq;
__le64 bi_hash_seed;
__le64 bi_flags;
__le64 bi_sectors;
__le64 bi_size;
__le64 bi_version;
__u8 fields[0];
} __attribute__((packed, aligned(8)));
#define INODEv3_FIELDS_START_INITIAL 6
#define INODEv3_FIELDS_START_CUR (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
struct bch_inode_generation { struct bch_inode_generation {
struct bch_val v; struct bch_val v;
@ -728,7 +744,7 @@ struct bch_inode_generation {
* bi_subvol and bi_parent_subvol are only set for subvolume roots: * bi_subvol and bi_parent_subvol are only set for subvolume roots:
*/ */
#define BCH_INODE_FIELDS() \ #define BCH_INODE_FIELDS_v2() \
x(bi_atime, 96) \ x(bi_atime, 96) \
x(bi_ctime, 96) \ x(bi_ctime, 96) \
x(bi_mtime, 96) \ x(bi_mtime, 96) \
@ -755,6 +771,31 @@ struct bch_inode_generation {
x(bi_subvol, 32) \ x(bi_subvol, 32) \
x(bi_parent_subvol, 32) x(bi_parent_subvol, 32)
#define BCH_INODE_FIELDS_v3() \
x(bi_atime, 96) \
x(bi_ctime, 96) \
x(bi_mtime, 96) \
x(bi_otime, 96) \
x(bi_uid, 32) \
x(bi_gid, 32) \
x(bi_nlink, 32) \
x(bi_generation, 32) \
x(bi_dev, 32) \
x(bi_data_checksum, 8) \
x(bi_compression, 8) \
x(bi_project, 32) \
x(bi_background_compression, 8) \
x(bi_data_replicas, 8) \
x(bi_promote_target, 16) \
x(bi_foreground_target, 16) \
x(bi_background_target, 16) \
x(bi_erasure_code, 16) \
x(bi_fields_set, 16) \
x(bi_dir, 64) \
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32)
/* subset of BCH_INODE_FIELDS */ /* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \ #define BCH_INODE_OPTS() \
x(data_checksum, 8) \ x(data_checksum, 8) \
@ -780,16 +821,16 @@ enum {
* User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
* flags) * flags)
*/ */
__BCH_INODE_SYNC = 0, __BCH_INODE_SYNC = 0,
__BCH_INODE_IMMUTABLE = 1, __BCH_INODE_IMMUTABLE = 1,
__BCH_INODE_APPEND = 2, __BCH_INODE_APPEND = 2,
__BCH_INODE_NODUMP = 3, __BCH_INODE_NODUMP = 3,
__BCH_INODE_NOATIME = 4, __BCH_INODE_NOATIME = 4,
__BCH_INODE_I_SIZE_DIRTY= 5, __BCH_INODE_I_SIZE_DIRTY = 5,
__BCH_INODE_I_SECTORS_DIRTY= 6, __BCH_INODE_I_SECTORS_DIRTY = 6,
__BCH_INODE_UNLINKED = 7, __BCH_INODE_UNLINKED = 7,
__BCH_INODE_BACKPTR_UNTRUSTED = 8, __BCH_INODE_BACKPTR_UNTRUSTED = 8,
/* bits 20+ reserved for packed fields below: */ /* bits 20+ reserved for packed fields below: */
}; };
@ -811,6 +852,13 @@ LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24); LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31); LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);
LE64_BITMASK(INODEv3_STR_HASH, struct bch_inode_v3, bi_flags, 20, 24);
LE64_BITMASK(INODEv3_NR_FIELDS, struct bch_inode_v3, bi_flags, 24, 31);
LE64_BITMASK(INODEv3_FIELDS_START,
struct bch_inode_v3, bi_flags, 31, 36);
LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
/* Dirents */ /* Dirents */
/* /*
@ -1494,7 +1542,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(freespace, 19) \ x(freespace, 19) \
x(alloc_v4, 20) \ x(alloc_v4, 20) \
x(new_data_types, 21) \ x(new_data_types, 21) \
x(backpointers, 22) x(backpointers, 22) \
x(inode_v3, 23)
enum bcachefs_metadata_version { enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9, bcachefs_metadata_version_min = 9,

View File

@ -2,6 +2,7 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey.h" #include "bkey.h"
#include "bkey_cmp.h"
#include "bkey_methods.h" #include "bkey_methods.h"
#include "bset.h" #include "bset.h"
#include "util.h" #include "util.h"
@ -763,50 +764,6 @@ unsigned bch2_bkey_ffs(const struct btree *b, const struct bkey_packed *k)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
{
long d0, d1, d2, d3;
int cmp;
/* we shouldn't need asm for this, but gcc is being retarded: */
asm(".intel_syntax noprefix;"
"xor eax, eax;"
"xor edx, edx;"
"1:;"
"mov r8, [rdi];"
"mov r9, [rsi];"
"sub ecx, 64;"
"jl 2f;"
"cmp r8, r9;"
"jnz 3f;"
"lea rdi, [rdi - 8];"
"lea rsi, [rsi - 8];"
"jmp 1b;"
"2:;"
"not ecx;"
"shr r8, 1;"
"shr r9, 1;"
"shr r8, cl;"
"shr r9, cl;"
"cmp r8, r9;"
"3:\n"
"seta al;"
"setb dl;"
"sub eax, edx;"
".att_syntax prefix;"
: "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
: "0" (l), "1" (r), "3" (nr_key_bits)
: "r8", "r9", "cc", "memory");
return cmp;
}
#define I(_x) (*(out)++ = (_x)) #define I(_x) (*(out)++ = (_x))
#define I1(i0) I(i0) #define I1(i0) I(i0)
#define I2(i0, i1) (I1(i0), I(i1)) #define I2(i0, i1) (I1(i0), I(i1))
@ -1037,40 +994,6 @@ int bch2_compile_bkey_format(const struct bkey_format *format, void *_out)
} }
#else #else
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
{
u64 l_v, r_v;
if (!nr_key_bits)
return 0;
/* for big endian, skip past header */
nr_key_bits += high_bit_offset;
l_v = *l & (~0ULL >> high_bit_offset);
r_v = *r & (~0ULL >> high_bit_offset);
while (1) {
if (nr_key_bits < 64) {
l_v >>= 64 - nr_key_bits;
r_v >>= 64 - nr_key_bits;
nr_key_bits = 0;
} else {
nr_key_bits -= 64;
}
if (!nr_key_bits || l_v != r_v)
break;
l = next_word(l);
r = next_word(r);
l_v = *l;
r_v = *r;
}
return cmp_int(l_v, r_v);
}
#endif #endif
__pure __pure
@ -1078,19 +1001,7 @@ int __bch2_bkey_cmp_packed_format_checked(const struct bkey_packed *l,
const struct bkey_packed *r, const struct bkey_packed *r,
const struct btree *b) const struct btree *b)
{ {
const struct bkey_format *f = &b->format; return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
int ret;
EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
ret = __bkey_cmp_bits(high_word(f, l),
high_word(f, r),
b->nr_key_bits);
EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
bkey_unpack_pos(b, r)));
return ret;
} }
__pure __flatten __pure __flatten
@ -1106,20 +1017,7 @@ int bch2_bkey_cmp_packed(const struct btree *b,
const struct bkey_packed *l, const struct bkey_packed *l,
const struct bkey_packed *r) const struct bkey_packed *r)
{ {
struct bkey unpacked; return bch2_bkey_cmp_packed_inlined(b, l, r);
if (likely(bkey_packed(l) && bkey_packed(r)))
return __bch2_bkey_cmp_packed_format_checked(l, r, b);
if (bkey_packed(l)) {
__bkey_unpack_key_format_checked(b, &unpacked, l);
l = (void*) &unpacked;
} else if (bkey_packed(r)) {
__bkey_unpack_key_format_checked(b, &unpacked, r);
r = (void*) &unpacked;
}
return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
} }
__pure __flatten __pure __flatten

View File

@ -5,6 +5,7 @@
#include <linux/bug.h> #include <linux/bug.h>
#include "bcachefs_format.h" #include "bcachefs_format.h"
#include "btree_types.h"
#include "util.h" #include "util.h"
#include "vstructs.h" #include "vstructs.h"
@ -134,8 +135,9 @@ int bkey_cmp_left_packed(const struct btree *b,
} }
/* /*
* we prefer to pass bpos by ref, but it's often enough terribly convenient to * The compiler generates better code when we pass bpos by ref, but it's often
* pass it by by val... as much as I hate c++, const ref would be nice here: * enough terribly convenient to pass it by val... as much as I hate c++, const
* ref would be nice here:
*/ */
__pure __flatten __pure __flatten
static inline int bkey_cmp_left_packed_byval(const struct btree *b, static inline int bkey_cmp_left_packed_byval(const struct btree *b,
@ -356,6 +358,99 @@ void bch2_bkey_unpack(const struct btree *, struct bkey_i *,
bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *, bool bch2_bkey_pack(struct bkey_packed *, const struct bkey_i *,
const struct bkey_format *); const struct bkey_format *);
typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
static inline void
__bkey_unpack_key_format_checked(const struct btree *b,
struct bkey *dst,
const struct bkey_packed *src)
{
if (IS_ENABLED(HAVE_BCACHEFS_COMPILED_UNPACK)) {
compiled_unpack_fn unpack_fn = b->aux_data;
unpack_fn(dst, src);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
bch2_expensive_debug_checks) {
struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
}
} else {
*dst = __bch2_bkey_unpack_key(&b->format, src);
}
}
static inline struct bkey
bkey_unpack_key_format_checked(const struct btree *b,
const struct bkey_packed *src)
{
struct bkey dst;
__bkey_unpack_key_format_checked(b, &dst, src);
return dst;
}
static inline void __bkey_unpack_key(const struct btree *b,
struct bkey *dst,
const struct bkey_packed *src)
{
if (likely(bkey_packed(src)))
__bkey_unpack_key_format_checked(b, dst, src);
else
*dst = *packed_to_bkey_c(src);
}
/**
* bkey_unpack_key -- unpack just the key, not the value
*/
static inline struct bkey bkey_unpack_key(const struct btree *b,
const struct bkey_packed *src)
{
return likely(bkey_packed(src))
? bkey_unpack_key_format_checked(b, src)
: *packed_to_bkey_c(src);
}
static inline struct bpos
bkey_unpack_pos_format_checked(const struct btree *b,
const struct bkey_packed *src)
{
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
return bkey_unpack_key_format_checked(b, src).p;
#else
return __bkey_unpack_pos(&b->format, src);
#endif
}
static inline struct bpos bkey_unpack_pos(const struct btree *b,
const struct bkey_packed *src)
{
return likely(bkey_packed(src))
? bkey_unpack_pos_format_checked(b, src)
: packed_to_bkey_c(src)->p;
}
/* Disassembled bkeys */
static inline struct bkey_s_c bkey_disassemble(struct btree *b,
const struct bkey_packed *k,
struct bkey *u)
{
__bkey_unpack_key(b, u, k);
return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
}
/* non const version: */
static inline struct bkey_s __bkey_disassemble(struct btree *b,
struct bkey_packed *k,
struct bkey *u)
{
__bkey_unpack_key(b, u, k);
return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
}
static inline u64 bkey_field_max(const struct bkey_format *f, static inline u64 bkey_field_max(const struct bkey_format *f,
enum bch_bkey_fields nr) enum bch_bkey_fields nr)
{ {

View File

@ -3,6 +3,7 @@
#define _BCACHEFS_BKEY_BUF_H #define _BCACHEFS_BKEY_BUF_H
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey.h"
struct bkey_buf { struct bkey_buf {
struct bkey_i *k; struct bkey_i *k;

libbcachefs/bkey_cmp.h (new file, 129 lines added)
View File

@ -0,0 +1,129 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BKEY_CMP_H
#define _BCACHEFS_BKEY_CMP_H
#include "bkey.h"
#ifdef CONFIG_X86_64
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
{
long d0, d1, d2, d3;
int cmp;
/* we shouldn't need asm for this, but gcc is being retarded: */
asm(".intel_syntax noprefix;"
"xor eax, eax;"
"xor edx, edx;"
"1:;"
"mov r8, [rdi];"
"mov r9, [rsi];"
"sub ecx, 64;"
"jl 2f;"
"cmp r8, r9;"
"jnz 3f;"
"lea rdi, [rdi - 8];"
"lea rsi, [rsi - 8];"
"jmp 1b;"
"2:;"
"not ecx;"
"shr r8, 1;"
"shr r9, 1;"
"shr r8, cl;"
"shr r9, cl;"
"cmp r8, r9;"
"3:\n"
"seta al;"
"setb dl;"
"sub eax, edx;"
".att_syntax prefix;"
: "=&D" (d0), "=&S" (d1), "=&d" (d2), "=&c" (d3), "=&a" (cmp)
: "0" (l), "1" (r), "3" (nr_key_bits)
: "r8", "r9", "cc", "memory");
return cmp;
}
#else
static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
unsigned nr_key_bits)
{
u64 l_v, r_v;
if (!nr_key_bits)
return 0;
/* for big endian, skip past header */
nr_key_bits += high_bit_offset;
l_v = *l & (~0ULL >> high_bit_offset);
r_v = *r & (~0ULL >> high_bit_offset);
while (1) {
if (nr_key_bits < 64) {
l_v >>= 64 - nr_key_bits;
r_v >>= 64 - nr_key_bits;
nr_key_bits = 0;
} else {
nr_key_bits -= 64;
}
if (!nr_key_bits || l_v != r_v)
break;
l = next_word(l);
r = next_word(r);
l_v = *l;
r_v = *r;
}
return cmp_int(l_v, r_v);
}
#endif
static inline __pure __flatten
int __bch2_bkey_cmp_packed_format_checked_inlined(const struct bkey_packed *l,
const struct bkey_packed *r,
const struct btree *b)
{
const struct bkey_format *f = &b->format;
int ret;
EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
EBUG_ON(b->nr_key_bits != bkey_format_key_bits(f));
ret = __bkey_cmp_bits(high_word(f, l),
high_word(f, r),
b->nr_key_bits);
EBUG_ON(ret != bpos_cmp(bkey_unpack_pos(b, l),
bkey_unpack_pos(b, r)));
return ret;
}
static inline __pure __flatten
int bch2_bkey_cmp_packed_inlined(const struct btree *b,
const struct bkey_packed *l,
const struct bkey_packed *r)
{
struct bkey unpacked;
if (likely(bkey_packed(l) && bkey_packed(r)))
return __bch2_bkey_cmp_packed_format_checked_inlined(l, r, b);
if (bkey_packed(l)) {
__bkey_unpack_key_format_checked(b, &unpacked, l);
l = (void *) &unpacked;
} else if (bkey_packed(r)) {
__bkey_unpack_key_format_checked(b, &unpacked, r);
r = (void *) &unpacked;
}
return bpos_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
}
#endif /* _BCACHEFS_BKEY_CMP_H */

View File

@ -149,6 +149,7 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_whiteout)|
(1U << KEY_TYPE_inode)| (1U << KEY_TYPE_inode)|
(1U << KEY_TYPE_inode_v2)| (1U << KEY_TYPE_inode_v2)|
(1U << KEY_TYPE_inode_v3)|
(1U << KEY_TYPE_inode_generation), (1U << KEY_TYPE_inode_generation),
[BKEY_TYPE_dirents] = [BKEY_TYPE_dirents] =
(1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_deleted)|

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey_buf.h" #include "bkey_buf.h"
#include "bkey_cmp.h"
#include "bkey_sort.h" #include "bkey_sort.h"
#include "bset.h" #include "bset.h"
#include "extents.h" #include "extents.h"
@ -155,7 +156,7 @@ static inline int sort_keys_cmp(struct btree *b,
struct bkey_packed *l, struct bkey_packed *l,
struct bkey_packed *r) struct bkey_packed *r)
{ {
return bch2_bkey_cmp_packed(b, l, r) ?: return bch2_bkey_cmp_packed_inlined(b, l, r) ?:
(int) bkey_deleted(r) - (int) bkey_deleted(l) ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
(int) l->needs_whiteout - (int) r->needs_whiteout; (int) l->needs_whiteout - (int) r->needs_whiteout;
} }

View File

@ -965,7 +965,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
t->size -= j - l; t->size -= j - l;
for (j = l; j < t->size; j++) for (j = l; j < t->size; j++)
rw_aux_tree(b, t)[j].offset += shift; rw_aux_tree(b, t)[j].offset += shift;
EBUG_ON(l < t->size && EBUG_ON(l < t->size &&
rw_aux_tree(b, t)[l].offset == rw_aux_tree(b, t)[l].offset ==
@ -1266,7 +1266,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter,
bch2_btree_node_iter_sort(iter, b); bch2_btree_node_iter_sort(iter, b);
} }
noinline __flatten __attribute__((cold)) noinline __flatten __cold
static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
struct btree *b, struct bpos *search) struct btree *b, struct bpos *search)
{ {
@ -1441,7 +1441,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
EBUG_ON(iter->data->k > iter->data->end); EBUG_ON(iter->data->k > iter->data->end);
if (unlikely(__btree_node_iter_set_end(iter, 0))) { if (unlikely(__btree_node_iter_set_end(iter, 0))) {
bch2_btree_node_iter_set_drop(iter, iter->data); /* avoid an expensive memmove call: */
iter->data[0] = iter->data[1];
iter->data[1] = iter->data[2];
iter->data[2] = (struct btree_node_iter_set) { 0, 0 };
return; return;
} }

View File

@ -205,100 +205,6 @@ static inline size_t btree_aux_data_u64s(const struct btree *b)
return btree_aux_data_bytes(b) / sizeof(u64); return btree_aux_data_bytes(b) / sizeof(u64);
} }
typedef void (*compiled_unpack_fn)(struct bkey *, const struct bkey_packed *);
static inline void
__bkey_unpack_key_format_checked(const struct btree *b,
struct bkey *dst,
const struct bkey_packed *src)
{
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
{
compiled_unpack_fn unpack_fn = b->aux_data;
unpack_fn(dst, src);
if (bch2_expensive_debug_checks) {
struct bkey dst2 = __bch2_bkey_unpack_key(&b->format, src);
BUG_ON(memcmp(dst, &dst2, sizeof(*dst)));
}
}
#else
*dst = __bch2_bkey_unpack_key(&b->format, src);
#endif
}
static inline struct bkey
bkey_unpack_key_format_checked(const struct btree *b,
const struct bkey_packed *src)
{
struct bkey dst;
__bkey_unpack_key_format_checked(b, &dst, src);
return dst;
}
static inline void __bkey_unpack_key(const struct btree *b,
struct bkey *dst,
const struct bkey_packed *src)
{
if (likely(bkey_packed(src)))
__bkey_unpack_key_format_checked(b, dst, src);
else
*dst = *packed_to_bkey_c(src);
}
/**
* bkey_unpack_key -- unpack just the key, not the value
*/
static inline struct bkey bkey_unpack_key(const struct btree *b,
const struct bkey_packed *src)
{
return likely(bkey_packed(src))
? bkey_unpack_key_format_checked(b, src)
: *packed_to_bkey_c(src);
}
static inline struct bpos
bkey_unpack_pos_format_checked(const struct btree *b,
const struct bkey_packed *src)
{
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
return bkey_unpack_key_format_checked(b, src).p;
#else
return __bkey_unpack_pos(&b->format, src);
#endif
}
static inline struct bpos bkey_unpack_pos(const struct btree *b,
const struct bkey_packed *src)
{
return likely(bkey_packed(src))
? bkey_unpack_pos_format_checked(b, src)
: packed_to_bkey_c(src)->p;
}
/* Disassembled bkeys */
static inline struct bkey_s_c bkey_disassemble(struct btree *b,
const struct bkey_packed *k,
struct bkey *u)
{
__bkey_unpack_key(b, u, k);
return (struct bkey_s_c) { u, bkeyp_val(&b->format, k), };
}
/* non const version: */
static inline struct bkey_s __bkey_disassemble(struct btree *b,
struct bkey_packed *k,
struct bkey *u)
{
__bkey_unpack_key(b, u, k);
return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
}
#define for_each_bset(_b, _t) \ #define for_each_bset(_b, _t) \
for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++) for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)

View File

@ -14,6 +14,12 @@
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <trace/events/bcachefs.h> #include <trace/events/bcachefs.h>
#define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \
do { \
if (shrinker_counter) \
bc->not_freed_##counter++; \
} while (0)
const char * const bch2_btree_node_flags[] = { const char * const bch2_btree_node_flags[] = {
#define x(f) #f, #define x(f) #f,
BTREE_FLAGS() BTREE_FLAGS()
@ -175,7 +181,7 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
mutex_lock(&bc->lock); mutex_lock(&bc->lock);
ret = __bch2_btree_node_hash_insert(bc, b); ret = __bch2_btree_node_hash_insert(bc, b);
if (!ret) if (!ret)
list_add(&b->list, &bc->live); list_add_tail(&b->list, &bc->live);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
return ret; return ret;
@ -194,7 +200,7 @@ static inline struct btree *btree_cache_find(struct btree_cache *bc,
* this version is for btree nodes that have already been freed (we're not * this version is for btree nodes that have already been freed (we're not
* reaping a real btree node) * reaping a real btree node)
*/ */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush) static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter)
{ {
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
int ret = 0; int ret = 0;
@ -204,38 +210,64 @@ wait_on_io:
if (b->flags & ((1U << BTREE_NODE_dirty)| if (b->flags & ((1U << BTREE_NODE_dirty)|
(1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_read_in_flight)|
(1U << BTREE_NODE_write_in_flight))) { (1U << BTREE_NODE_write_in_flight))) {
if (!flush) if (!flush) {
if (btree_node_dirty(b))
BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
else if (btree_node_read_in_flight(b))
BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
else if (btree_node_write_in_flight(b))
BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
return -ENOMEM; return -ENOMEM;
}
/* XXX: waiting on IO with btree cache lock held */ /* XXX: waiting on IO with btree cache lock held */
bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_read(b);
bch2_btree_node_wait_on_write(b); bch2_btree_node_wait_on_write(b);
} }
if (!six_trylock_intent(&b->c.lock)) if (!six_trylock_intent(&b->c.lock)) {
BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent);
return -ENOMEM; return -ENOMEM;
}
if (!six_trylock_write(&b->c.lock)) if (!six_trylock_write(&b->c.lock)) {
BTREE_CACHE_NOT_FREED_INCREMENT(lock_write);
goto out_unlock_intent; goto out_unlock_intent;
}
/* recheck under lock */ /* recheck under lock */
if (b->flags & ((1U << BTREE_NODE_read_in_flight)| if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
(1U << BTREE_NODE_write_in_flight))) { (1U << BTREE_NODE_write_in_flight))) {
if (!flush) if (!flush) {
if (btree_node_read_in_flight(b))
BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight);
else if (btree_node_write_in_flight(b))
BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight);
goto out_unlock; goto out_unlock;
}
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock); six_unlock_intent(&b->c.lock);
goto wait_on_io; goto wait_on_io;
} }
if (btree_node_noevict(b) || if (btree_node_noevict(b)) {
btree_node_write_blocked(b) || BTREE_CACHE_NOT_FREED_INCREMENT(noevict);
btree_node_will_make_reachable(b))
goto out_unlock; goto out_unlock;
}
if (btree_node_write_blocked(b)) {
BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked);
goto out_unlock;
}
if (btree_node_will_make_reachable(b)) {
BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable);
goto out_unlock;
}
if (btree_node_dirty(b)) { if (btree_node_dirty(b)) {
if (!flush) if (!flush) {
BTREE_CACHE_NOT_FREED_INCREMENT(dirty);
goto out_unlock; goto out_unlock;
}
/* /*
* Using the underscore version because we don't want to compact * Using the underscore version because we don't want to compact
* bsets after the write, since this node is about to be evicted * bsets after the write, since this node is about to be evicted
@ -263,14 +295,14 @@ out_unlock_intent:
goto out; goto out;
} }
static int btree_node_reclaim(struct bch_fs *c, struct btree *b) static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter)
{ {
return __btree_node_reclaim(c, b, false); return __btree_node_reclaim(c, b, false, shrinker_counter);
} }
static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{ {
return __btree_node_reclaim(c, b, true); return __btree_node_reclaim(c, b, true, false);
} }
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
@ -319,11 +351,12 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
if (touched >= nr) if (touched >= nr)
goto out; goto out;
if (!btree_node_reclaim(c, b)) { if (!btree_node_reclaim(c, b, true)) {
btree_node_data_free(c, b); btree_node_data_free(c, b);
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock); six_unlock_intent(&b->c.lock);
freed++; freed++;
bc->freed++;
} }
} }
restart: restart:
@ -332,9 +365,11 @@ restart:
if (btree_node_accessed(b)) { if (btree_node_accessed(b)) {
clear_btree_node_accessed(b); clear_btree_node_accessed(b);
} else if (!btree_node_reclaim(c, b)) { bc->not_freed_access_bit++;
} else if (!btree_node_reclaim(c, b, true)) {
freed++; freed++;
btree_node_data_free(c, b); btree_node_data_free(c, b);
bc->freed++;
bch2_btree_node_hash_remove(bc, b); bch2_btree_node_hash_remove(bc, b);
six_unlock_write(&b->c.lock); six_unlock_write(&b->c.lock);
@ -390,7 +425,7 @@ static void bch2_btree_cache_shrinker_to_text(struct printbuf *out, struct shrin
struct bch_fs *c = container_of(shrink, struct bch_fs, struct bch_fs *c = container_of(shrink, struct bch_fs,
btree_cache.shrink); btree_cache.shrink);
bch2_btree_cache_to_text(out, c); bch2_btree_cache_to_text(out, &c->btree_cache);
} }
void bch2_fs_btree_cache_exit(struct bch_fs *c) void bch2_fs_btree_cache_exit(struct bch_fs *c)
@ -548,7 +583,7 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c)
struct btree *b; struct btree *b;
list_for_each_entry_reverse(b, &bc->live, list) list_for_each_entry_reverse(b, &bc->live, list)
if (!btree_node_reclaim(c, b)) if (!btree_node_reclaim(c, b, false))
return b; return b;
while (1) { while (1) {
@ -583,7 +618,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
* disk node. Check the freed list before allocating a new one: * disk node. Check the freed list before allocating a new one:
*/ */
list_for_each_entry(b, freed, list) list_for_each_entry(b, freed, list)
if (!btree_node_reclaim(c, b)) { if (!btree_node_reclaim(c, b, false)) {
list_del_init(&b->list); list_del_init(&b->list);
goto got_node; goto got_node;
} }
@ -609,7 +644,7 @@ got_node:
* the list. Check if there's any freed nodes there: * the list. Check if there's any freed nodes there:
*/ */
list_for_each_entry(b2, &bc->freeable, list) list_for_each_entry(b2, &bc->freeable, list)
if (!btree_node_reclaim(c, b2)) { if (!btree_node_reclaim(c, b2, false)) {
swap(b->data, b2->data); swap(b->data, b2->data);
swap(b->aux_data, b2->aux_data); swap(b->aux_data, b2->aux_data);
btree_node_to_freedlist(bc, b2); btree_node_to_freedlist(bc, b2);
@ -830,7 +865,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (likely(c->opts.btree_node_mem_ptr_optimization && if (likely(c->opts.btree_node_mem_ptr_optimization &&
b && b &&
b->hash_val == btree_ptr_hash_val(k))) b->hash_val == btree_ptr_hash_val(k)))
goto lock_node; goto lock_node;
retry: retry:
b = btree_cache_find(bc, k); b = btree_cache_find(bc, k);
if (unlikely(!b)) { if (unlikely(!b)) {
@ -1070,7 +1105,7 @@ wait_on_io:
/* XXX we're called from btree_gc which will be holding other btree /* XXX we're called from btree_gc which will be holding other btree
* nodes locked * nodes locked
* */ */
__bch2_btree_node_wait_on_read(b); __bch2_btree_node_wait_on_read(b);
__bch2_btree_node_wait_on_write(b); __bch2_btree_node_wait_on_write(b);
@ -1141,9 +1176,21 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
stats.failed); stats.failed);
} }
void bch2_btree_cache_to_text(struct printbuf *out, struct bch_fs *c) void bch2_btree_cache_to_text(struct printbuf *out, struct btree_cache *bc)
{ {
prt_printf(out, "nr nodes:\t\t%u\n", c->btree_cache.used); prt_printf(out, "nr nodes:\t\t%u\n", bc->used);
prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&c->btree_cache.dirty)); prt_printf(out, "nr dirty:\t\t%u\n", atomic_read(&bc->dirty));
prt_printf(out, "cannibalize lock:\t%p\n", c->btree_cache.alloc_lock); prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock);
prt_printf(out, "freed:\t\t\t\t%u\n", bc->freed);
prt_printf(out, "not freed, dirty:\t\t%u\n", bc->not_freed_dirty);
prt_printf(out, "not freed, write in flight:\t%u\n", bc->not_freed_write_in_flight);
prt_printf(out, "not freed, read in flight:\t%u\n", bc->not_freed_read_in_flight);
prt_printf(out, "not freed, lock intent failed:\t%u\n", bc->not_freed_lock_intent);
prt_printf(out, "not freed, lock write failed:\t%u\n", bc->not_freed_lock_write);
prt_printf(out, "not freed, access bit:\t\t%u\n", bc->not_freed_access_bit);
prt_printf(out, "not freed, no evict failed:\t%u\n", bc->not_freed_noevict);
prt_printf(out, "not freed, write blocked:\t%u\n", bc->not_freed_write_blocked);
prt_printf(out, "not freed, will make reachable:\t%u\n", bc->not_freed_will_make_reachable);
} }

View File

@ -4,6 +4,7 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_types.h" #include "btree_types.h"
#include "bkey_methods.h"
extern const char * const bch2_btree_node_flags[]; extern const char * const bch2_btree_node_flags[];
@ -100,6 +101,6 @@ static inline unsigned btree_blocks(struct bch_fs *c)
void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *,
struct btree *); struct btree *);
void bch2_btree_cache_to_text(struct printbuf *, struct bch_fs *); void bch2_btree_cache_to_text(struct printbuf *, struct btree_cache *);
#endif /* _BCACHEFS_BTREE_CACHE_H */ #endif /* _BCACHEFS_BTREE_CACHE_H */

View File

@ -318,7 +318,7 @@ static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
" node %s", " node %s",
bch2_btree_ids[b->c.btree_id], b->c.level, bch2_btree_ids[b->c.btree_id], b->c.level,
buf1.buf, buf2.buf)) buf1.buf, buf2.buf))
ret = set_node_min(c, cur, expected_start); ret = set_node_min(c, cur, expected_start);
} }
out: out:
fsck_err: fsck_err:

View File

@ -22,6 +22,8 @@
static void btree_trans_verify_sorted(struct btree_trans *); static void btree_trans_verify_sorted(struct btree_trans *);
inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int); inline void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);
static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *,
struct btree_path *, int);
static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *); static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *, struct btree_path *, static inline void btree_path_list_add(struct btree_trans *, struct btree_path *,
@ -1004,14 +1006,9 @@ err:
return ret; return ret;
} }
static inline bool btree_path_good_node(struct btree_trans *trans, static inline bool btree_path_check_pos_in_node(struct btree_path *path,
struct btree_path *path, unsigned l, int check_pos)
unsigned l, int check_pos)
{ {
if (!is_btree_node(path, l) ||
!bch2_btree_node_relock(trans, path, l))
return false;
if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b)) if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
return false; return false;
if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b)) if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
@ -1019,6 +1016,15 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
return true; return true;
} }
static inline bool btree_path_good_node(struct btree_trans *trans,
struct btree_path *path,
unsigned l, int check_pos)
{
return is_btree_node(path, l) &&
bch2_btree_node_relock(trans, path, l) &&
btree_path_check_pos_in_node(path, l, check_pos);
}
static void btree_path_set_level_down(struct btree_trans *trans, static void btree_path_set_level_down(struct btree_trans *trans,
struct btree_path *path, struct btree_path *path,
unsigned new_level) unsigned new_level)
@ -1035,9 +1041,9 @@ static void btree_path_set_level_down(struct btree_trans *trans,
bch2_btree_path_verify(trans, path); bch2_btree_path_verify(trans, path);
} }
static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans, static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
struct btree_path *path, struct btree_path *path,
int check_pos) int check_pos)
{ {
unsigned i, l = path->level; unsigned i, l = path->level;
again: again:
@ -1058,6 +1064,16 @@ again:
return l; return l;
} }
static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
struct btree_path *path,
int check_pos)
{
return likely(btree_node_locked(path, path->level) &&
btree_path_check_pos_in_node(path, path->level, check_pos))
? path->level
: __btree_path_up_until_good_node(trans, path, check_pos);
}
/* /*
* This is the main state machine for walking down the btree - walks down to a * This is the main state machine for walking down the btree - walks down to a
* specified depth * specified depth
@ -1158,17 +1174,21 @@ static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
struct btree_path *src) struct btree_path *src)
{ {
unsigned i, offset = offsetof(struct btree_path, pos); unsigned i, offset = offsetof(struct btree_path, pos);
int cmp = btree_path_cmp(dst, src);
memcpy((void *) dst + offset, memcpy((void *) dst + offset,
(void *) src + offset, (void *) src + offset,
sizeof(struct btree_path) - offset); sizeof(struct btree_path) - offset);
for (i = 0; i < BTREE_MAX_DEPTH; i++) for (i = 0; i < BTREE_MAX_DEPTH; i++) {
if (btree_node_locked(dst, i)) unsigned t = btree_node_locked_type(dst, i);
six_lock_increment(&dst->l[i].b->c.lock,
__btree_lock_want(dst, i));
bch2_btree_path_check_sort(trans, dst, 0); if (t != BTREE_NODE_UNLOCKED)
six_lock_increment(&dst->l[i].b->c.lock, t);
}
if (cmp)
bch2_btree_path_check_sort_fast(trans, dst, cmp);
} }
static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src, static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btree_path *src,
@ -1181,8 +1201,7 @@ static struct btree_path *btree_path_clone(struct btree_trans *trans, struct btr
return new; return new;
} }
inline struct btree_path * __must_check struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *trans,
bch2_btree_path_make_mut(struct btree_trans *trans,
struct btree_path *path, bool intent, struct btree_path *path, bool intent,
unsigned long ip) unsigned long ip)
{ {
@ -1218,7 +1237,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
path->pos = new_pos; path->pos = new_pos;
bch2_btree_path_check_sort(trans, path, cmp); bch2_btree_path_check_sort_fast(trans, path, cmp);
if (unlikely(path->cached)) { if (unlikely(path->cached)) {
btree_node_unlock(trans, path, 0); btree_node_unlock(trans, path, 0);
@ -1242,7 +1261,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
__btree_path_level_init(path, l); __btree_path_level_init(path, l);
} }
if (l != path->level) { if (unlikely(l != path->level)) {
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
__bch2_btree_path_unlock(trans, path); __bch2_btree_path_unlock(trans, path);
} }
@ -2518,6 +2537,25 @@ static inline void btree_path_swap(struct btree_trans *trans,
btree_path_verify_sorted_ref(trans, r); btree_path_verify_sorted_ref(trans, r);
} }
static __always_inline void bch2_btree_path_check_sort_fast(struct btree_trans *trans,
struct btree_path *path,
int cmp)
{
struct btree_path *n;
int cmp2;
EBUG_ON(!cmp);
while ((n = cmp < 0
? prev_btree_path(trans, path)
: next_btree_path(trans, path)) &&
(cmp2 = btree_path_cmp(n, path)) &&
cmp2 != cmp)
btree_path_swap(trans, n, path);
btree_trans_verify_sorted(trans);
}
inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path, inline void bch2_btree_path_check_sort(struct btree_trans *trans, struct btree_path *path,
int cmp) int cmp)
{ {
@ -2612,7 +2650,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
unsigned flags, unsigned flags,
unsigned long ip) unsigned long ip)
{ {
if (trans->restarted) if (unlikely(trans->restarted))
panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n", panic("bch2_trans_iter_init(): in transaction restart, %s by %pS\n",
bch2_err_str(trans->restarted), bch2_err_str(trans->restarted),
(void *) trans->last_restarted_ip); (void *) trans->last_restarted_ip);
@ -2632,7 +2670,7 @@ static inline void __bch2_trans_iter_init(struct btree_trans *trans,
btree_type_has_snapshots(btree_id)) btree_type_has_snapshots(btree_id))
flags |= BTREE_ITER_FILTER_SNAPSHOTS; flags |= BTREE_ITER_FILTER_SNAPSHOTS;
if (!test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags)) if (trans->journal_replay_not_finished)
flags |= BTREE_ITER_WITH_JOURNAL; flags |= BTREE_ITER_WITH_JOURNAL;
iter->trans = trans; iter->trans = trans;
@ -2816,7 +2854,7 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
BUG_ON(trans->used_mempool); BUG_ON(trans->used_mempool);
#ifdef __KERNEL__ #ifdef __KERNEL__
p = this_cpu_xchg(c->btree_paths_bufs->path , NULL); p = this_cpu_xchg(c->btree_paths_bufs->path, NULL);
#endif #endif
if (!p) if (!p)
p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS); p = mempool_alloc(&trans->c->btree_paths_pool, GFP_NOFS);
@ -2825,15 +2863,16 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
trans->updates = p; p += updates_bytes; trans->updates = p; p += updates_bytes;
} }
static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c, const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
const char *fn)
unsigned bch2_trans_get_fn_idx(const char *fn)
{ {
unsigned i; unsigned i;
for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++) for (i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
if (!c->btree_transaction_fns[i] || if (!bch2_btree_transaction_fns[i] ||
c->btree_transaction_fns[i] == fn) { bch2_btree_transaction_fns[i] == fn) {
c->btree_transaction_fns[i] = fn; bch2_btree_transaction_fns[i] = fn;
return i; return i;
} }
@ -2841,7 +2880,7 @@ static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct b
return i; return i;
} }
void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *fn) void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_idx)
__acquires(&c->btree_trans_barrier) __acquires(&c->btree_trans_barrier)
{ {
struct btree_transaction_stats *s; struct btree_transaction_stats *s;
@ -2851,10 +2890,13 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, const char *
memset(trans, 0, sizeof(*trans)); memset(trans, 0, sizeof(*trans));
trans->c = c; trans->c = c;
trans->fn = fn; trans->fn = fn_idx < ARRAY_SIZE(bch2_btree_transaction_fns)
? bch2_btree_transaction_fns[fn_idx] : NULL;
trans->last_begin_time = local_clock(); trans->last_begin_time = local_clock();
trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn); trans->fn_idx = fn_idx;
trans->locking_wait.task = current; trans->locking_wait.task = current;
trans->journal_replay_not_finished =
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
closure_init_stack(&trans->ref); closure_init_stack(&trans->ref);
bch2_trans_alloc_paths(trans, c); bch2_trans_alloc_paths(trans, c);
@ -2979,7 +3021,7 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
rcu_read_lock(); rcu_read_lock();
owner = READ_ONCE(b->lock.owner); owner = READ_ONCE(b->lock.owner);
pid = owner ? owner->pid : 0;; pid = owner ? owner->pid : 0;
rcu_read_unlock(); rcu_read_unlock();
prt_tab(out); prt_tab(out);

View File

@ -131,9 +131,20 @@ __trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
_path = __trans_next_path_with_node((_trans), (_b), \ _path = __trans_next_path_with_node((_trans), (_b), \
(_path)->idx + 1)) (_path)->idx + 1))
struct btree_path * __must_check struct btree_path *__bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
bool, unsigned long); bool, unsigned long);
static inline struct btree_path * __must_check
bch2_btree_path_make_mut(struct btree_trans *trans,
struct btree_path *path, bool intent,
unsigned long ip)
{
if (path->ref > 1 || path->preserve)
path = __bch2_btree_path_make_mut(trans, path, intent, ip);
path->should_be_locked = false;
return path;
}
struct btree_path * __must_check struct btree_path * __must_check
bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *, bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *,
struct bpos, bool, unsigned long); struct bpos, bool, unsigned long);
@ -551,10 +562,21 @@ void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *); void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned);
void bch2_trans_exit(struct btree_trans *); void bch2_trans_exit(struct btree_trans *);
#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__) extern const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR];
unsigned bch2_trans_get_fn_idx(const char *);
#define bch2_trans_init(_trans, _c, _nr_iters, _mem) \
do { \
static unsigned trans_fn_idx; \
\
if (unlikely(!trans_fn_idx)) \
trans_fn_idx = bch2_trans_get_fn_idx(__func__); \
\
__bch2_trans_init(_trans, _c, trans_fn_idx); \
} while (0)
void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);
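
A hedged sketch of a transaction entry point under the new macro; the function name and body are hypothetical, the only point being that a cached per-callsite index, rather than __func__, is what now reaches __bch2_trans_init():

	int bch2_example_op(struct bch_fs *c)	/* hypothetical caller */
	{
		struct btree_trans trans;

		/*
		 * Expands to a static per-callsite trans_fn_idx, resolved once
		 * via bch2_trans_get_fn_idx(__func__), then passed to
		 * __bch2_trans_init():
		 */
		bch2_trans_init(&trans, c, 0, 0);

		/* ... btree iterator / update work on &trans ... */

		bch2_trans_exit(&trans);
		return 0;
	}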

View File

@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "btree_cache.h" #include "btree_cache.h"
@ -103,6 +104,22 @@ static void bkey_cached_free(struct btree_key_cache *bc,
six_unlock_intent(&ck->c.lock); six_unlock_intent(&ck->c.lock);
} }
static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
struct bkey_cached *ck)
{
struct bkey_cached *pos;
list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
pos->btree_trans_barrier_seq)) {
list_move(&ck->list, &pos->list);
return;
}
}
list_move(&ck->list, &bc->freed_nonpcpu);
}
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc, static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
struct bkey_cached *ck) struct bkey_cached *ck)
{ {
@ -130,11 +147,11 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
while (f->nr > ARRAY_SIZE(f->objs) / 2) { while (f->nr > ARRAY_SIZE(f->objs) / 2) {
struct bkey_cached *ck2 = f->objs[--f->nr]; struct bkey_cached *ck2 = f->objs[--f->nr];
list_move_tail(&ck2->list, &bc->freed_nonpcpu); __bkey_cached_move_to_freelist_ordered(bc, ck2);
} }
preempt_enable(); preempt_enable();
list_move_tail(&ck->list, &bc->freed_nonpcpu); __bkey_cached_move_to_freelist_ordered(bc, ck);
mutex_unlock(&bc->lock); mutex_unlock(&bc->lock);
} }
#else #else
@ -295,7 +312,7 @@ btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
bool was_new = true; bool was_new = true;
ck = bkey_cached_alloc(trans, path); ck = bkey_cached_alloc(trans, path);
if (unlikely(IS_ERR(ck))) if (IS_ERR(ck))
return ck; return ck;
if (unlikely(!ck)) { if (unlikely(!ck)) {
@ -416,7 +433,7 @@ err:
return ret; return ret;
} }
noinline static int static noinline int
bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path, bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
unsigned flags) unsigned flags)
{ {
@ -597,7 +614,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
* Since journal reclaim depends on us making progress here, and the * Since journal reclaim depends on us making progress here, and the
* allocator/copygc depend on journal reclaim making progress, we need * allocator/copygc depend on journal reclaim making progress, we need
* to be using alloc reserves: * to be using alloc reserves:
* */ */
ret = bch2_btree_iter_traverse(&b_iter) ?: ret = bch2_btree_iter_traverse(&b_iter) ?:
bch2_trans_update(trans, &b_iter, ck->k, bch2_trans_update(trans, &b_iter, ck->k,
BTREE_UPDATE_KEY_CACHE_RECLAIM| BTREE_UPDATE_KEY_CACHE_RECLAIM|
@ -982,7 +999,7 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
bc->table_init_done = true; bc->table_init_done = true;
bc->shrink.seeks = 1; bc->shrink.seeks = 0;
bc->shrink.count_objects = bch2_btree_key_cache_count; bc->shrink.count_objects = bch2_btree_key_cache_count;
bc->shrink.scan_objects = bch2_btree_key_cache_scan; bc->shrink.scan_objects = bch2_btree_key_cache_scan;
bc->shrink.to_text = bch2_btree_key_cache_shrinker_to_text; bc->shrink.to_text = bch2_btree_key_cache_shrinker_to_text;
@ -991,15 +1008,17 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c) void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
{ {
prt_printf(out, "nr_freed:\t%zu\n", atomic_long_read(&c->nr_freed)); prt_printf(out, "nr_freed:\t%zu", atomic_long_read(&c->nr_freed));
prt_printf(out, "nr_keys:\t%lu\n", atomic_long_read(&c->nr_keys)); prt_newline(out);
prt_printf(out, "nr_dirty:\t%lu\n", atomic_long_read(&c->nr_dirty)); prt_printf(out, "nr_keys:\t%lu", atomic_long_read(&c->nr_keys));
prt_newline(out);
prt_printf(out, "nr_dirty:\t%lu", atomic_long_read(&c->nr_dirty));
prt_newline(out);
} }
void bch2_btree_key_cache_exit(void) void bch2_btree_key_cache_exit(void)
{ {
if (bch2_key_cache) kmem_cache_destroy(bch2_key_cache);
kmem_cache_destroy(bch2_key_cache);
} }
int __init bch2_btree_key_cache_init(void) int __init bch2_btree_key_cache_init(void)

View File

@ -274,7 +274,7 @@ next:
b = &READ_ONCE(path->l[top->level].b)->c; b = &READ_ONCE(path->l[top->level].b)->c;
if (unlikely(IS_ERR_OR_NULL(b))) { if (IS_ERR_OR_NULL(b)) {
BUG_ON(!lock_graph_remove_non_waiters(&g)); BUG_ON(!lock_graph_remove_non_waiters(&g));
goto next; goto next;
} }
@ -605,7 +605,7 @@ int bch2_trans_relock(struct btree_trans *trans)
struct btree_path *path; struct btree_path *path;
if (unlikely(trans->restarted)) if (unlikely(trans->restarted))
return - ((int) trans->restarted); return -((int) trans->restarted);
trans_for_each_path(trans, path) trans_for_each_path(trans, path)
if (path->should_be_locked && if (path->should_be_locked &&

View File

@ -6,7 +6,7 @@
#include <linux/rhashtable.h> #include <linux/rhashtable.h>
#include <linux/six.h> #include <linux/six.h>
#include "bkey_methods.h" //#include "bkey_methods.h"
#include "buckets_types.h" #include "buckets_types.h"
#include "darray.h" #include "darray.h"
#include "journal_types.h" #include "journal_types.h"
@ -160,6 +160,16 @@ struct btree_cache {
/* Number of elements in live + freeable lists */ /* Number of elements in live + freeable lists */
unsigned used; unsigned used;
unsigned reserve; unsigned reserve;
unsigned freed;
unsigned not_freed_lock_intent;
unsigned not_freed_lock_write;
unsigned not_freed_dirty;
unsigned not_freed_read_in_flight;
unsigned not_freed_write_in_flight;
unsigned not_freed_noevict;
unsigned not_freed_write_blocked;
unsigned not_freed_will_make_reachable;
unsigned not_freed_access_bit;
atomic_t dirty; atomic_t dirty;
struct shrinker shrink; struct shrinker shrink;
@ -408,6 +418,7 @@ struct btree_trans {
bool in_traverse_all:1; bool in_traverse_all:1;
bool memory_allocation_failure:1; bool memory_allocation_failure:1;
bool is_initial_gc:1; bool is_initial_gc:1;
bool journal_replay_not_finished:1;
enum bch_errcode restarted:16; enum bch_errcode restarted:16;
u32 restart_count; u32 restart_count;
unsigned long last_restarted_ip; unsigned long last_restarted_ip;

View File

@ -2046,7 +2046,7 @@ static int async_btree_node_rewrite_trans(struct btree_trans *trans,
goto out; goto out;
ret = bch2_btree_node_rewrite(trans, &iter, b, 0); ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
out : out:
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;

View File

@ -339,7 +339,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b; struct bkey_cached *ck = (void *) path->l[0].b;
unsigned old_u64s = ck->u64s, new_u64s; unsigned new_u64s;
struct bkey_i *new_k; struct bkey_i *new_k;
EBUG_ON(path->level); EBUG_ON(path->level);
@ -368,12 +368,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
ck->u64s = new_u64s; ck->u64s = new_u64s;
ck->k = new_k; ck->k = new_k;
/* return 0;
* Keys returned by peek() are no longer valid pointers, so we need a
* transaction restart:
*/
trace_and_count(c, trans_restart_key_cache_key_realloced, trans, _RET_IP_, path, old_u64s, new_u64s);
return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
} }
/* Triggers: */ /* Triggers: */
@ -1385,6 +1380,37 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
return ret; return ret;
} }
static int __must_check
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags,
unsigned long ip);
static noinline int flush_new_cached_update(struct btree_trans *trans,
struct btree_path *path,
struct btree_insert_entry *i,
enum btree_update_flags flags,
unsigned long ip)
{
struct btree_path *btree_path;
int ret;
i->key_cache_already_flushed = true;
i->flags |= BTREE_TRIGGER_NORUN;
btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT, _THIS_IP_);
ret = bch2_btree_path_traverse(trans, btree_path, 0);
if (ret)
goto err;
btree_path_set_should_be_locked(btree_path);
ret = bch2_trans_update_by_path_trace(trans, btree_path, i->k, flags, ip);
err:
bch2_path_put(trans, btree_path, true);
return ret;
}
static int __must_check static int __must_check
bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path, bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *k, enum btree_update_flags flags, struct bkey_i *k, enum btree_update_flags flags,
@ -1392,7 +1418,6 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_insert_entry *i, n; struct btree_insert_entry *i, n;
int ret = 0;
BUG_ON(!path->should_be_locked); BUG_ON(!path->should_be_locked);
@ -1461,27 +1486,10 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
* the key cache - but the key has to exist in the btree for that to * the key cache - but the key has to exist in the btree for that to
* work: * work:
*/ */
if (path->cached && if (unlikely(path->cached && bkey_deleted(&i->old_k)))
bkey_deleted(&i->old_k)) { return flush_new_cached_update(trans, path, i, flags, ip);
struct btree_path *btree_path;
i->key_cache_already_flushed = true; return 0;
i->flags |= BTREE_TRIGGER_NORUN;
btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
BTREE_ITER_INTENT, _THIS_IP_);
ret = bch2_btree_path_traverse(trans, btree_path, 0);
if (ret)
goto err;
btree_path_set_should_be_locked(btree_path);
ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
err:
bch2_path_put(trans, btree_path, true);
}
return ret;
} }
static int __must_check static int __must_check

View File

@ -89,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
: ca->usage[journal_seq & JOURNAL_BUF_MASK]); : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
} }
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{ {
struct bch_fs *c = ca->fs; struct bch_fs *c = ca->fs;
struct bch_dev_usage ret;
unsigned seq, i, u64s = dev_usage_u64s(); unsigned seq, i, u64s = dev_usage_u64s();
do { do {
seq = read_seqcount_begin(&c->usage_lock); seq = read_seqcount_begin(&c->usage_lock);
memcpy(&ret, ca->usage_base, u64s * sizeof(u64)); memcpy(usage, ca->usage_base, u64s * sizeof(u64));
for (i = 0; i < ARRAY_SIZE(ca->usage); i++) for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s); acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
} while (read_seqcount_retry(&c->usage_lock, seq)); } while (read_seqcount_retry(&c->usage_lock, seq));
return ret;
} }
static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@ -923,7 +920,7 @@ int bch2_mark_extent(struct btree_trans *trans,
{ {
u64 journal_seq = trans->journal_res.seq; u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
@ -1115,10 +1112,10 @@ int bch2_mark_inode(struct btree_trans *trans,
u64 journal_seq = trans->journal_res.seq; u64 journal_seq = trans->journal_res.seq;
if (flags & BTREE_TRIGGER_INSERT) { if (flags & BTREE_TRIGGER_INSERT) {
struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v; struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
BUG_ON(!journal_seq); BUG_ON(!journal_seq);
BUG_ON(new.k->type != KEY_TYPE_inode_v2); BUG_ON(new.k->type != KEY_TYPE_inode_v3);
v->bi_journal_seq = cpu_to_le64(journal_seq); v->bi_journal_seq = cpu_to_le64(journal_seq);
} }
@ -1142,7 +1139,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
unsigned flags) unsigned flags)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bch_fs_usage __percpu *fs_usage; struct bch_fs_usage __percpu *fs_usage;
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
s64 sectors = (s64) k.k->size; s64 sectors = (s64) k.k->size;
@ -1221,7 +1218,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
unsigned flags) unsigned flags)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref; struct reflink_gc *ref;
size_t l, r, m; size_t l, r, m;
@ -2113,5 +2110,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
return -ENOMEM; return -ENOMEM;
} }
return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);; return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
} }

View File

@ -139,7 +139,15 @@ static inline u8 ptr_stale(struct bch_dev *ca,
/* Device usage: */ /* Device usage: */
struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *); void bch2_dev_usage_read_fast(struct bch_dev *, struct bch_dev_usage *);
static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
{
struct bch_dev_usage ret;
bch2_dev_usage_read_fast(ca, &ret);
return ret;
}
void bch2_dev_usage_init(struct bch_dev *); void bch2_dev_usage_init(struct bch_dev *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve) static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reserve reserve)
@ -240,8 +248,6 @@ int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, struct
int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *, int bch2_trans_mark_metadata_bucket(struct btree_trans *, struct bch_dev *,

View File

@ -131,7 +131,7 @@ static inline int do_encrypt(struct crypto_sync_skcipher *tfm,
size_t orig_len = len; size_t orig_len = len;
int ret, i; int ret, i;
sg = kmalloc_array(sizeof(*sg), pages, GFP_KERNEL); sg = kmalloc_array(pages, sizeof(*sg), GFP_KERNEL);
if (!sg) if (!sg)
return -ENOMEM; return -ENOMEM;
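Besides putting the element count first, this hunk (and the kcalloc()/kvmalloc_array() conversions later in the commit) leans on the overflow-checked array allocators. A rough, illustrative equivalent of kmalloc_array():

/* Illustrative sketch of kmalloc_array() semantics: */
static inline void *kmalloc_array_sketch(size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;	/* n * size would overflow, refuse the allocation */

	return kmalloc(bytes, flags);
}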

View File

@ -377,7 +377,7 @@ static unsigned __bio_compress(struct bch_fs *c,
/* If it's only one block, don't bother trying to compress: */ /* If it's only one block, don't bother trying to compress: */
if (src->bi_iter.bi_size <= c->opts.block_size) if (src->bi_iter.bi_size <= c->opts.block_size)
return 0; return BCH_COMPRESSION_TYPE_incompressible;
dst_data = bio_map_or_bounce(c, dst, WRITE); dst_data = bio_map_or_bounce(c, dst, WRITE);
src_data = bio_map_or_bounce(c, src, READ); src_data = bio_map_or_bounce(c, src, READ);

View File

@ -312,7 +312,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
bch2_write_op_init(&m->op, c, io_opts); bch2_write_op_init(&m->op, c, io_opts);
m->op.pos = bkey_start_pos(k.k); m->op.pos = bkey_start_pos(k.k);
m->op.version = k.k->version; m->op.version = k.k->version;
m->op.target = data_opts.target, m->op.target = data_opts.target;
m->op.write_point = wp; m->op.write_point = wp;
m->op.flags |= BCH_WRITE_PAGES_STABLE| m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED| BCH_WRITE_PAGES_OWNED|

View File

@ -477,7 +477,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
if (i->iter < tbl->size) { if (i->iter < tbl->size) {
rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
bch2_cached_btree_node_to_text(&i->buf, c, b); bch2_cached_btree_node_to_text(&i->buf, c, b);
i->iter++;; i->iter++;
} else { } else {
done = true; done = true;
} }
@ -637,11 +637,11 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
if (!i->size) if (!i->size)
break; break;
if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) || if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
!c->btree_transaction_fns[i->iter]) !bch2_btree_transaction_fns[i->iter])
break; break;
prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]); prt_printf(&i->buf, "%s: ", bch2_btree_transaction_fns[i->iter]);
prt_newline(&i->buf); prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2); printbuf_indent_add(&i->buf, 2);

View File

@ -103,7 +103,7 @@ int bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k,
if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) { if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) {
prt_printf(err, "value too big (%zu > %u)", prt_printf(err, "value too big (%zu > %u)",
bkey_val_u64s(k.k),dirent_val_u64s(len)); bkey_val_u64s(k.k), dirent_val_u64s(len));
return -EINVAL; return -EINVAL;
} }

View File

@ -292,7 +292,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <= if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
lp.crc.uncompressed_size) { lp.crc.uncompressed_size) {
/* can use left extent's crc entry */ /* can use left extent's crc entry */
} else if (lp.crc.live_size <= rp.crc.offset ) { } else if (lp.crc.live_size <= rp.crc.offset) {
/* can use right extent's crc entry */ /* can use right extent's crc entry */
} else { } else {
/* check if checksums can be merged: */ /* check if checksums can be merged: */
@ -351,7 +351,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
if (crc_l.offset + crc_l.live_size + crc_r.live_size <= if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
crc_l.uncompressed_size) { crc_l.uncompressed_size) {
/* can use left extent's crc entry */ /* can use left extent's crc entry */
} else if (crc_l.live_size <= crc_r.offset ) { } else if (crc_l.live_size <= crc_r.offset) {
/* can use right extent's crc entry */ /* can use right extent's crc entry */
crc_r.offset -= crc_l.live_size; crc_r.offset -= crc_l.live_size;
bch2_extent_crc_pack(entry_to_crc(en_l), crc_r, bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,

View File

@ -487,11 +487,11 @@ int bch2_rename_trans(struct btree_trans *trans,
ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?: ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
(src_dir.inum != dst_dir.inum (src_dir.inum != dst_dir.inum
? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u) ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
: 0 ) ?: : 0) ?:
bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?: bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
(dst_inum.inum (dst_inum.inum
? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u) ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
: 0 ); : 0);
err: err:
bch2_trans_iter_exit(trans, &dst_inode_iter); bch2_trans_iter_exit(trans, &dst_inode_iter);
bch2_trans_iter_exit(trans, &src_inode_iter); bch2_trans_iter_exit(trans, &src_inode_iter);

View File

@ -1684,7 +1684,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
unsigned pg_len = min_t(unsigned, len - copied, unsigned pg_len = min_t(unsigned, len - copied,
PAGE_SIZE - pg_offset); PAGE_SIZE - pg_offset);
unsigned pg_copied = copy_page_from_iter_atomic(page, unsigned pg_copied = copy_page_from_iter_atomic(page,
pg_offset, pg_len,iter); pg_offset, pg_len, iter);
if (!pg_copied) if (!pg_copied)
break; break;
@ -2137,8 +2137,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct iovec *iov = dio->inline_vecs; struct iovec *iov = dio->inline_vecs;
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) { if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
iov = kmalloc(dio->iter.nr_segs * sizeof(*iov), iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
GFP_KERNEL); GFP_KERNEL);
if (unlikely(!iov)) { if (unlikely(!iov)) {
dio->sync = sync = true; dio->sync = sync = true;
goto do_io; goto do_io;
@ -2713,7 +2713,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
truncate_pagecache_range(&inode->v, offset, end - 1); truncate_pagecache_range(&inode->v, offset, end - 1);
if (block_start < block_end ) { if (block_start < block_end) {
s64 i_sectors_delta = 0; s64 i_sectors_delta = 0;
ret = bch2_fpunch(c, inode_inum(inode), ret = bch2_fpunch(c, inode_inum(inode),

View File

@ -528,7 +528,7 @@ static int bch2_symlink(struct user_namespace *mnt_userns,
inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0, inode = __bch2_create(mnt_userns, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
(subvol_inum) { 0 }, BCH_CREATE_TMPFILE); (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
if (unlikely(IS_ERR(inode))) if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode)); return bch2_err_class(PTR_ERR(inode));
inode_lock(&inode->v); inode_lock(&inode->v);
@ -1846,7 +1846,7 @@ got_sb:
sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1; sb->s_time_min = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec); sb->s_time_max = div_s64(S64_MAX, c->sb.time_units_per_sec);
c->vfs_sb = sb; c->vfs_sb = sb;
strlcpy(sb->s_id, c->name, sizeof(sb->s_id)); strscpy(sb->s_id, c->name, sizeof(sb->s_id));
ret = super_setup_bdi(sb); ret = super_setup_bdi(sb);
if (ret) if (ret)
@ -1917,8 +1917,7 @@ MODULE_ALIAS_FS("bcachefs");
void bch2_vfs_exit(void) void bch2_vfs_exit(void)
{ {
unregister_filesystem(&bcache_fs_type); unregister_filesystem(&bcache_fs_type);
if (bch2_inode_cache) kmem_cache_destroy(bch2_inode_cache);
kmem_cache_destroy(bch2_inode_cache);
} }
int __init bch2_vfs_init(void) int __init bch2_vfs_init(void)

View File

@ -2044,7 +2044,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
{ {
if (t->nr == t->size) { if (t->nr == t->size) {
size_t new_size = max_t(size_t, 128UL, t->size * 2); size_t new_size = max_t(size_t, 128UL, t->size * 2);
void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL); void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
if (!d) { if (!d) {
bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
new_size); new_size);

View File

@ -60,11 +60,10 @@ static int inode_decode_field(const u8 *in, const u8 *end,
return bytes; return bytes;
} }
void bch2_inode_pack(struct bch_fs *c, static inline void bch2_inode_pack_inlined(struct bkey_inode_buf *packed,
struct bkey_inode_buf *packed, const struct bch_inode_unpacked *inode)
const struct bch_inode_unpacked *inode)
{ {
struct bkey_i_inode_v2 *k = &packed->inode; struct bkey_i_inode_v3 *k = &packed->inode;
u8 *out = k->v.fields; u8 *out = k->v.fields;
u8 *end = (void *) &packed[1]; u8 *end = (void *) &packed[1];
u8 *last_nonzero_field = out; u8 *last_nonzero_field = out;
@ -72,13 +71,17 @@ void bch2_inode_pack(struct bch_fs *c,
unsigned bytes; unsigned bytes;
int ret; int ret;
bkey_inode_v2_init(&packed->inode.k_i); bkey_inode_v3_init(&packed->inode.k_i);
packed->inode.k.p.offset = inode->bi_inum; packed->inode.k.p.offset = inode->bi_inum;
packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq); packed->inode.v.bi_journal_seq = cpu_to_le64(inode->bi_journal_seq);
packed->inode.v.bi_hash_seed = inode->bi_hash_seed; packed->inode.v.bi_hash_seed = inode->bi_hash_seed;
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags);
packed->inode.v.bi_flags = cpu_to_le64(inode->bi_flags); packed->inode.v.bi_sectors = cpu_to_le64(inode->bi_sectors);
packed->inode.v.bi_mode = cpu_to_le16(inode->bi_mode); packed->inode.v.bi_size = cpu_to_le64(inode->bi_size);
packed->inode.v.bi_version = cpu_to_le64(inode->bi_version);
SET_INODEv3_MODE(&packed->inode.v, inode->bi_mode);
SET_INODEv3_FIELDS_START(&packed->inode.v, INODEv3_FIELDS_START_CUR);
#define x(_name, _bits) \ #define x(_name, _bits) \
nr_fields++; \ nr_fields++; \
@ -99,7 +102,7 @@ void bch2_inode_pack(struct bch_fs *c,
*out++ = 0; \ *out++ = 0; \
} }
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v3()
#undef x #undef x
BUG_ON(out > end); BUG_ON(out > end);
@ -110,7 +113,7 @@ void bch2_inode_pack(struct bch_fs *c,
set_bkey_val_bytes(&packed->inode.k, bytes); set_bkey_val_bytes(&packed->inode.k, bytes);
memset_u64s_tail(&packed->inode.v, 0, bytes); memset_u64s_tail(&packed->inode.v, 0, bytes);
SET_INODEv2_NR_FIELDS(&k->v, nr_fields); SET_INODEv3_NR_FIELDS(&k->v, nr_fields);
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
struct bch_inode_unpacked unpacked; struct bch_inode_unpacked unpacked;
@ -120,16 +123,25 @@ void bch2_inode_pack(struct bch_fs *c,
BUG_ON(ret); BUG_ON(ret);
BUG_ON(unpacked.bi_inum != inode->bi_inum); BUG_ON(unpacked.bi_inum != inode->bi_inum);
BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed); BUG_ON(unpacked.bi_hash_seed != inode->bi_hash_seed);
BUG_ON(unpacked.bi_sectors != inode->bi_sectors);
BUG_ON(unpacked.bi_size != inode->bi_size);
BUG_ON(unpacked.bi_version != inode->bi_version);
BUG_ON(unpacked.bi_mode != inode->bi_mode); BUG_ON(unpacked.bi_mode != inode->bi_mode);
#define x(_name, _bits) if (unpacked._name != inode->_name) \ #define x(_name, _bits) if (unpacked._name != inode->_name) \
panic("unpacked %llu should be %llu", \ panic("unpacked %llu should be %llu", \
(u64) unpacked._name, (u64) inode->_name); (u64) unpacked._name, (u64) inode->_name);
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v3()
#undef x #undef x
} }
} }
void bch2_inode_pack(struct bkey_inode_buf *packed,
const struct bch_inode_unpacked *inode)
{
bch2_inode_pack_inlined(packed, inode);
}
static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode, static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
struct bch_inode_unpacked *unpacked) struct bch_inode_unpacked *unpacked)
{ {
@ -157,7 +169,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
unpacked->_name = field[1]; \ unpacked->_name = field[1]; \
in += ret; in += ret;
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v2()
#undef x #undef x
/* XXX: signal if there were more fields than expected? */ /* XXX: signal if there were more fields than expected? */
@ -196,15 +208,66 @@ static int bch2_inode_unpack_v2(struct bch_inode_unpacked *unpacked,
return -1; \ return -1; \
fieldnr++; fieldnr++;
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v2()
#undef x #undef x
/* XXX: signal if there were more fields than expected? */ /* XXX: signal if there were more fields than expected? */
return 0; return 0;
} }
int bch2_inode_unpack(struct bkey_s_c k, static int bch2_inode_unpack_v3(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked) struct bch_inode_unpacked *unpacked)
{
struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
const u8 *in = inode.v->fields;
const u8 *end = bkey_val_end(inode);
unsigned nr_fields = INODEv3_NR_FIELDS(inode.v);
unsigned fieldnr = 0;
int ret;
u64 v[2];
unpacked->bi_inum = inode.k->p.offset;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
unpacked->bi_sectors = le64_to_cpu(inode.v->bi_sectors);
unpacked->bi_size = le64_to_cpu(inode.v->bi_size);
unpacked->bi_version = le64_to_cpu(inode.v->bi_version);
unpacked->bi_mode = INODEv3_MODE(inode.v);
#define x(_name, _bits) \
if (fieldnr < nr_fields) { \
ret = bch2_varint_decode_fast(in, end, &v[0]); \
if (ret < 0) \
return ret; \
in += ret; \
\
if (_bits > 64) { \
ret = bch2_varint_decode_fast(in, end, &v[1]); \
if (ret < 0) \
return ret; \
in += ret; \
} else { \
v[1] = 0; \
} \
} else { \
v[0] = v[1] = 0; \
} \
\
unpacked->_name = v[0]; \
if (v[1] || v[0] != unpacked->_name) \
return -1; \
fieldnr++;
BCH_INODE_FIELDS_v3()
#undef x
/* XXX: signal if there were more fields than expected? */
return 0;
}
static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{ {
switch (k.k->type) { switch (k.k->type) {
case KEY_TYPE_inode: { case KEY_TYPE_inode: {
@ -243,6 +306,14 @@ int bch2_inode_unpack(struct bkey_s_c k,
} }
} }
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
if (likely(k.k->type == KEY_TYPE_inode_v3))
return bch2_inode_unpack_v3(k, unpacked);
return bch2_inode_unpack_slowpath(k, unpacked);
}
int bch2_inode_peek(struct btree_trans *trans, int bch2_inode_peek(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bch_inode_unpacked *inode, struct bch_inode_unpacked *inode,
@ -288,11 +359,29 @@ int bch2_inode_write(struct btree_trans *trans,
if (IS_ERR(inode_p)) if (IS_ERR(inode_p))
return PTR_ERR(inode_p); return PTR_ERR(inode_p);
bch2_inode_pack(trans->c, inode_p, inode); bch2_inode_pack_inlined(inode_p, inode);
inode_p->inode.k.p.snapshot = iter->snapshot; inode_p->inode.k.p.snapshot = iter->snapshot;
return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); return bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
} }
struct bkey_s_c bch2_inode_to_v3(struct btree_trans *trans, struct bkey_s_c k)
{
struct bch_inode_unpacked u;
struct bkey_inode_buf *inode_p;
int ret;
inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p));
if (IS_ERR(inode_p))
return bkey_s_c_err(PTR_ERR(inode_p));
ret = bch2_inode_unpack(k, &u);
if (ret)
return bkey_s_c_err(ret);
bch2_inode_pack(inode_p, &u);
return bkey_i_to_s_c(&inode_p->inode.k_i);
}
static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err) static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
{ {
struct bch_inode_unpacked unpacked; struct bch_inode_unpacked unpacked;
@ -307,7 +396,7 @@ static int __bch2_inode_invalid(struct bkey_s_c k, struct printbuf *err)
return -EINVAL; return -EINVAL;
} }
if (bch2_inode_unpack(k, &unpacked)){ if (bch2_inode_unpack(k, &unpacked)) {
prt_printf(err, "invalid variable length fields"); prt_printf(err, "invalid variable length fields");
return -EINVAL; return -EINVAL;
} }
@ -378,15 +467,48 @@ int bch2_inode_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
return __bch2_inode_invalid(k, err); return __bch2_inode_invalid(k, err);
} }
static void __bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode) int bch2_inode_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
{ {
prt_printf(out, "mode %o flags %x journal_seq %llu", struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
if (bkey_val_bytes(k.k) < sizeof(*inode.v)) {
prt_printf(err, "incorrect value size (%zu < %zu)",
bkey_val_bytes(k.k), sizeof(*inode.v));
return -EINVAL;
}
if (INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL ||
INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k)) {
prt_printf(err, "invalid fields_start (got %llu, min %u max %zu)",
INODEv3_FIELDS_START(inode.v),
INODEv3_FIELDS_START_INITIAL,
bkey_val_u64s(inode.k));
return -EINVAL;
}
if (INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR) {
prt_printf(err, "invalid str hash type (%llu >= %u)",
INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
return -EINVAL;
}
return __bch2_inode_invalid(k, err);
}
static void __bch2_inode_unpacked_to_text(struct printbuf *out,
struct bch_inode_unpacked *inode)
{
prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
inode->bi_mode, inode->bi_flags, inode->bi_mode, inode->bi_flags,
inode->bi_journal_seq); inode->bi_journal_seq,
inode->bi_size,
inode->bi_sectors,
inode->bi_version);
#define x(_name, _bits) \ #define x(_name, _bits) \
prt_printf(out, " "#_name " %llu", (u64) inode->_name); prt_printf(out, " "#_name " %llu", (u64) inode->_name);
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v3()
#undef x #undef x
} }
@ -396,8 +518,7 @@ void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked
__bch2_inode_unpacked_to_text(out, inode); __bch2_inode_unpacked_to_text(out, inode);
} }
void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
struct bkey_s_c k)
{ {
struct bch_inode_unpacked inode; struct bch_inode_unpacked inode;

View File

@ -2,12 +2,14 @@
#ifndef _BCACHEFS_INODE_H #ifndef _BCACHEFS_INODE_H
#define _BCACHEFS_INODE_H #define _BCACHEFS_INODE_H
#include "bkey.h"
#include "opts.h" #include "opts.h"
extern const char * const bch2_inode_opts[]; extern const char * const bch2_inode_opts[];
int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_inode_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_inode_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode (struct bkey_ops) { \ #define bch2_bkey_ops_inode (struct bkey_ops) { \
@ -24,10 +26,18 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.atomic_trigger = bch2_mark_inode, \ .atomic_trigger = bch2_mark_inode, \
} }
#define bch2_bkey_ops_inode_v3 (struct bkey_ops) { \
.key_invalid = bch2_inode_v3_invalid, \
.val_to_text = bch2_inode_to_text, \
.trans_trigger = bch2_trans_mark_inode, \
.atomic_trigger = bch2_mark_inode, \
}
static inline bool bkey_is_inode(const struct bkey *k) static inline bool bkey_is_inode(const struct bkey *k)
{ {
return k->type == KEY_TYPE_inode || return k->type == KEY_TYPE_inode ||
k->type == KEY_TYPE_inode_v2; k->type == KEY_TYPE_inode_v2 ||
k->type == KEY_TYPE_inode_v3;
} }
int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c, int bch2_inode_generation_invalid(const struct bch_fs *, struct bkey_s_c,
@ -51,25 +61,28 @@ struct bch_inode_unpacked {
u64 bi_inum; u64 bi_inum;
u64 bi_journal_seq; u64 bi_journal_seq;
__le64 bi_hash_seed; __le64 bi_hash_seed;
u64 bi_size;
u64 bi_sectors;
u64 bi_version;
u32 bi_flags; u32 bi_flags;
u16 bi_mode; u16 bi_mode;
#define x(_name, _bits) u##_bits _name; #define x(_name, _bits) u##_bits _name;
BCH_INODE_FIELDS() BCH_INODE_FIELDS_v3()
#undef x #undef x
}; };
struct bkey_inode_buf { struct bkey_inode_buf {
struct bkey_i_inode_v2 inode; struct bkey_i_inode_v3 inode;
#define x(_name, _bits) + 8 + _bits / 8 #define x(_name, _bits) + 8 + _bits / 8
u8 _pad[0 + BCH_INODE_FIELDS()]; u8 _pad[0 + BCH_INODE_FIELDS_v3()];
#undef x #undef x
} __attribute__((packed, aligned(8))); } __attribute__((packed, aligned(8)));
void bch2_inode_pack(struct bch_fs *, struct bkey_inode_buf *, void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *);
struct bkey_s_c bch2_inode_to_v3(struct btree_trans *, struct bkey_s_c);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);

View File

@ -242,8 +242,7 @@ int bch2_extent_update(struct btree_trans *trans,
s64 *i_sectors_delta_total, s64 *i_sectors_delta_total,
bool check_enospc) bool check_enospc)
{ {
struct btree_iter inode_iter; struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
struct bpos next_pos; struct bpos next_pos;
bool usage_increasing; bool usage_increasing;
s64 i_sectors_delta = 0, disk_sectors_delta = 0; s64 i_sectors_delta = 0, disk_sectors_delta = 0;
@ -283,32 +282,67 @@ int bch2_extent_update(struct btree_trans *trans,
return ret; return ret;
} }
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, if (new_i_size || i_sectors_delta) {
BTREE_ITER_INTENT); struct bkey_s_c k;
if (ret) struct bkey_s_c_inode_v3 inode;
return ret; struct bkey_i_inode_v3 *new_inode;
bool i_size_update;
if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) && bch2_trans_iter_init(trans, &inode_iter, BTREE_ID_inodes,
new_i_size > inode_u.bi_size) SPOS(0, inum.inum, iter->snapshot),
inode_u.bi_size = new_i_size; BTREE_ITER_INTENT|BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&inode_iter);
ret = bkey_err(k);
if (unlikely(ret))
goto err;
inode_u.bi_sectors += i_sectors_delta; ret = bkey_is_inode(k.k) ? 0 : -ENOENT;
if (unlikely(ret))
goto err;
if (unlikely(k.k->type != KEY_TYPE_inode_v3)) {
k = bch2_inode_to_v3(trans, k);
ret = bkey_err(k);
if (unlikely(ret))
goto err;
}
inode = bkey_s_c_to_inode_v3(k);
i_size_update = !(le64_to_cpu(inode.v->bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
new_i_size > le64_to_cpu(inode.v->bi_size);
if (!i_sectors_delta && !i_size_update)
goto no_inode_update;
new_inode = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(new_inode);
if (unlikely(ret))
goto err;
bkey_reassemble(&new_inode->k_i, k);
if (i_size_update)
new_inode->v.bi_size = cpu_to_le64(new_i_size);
le64_add_cpu(&new_inode->v.bi_sectors, i_sectors_delta);
ret = bch2_trans_update(trans, &inode_iter, &new_inode->k_i, 0);
if (unlikely(ret))
goto err;
}
no_inode_update:
ret = bch2_trans_update(trans, iter, k, 0) ?: ret = bch2_trans_update(trans, iter, k, 0) ?:
bch2_inode_write(trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(trans, disk_res, journal_seq, bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL); BTREE_INSERT_NOFAIL);
bch2_trans_iter_exit(trans, &inode_iter); if (unlikely(ret))
goto err;
if (ret)
return ret;
if (i_sectors_delta_total) if (i_sectors_delta_total)
*i_sectors_delta_total += i_sectors_delta; *i_sectors_delta_total += i_sectors_delta;
bch2_btree_iter_set_pos(iter, next_pos); bch2_btree_iter_set_pos(iter, next_pos);
err:
return 0; bch2_trans_iter_exit(trans, &inode_iter);
return ret;
} }
/* /*
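The rewritten bch2_extent_update() above avoids a full unpack and re-pack of every inode field per extent update: it peeks the packed inode key directly (upgrading older inodes through bch2_inode_to_v3() when needed), copies it with bkey_reassemble(), and touches only the fixed-offset bi_size and bi_sectors fields. The sector accounting uses le64_add_cpu(), which is roughly equivalent to:

/* Rough equivalent of le64_add_cpu(), for illustration: */
static inline void le64_add_cpu_sketch(__le64 *var, u64 val)
{
	*var = cpu_to_le64(le64_to_cpu(*var) + val);
}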
@ -926,8 +960,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
saved_iter = dst->bi_iter; saved_iter = dst->bi_iter;
do { do {
struct bch_extent_crc_unpacked crc = struct bch_extent_crc_unpacked crc = { 0 };
(struct bch_extent_crc_unpacked) { 0 };
struct bversion version = op->version; struct bversion version = op->version;
size_t dst_len, src_len; size_t dst_len, src_len;
@ -979,6 +1012,8 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
!crc_is_compressed(crc) && !crc_is_compressed(crc) &&
bch2_csum_type_is_encryption(op->crc.csum_type) == bch2_csum_type_is_encryption(op->crc.csum_type) ==
bch2_csum_type_is_encryption(op->csum_type)) { bch2_csum_type_is_encryption(op->csum_type)) {
u8 compression_type = crc.compression_type;
u16 nonce = crc.nonce;
/* /*
* Note: when we're using rechecksum(), we need to be * Note: when we're using rechecksum(), we need to be
* checksumming @src because it has all the data our * checksumming @src because it has all the data our
@ -997,6 +1032,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
bio_sectors(src) - (src_len >> 9), bio_sectors(src) - (src_len >> 9),
op->csum_type)) op->csum_type))
goto csum_err; goto csum_err;
/*
* bch2_rechecksum_bio() sets compression_type on crc from op->crc,
* this isn't always correct as sometimes we're changing
* an extent from uncompressed to incompressible.
*/
crc.compression_type = compression_type;
crc.nonce = nonce;
} else { } else {
if ((op->flags & BCH_WRITE_DATA_ENCODED) && if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
bch2_rechecksum_bio(c, src, version, op->crc, bch2_rechecksum_bio(c, src, version, op->crc,
@ -1115,8 +1157,8 @@ again:
BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl); BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
EBUG_ON(!wp); EBUG_ON(!wp);
if (unlikely(IS_ERR(wp))) { if (IS_ERR(wp)) {
if (unlikely(PTR_ERR(wp) != -EAGAIN)) { if (unlikely(wp != ERR_PTR(-EAGAIN))) {
ret = PTR_ERR(wp); ret = PTR_ERR(wp);
goto err; goto err;
} }

View File

@ -739,7 +739,7 @@ int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
return ret; return ret;
entry = container_of(journal_res_entry(j, &res), entry = container_of(journal_res_entry(j, &res),
struct jset_entry_log, entry);; struct jset_entry_log, entry);
memset(entry, 0, u64s * sizeof(u64)); memset(entry, 0, u64s * sizeof(u64));
entry->entry.type = BCH_JSET_ENTRY_log; entry->entry.type = BCH_JSET_ENTRY_log;
entry->entry.u64s = u64s - 1; entry->entry.u64s = u64s - 1;
@ -796,10 +796,10 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch2_journal_block(&c->journal); bch2_journal_block(&c->journal);
} }
bu = kzalloc(nr_want * sizeof(*bu), GFP_KERNEL); bu = kcalloc(nr_want, sizeof(*bu), GFP_KERNEL);
ob = kzalloc(nr_want * sizeof(*ob), GFP_KERNEL); ob = kcalloc(nr_want, sizeof(*ob), GFP_KERNEL);
new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL); new_buckets = kcalloc(nr, sizeof(u64), GFP_KERNEL);
new_bucket_seq = kzalloc(nr * sizeof(u64), GFP_KERNEL); new_bucket_seq = kcalloc(nr, sizeof(u64), GFP_KERNEL);
if (!bu || !ob || !new_buckets || !new_bucket_seq) { if (!bu || !ob || !new_buckets || !new_bucket_seq) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_unblock; goto err_unblock;
@ -1265,7 +1265,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
rcu_read_lock(); rcu_read_lock();
s = READ_ONCE(j->reservations); s = READ_ONCE(j->reservations);
prt_printf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size); prt_printf(out, "dirty journal entries:\t%llu/%llu\n", fifo_used(&j->pin), j->pin.size);
prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j)); prt_printf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk); prt_printf(out, "seq_ondisk:\t\t%llu\n", j->seq_ondisk);
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j)); prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));

View File

@ -110,6 +110,7 @@
*/ */
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/prefetch.h>
#include "journal_types.h" #include "journal_types.h"
@ -304,15 +305,26 @@ static inline int journal_res_get_fast(struct journal *j,
{ {
union journal_res_state old, new; union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter); u64 v = atomic64_read(&j->reservations.counter);
unsigned u64s, offset;
do { do {
old.v = new.v = v; old.v = new.v = v;
/*
* Round up the end of the journal reservation to the next
* cacheline boundary:
*/
u64s = res->u64s;
offset = sizeof(struct jset) / sizeof(u64) +
new.cur_entry_offset + u64s;
u64s += ((offset - 1) & ((SMP_CACHE_BYTES / sizeof(u64)) - 1)) + 1;
/* /*
* Check if there is still room in the current journal * Check if there is still room in the current journal
* entry: * entry:
*/ */
if (new.cur_entry_offset + res->u64s > j->cur_entry_u64s) if (new.cur_entry_offset + u64s > j->cur_entry_u64s)
return 0; return 0;
EBUG_ON(!journal_state_count(new, new.idx)); EBUG_ON(!journal_state_count(new, new.idx));
@ -320,7 +332,7 @@ static inline int journal_res_get_fast(struct journal *j,
if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
return 0; return 0;
new.cur_entry_offset += res->u64s; new.cur_entry_offset += u64s;
journal_state_inc(&new); journal_state_inc(&new);
/* /*
@ -337,8 +349,15 @@ static inline int journal_res_get_fast(struct journal *j,
res->ref = true; res->ref = true;
res->idx = old.idx; res->idx = old.idx;
res->u64s = u64s;
res->offset = old.cur_entry_offset; res->offset = old.cur_entry_offset;
res->seq = le64_to_cpu(j->buf[old.idx].data->seq); res->seq = le64_to_cpu(j->buf[old.idx].data->seq);
offset = res->offset;
while (offset < res->offset + res->u64s) {
prefetchw(vstruct_idx(j->buf[res->idx].data, offset));
offset += SMP_CACHE_BYTES / sizeof(u64);
}
return 1; return 1;
} }

View File

@ -232,7 +232,7 @@ void bch2_journal_space_available(struct journal *j)
if ((j->space[journal_space_clean_ondisk].next_entry < if ((j->space[journal_space_clean_ondisk].next_entry <
j->space[journal_space_clean_ondisk].total) && j->space[journal_space_clean_ondisk].total) &&
(clean - clean_ondisk <= total / 8) && (clean - clean_ondisk <= total / 8) &&
(clean_ondisk * 2 > clean )) (clean_ondisk * 2 > clean))
set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); set_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
else else
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags); clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
@ -363,7 +363,7 @@ static inline void __journal_pin_drop(struct journal *j,
list_del_init(&pin->list); list_del_init(&pin->list);
/* /*
* Unpinning a journal entry make make journal_next_bucket() succeed, if * Unpinning a journal entry may make journal_next_bucket() succeed if
* writing a new last_seq will now make another bucket available: * writing a new last_seq will now make another bucket available:
*/ */
if (atomic_dec_and_test(&pin_list->count) && if (atomic_dec_and_test(&pin_list->count) &&

View File

@ -31,7 +31,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb,
if (!nr) if (!nr)
return 0; return 0;
b = kmalloc_array(sizeof(u64), nr, GFP_KERNEL); b = kmalloc_array(nr, sizeof(u64), GFP_KERNEL);
if (!b) if (!b)
return -ENOMEM; return -ENOMEM;
@ -114,7 +114,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb,
if (!nr) if (!nr)
return 0; return 0;
b = kmalloc_array(sizeof(*b), nr, GFP_KERNEL); b = kmalloc_array(nr, sizeof(*b), GFP_KERNEL);
if (!b) if (!b)
return -ENOMEM; return -ENOMEM;

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "bkey.h"
#include "keylist.h" #include "keylist.h"
int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s, int bch2_keylist_realloc(struct keylist *l, u64 *inline_u64s,

View File

@ -497,7 +497,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
/* /*
* The iterator gets unlocked by __bch2_read_extent - need to * The iterator gets unlocked by __bch2_read_extent - need to
* save a copy of @k elsewhere: * save a copy of @k elsewhere:
*/ */
bch2_bkey_buf_reassemble(&sk, c, k); bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k); k = bkey_i_to_s_c(sk.k);
@ -868,7 +868,7 @@ static bool migrate_pred(struct bch_fs *c, void *arg,
i++; i++;
} }
return data_opts->rewrite_ptrs != 0;; return data_opts->rewrite_ptrs != 0;
} }
static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,

View File

@ -225,7 +225,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
.size = max_t(size_t, keys->size, 8) * 2, .size = max_t(size_t, keys->size, 8) * 2,
}; };
new_keys.d = kvmalloc(sizeof(new_keys.d[0]) * new_keys.size, GFP_KERNEL); new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
if (!new_keys.d) { if (!new_keys.d) {
bch_err(c, "%s: error allocating new key array (size %zu)", bch_err(c, "%s: error allocating new key array (size %zu)",
__func__, new_keys.size); __func__, new_keys.size);
@ -502,7 +502,7 @@ static int journal_keys_sort(struct bch_fs *c)
keys->size = roundup_pow_of_two(nr_keys); keys->size = roundup_pow_of_two(nr_keys);
keys->d = kvmalloc(sizeof(keys->d[0]) * keys->size, GFP_KERNEL); keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
if (!keys->d) if (!keys->d)
return -ENOMEM; return -ENOMEM;
@ -1092,6 +1092,9 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.version_upgrade = true; c->opts.version_upgrade = true;
c->opts.fsck = true; c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES; c->opts.fix_errors = FSCK_OPT_YES;
} else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
bch_info(c, "version prior to inode_v3, upgrade required");
c->opts.version_upgrade = true;
} }
} }
@ -1458,7 +1461,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
if (c->sb.version < bcachefs_metadata_version_backpointers) if (c->sb.version < bcachefs_metadata_version_inode_v3)
c->opts.version_upgrade = true; c->opts.version_upgrade = true;
if (c->opts.version_upgrade) { if (c->opts.version_upgrade) {
@ -1537,7 +1540,7 @@ int bch2_fs_initialize(struct bch_fs *c)
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO; root_inode.bi_inum = BCACHEFS_ROOT_INO;
root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
bch2_inode_pack(c, &packed_inode, &root_inode); bch2_inode_pack(&packed_inode, &root_inode);
packed_inode.inode.k.p.snapshot = U32_MAX; packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating root directory"; err = "error creating root directory";

View File

@ -2,6 +2,7 @@
#ifndef _BCACHEFS_REPLICAS_H #ifndef _BCACHEFS_REPLICAS_H
#define _BCACHEFS_REPLICAS_H #define _BCACHEFS_REPLICAS_H
#include "bkey.h"
#include "eytzinger.h" #include "eytzinger.h"
#include "replicas_types.h" #include "replicas_types.h"

View File

@ -160,7 +160,7 @@ u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]); r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
memset(ctx, 0, sizeof(*ctx)); memset(ctx, 0, sizeof(*ctx));
return (r); return r;
} }
u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len) u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)

View File

@ -100,8 +100,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb) void bch2_free_super(struct bch_sb_handle *sb)
{ {
if (sb->bio) kfree(sb->bio);
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev)) if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->mode); blkdev_put(sb->bdev, sb->mode);
@ -149,8 +148,7 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0); bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
if (sb->bio) kfree(sb->bio);
kfree(sb->bio);
sb->bio = bio; sb->bio = bio;
} }

View File

@ -327,26 +327,12 @@ static int bch2_fs_read_write_late(struct bch_fs *c)
{ {
int ret; int ret;
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
return ret;
}
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
ret = bch2_rebalance_start(c); ret = bch2_rebalance_start(c);
if (ret) { if (ret) {
bch_err(c, "error starting rebalance thread"); bch_err(c, "error starting rebalance thread");
return ret; return ret;
} }
schedule_work(&c->ec_stripe_delete_work);
return 0; return 0;
} }
@ -385,6 +371,20 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca); bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c); bch2_recalc_capacity(c);
ret = bch2_gc_thread_start(c);
if (ret) {
bch_err(c, "error starting gc thread");
return ret;
}
ret = bch2_copygc_start(c);
if (ret) {
bch_err(c, "error starting copygc thread");
return ret;
}
schedule_work(&c->ec_stripe_delete_work);
bch2_do_discards(c); bch2_do_discards(c);
bch2_do_invalidates(c); bch2_do_invalidates(c);
@ -463,8 +463,8 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->unused_inode_hints); kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap); free_heap(&c->copygc_heap);
if (c->io_complete_wq ) if (c->io_complete_wq)
destroy_workqueue(c->io_complete_wq ); destroy_workqueue(c->io_complete_wq);
if (c->copygc_wq) if (c->copygc_wq)
destroy_workqueue(c->copygc_wq); destroy_workqueue(c->copygc_wq);
if (c->btree_io_complete_wq) if (c->btree_io_complete_wq)
@ -711,7 +711,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err; goto err;
pr_uuid(&name, c->sb.user_uuid.b); pr_uuid(&name, c->sb.user_uuid.b);
strlcpy(c->name, name.buf, sizeof(c->name)); strscpy(c->name, name.buf, sizeof(c->name));
printbuf_exit(&name); printbuf_exit(&name);
ret = name.allocation_failure ? -ENOMEM : 0; ret = name.allocation_failure ? -ENOMEM : 0;
@ -1784,9 +1784,8 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
} }
ret = bch2_trans_mark_dev_sb(c, ca); ret = bch2_trans_mark_dev_sb(c, ca);
if (ret) { if (ret)
goto err; goto err;
}
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];

View File

@ -175,7 +175,7 @@ read_attribute(minor);
read_attribute(bucket_size); read_attribute(bucket_size);
read_attribute(first_bucket); read_attribute(first_bucket);
read_attribute(nbuckets); read_attribute(nbuckets);
read_attribute(durability); rw_attribute(durability);
read_attribute(iodone); read_attribute(iodone);
read_attribute(io_latency_read); read_attribute(io_latency_read);
@ -425,7 +425,7 @@ SHOW(bch2_fs)
bch2_btree_updates_to_text(out, c); bch2_btree_updates_to_text(out, c);
if (attr == &sysfs_btree_cache) if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c); bch2_btree_cache_to_text(out, &c->btree_cache);
if (attr == &sysfs_btree_key_cache) if (attr == &sysfs_btree_key_cache)
bch2_btree_key_cache_to_text(out, &c->btree_key_cache); bch2_btree_key_cache_to_text(out, &c->btree_key_cache);
@ -907,6 +907,19 @@ STORE(bch2_dev)
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
} }
if (attr == &sysfs_durability) {
u64 v = strtoul_or_return(buf);
mutex_lock(&c->sb_lock);
mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
if (v != BCH_MEMBER_DURABILITY(mi)) {
SET_BCH_MEMBER_DURABILITY(mi, v + 1);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
if (attr == &sysfs_label) { if (attr == &sysfs_label) {
char *tmp; char *tmp;
int ret; int ret;
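The new durability store writes `v + 1` into the member field; the assumption behind the sketch below is that the on-disk encoding reserves 0 for "unset" and readers fall back to a default durability of 1 — the helper name is hypothetical:

/* Hypothetical read-side helper, assuming stored 0 == unset, default durability 1: */
static inline unsigned member_durability(const struct bch_member *mi)
{
	return BCH_MEMBER_DURABILITY(mi)
		? BCH_MEMBER_DURABILITY(mi) - 1
		: 1;
}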

View File

@ -433,7 +433,7 @@ static void pr_time_units(struct printbuf *out, u64 ns)
static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
{ {
prt_printf(out, name); prt_str(out, name);
prt_tab(out); prt_tab(out);
pr_time_units(out, ns); pr_time_units(out, ns);
prt_newline(out); prt_newline(out);
@ -786,8 +786,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
} }
} }
#include "eytzinger.h"
static int alignment_ok(const void *base, size_t align) static int alignment_ok(const void *base, size_t align)
{ {
return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||

View File

@ -21,8 +21,10 @@
#include <ctype.h> #include <ctype.h>
#include <errno.h> #include <errno.h>
#include <limits.h>
#include <string.h> #include <string.h>
#include <linux/bug.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/string.h> #include <linux/string.h>
@ -62,6 +64,31 @@ size_t strlcpy(char *dest, const char *src, size_t size)
return ret; return ret;
} }
ssize_t strscpy(char *dest, const char *src, size_t count)
{
long res = 0;
if (count == 0 || WARN_ON_ONCE(count > INT_MAX))
return -E2BIG;
while (count) {
char c;
c = src[res];
dest[res] = c;
if (!c)
return res;
res++;
count--;
}
/* Hit buffer length without finding a NUL; force NUL-termination. */
if (res)
dest[res-1] = '\0';
return -E2BIG;
}
void memzero_explicit(void *s, size_t count) void memzero_explicit(void *s, size_t count)
{ {
memset(s, 0, count); memset(s, 0, count);
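Compared with strlcpy(), which returns strlen(src) and leaves truncation for the caller to infer, the strscpy() shim added above returns the number of bytes copied or -E2BIG on truncation, so the error is visible directly. Usage sketch (buffer size chosen only for illustration):

char id[8];
ssize_t n = strscpy(id, "bcachefs-dev", sizeof(id));

if (n == -E2BIG)
	fprintf(stderr, "name truncated\n");	/* taken here: 12 characters don't fit in 7 + NUL */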

View File

@ -52,7 +52,7 @@ int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
static const unsigned int rounding[] = { 500, 50, 5 }; static const unsigned int rounding[] = { 500, 50, 5 };
int i = 0, j; int i = 0, j;
u32 remainder = 0, sf_cap; u32 remainder = 0, sf_cap;
char tmp[8]; char tmp[12];
const char *unit; const char *unit;
tmp[0] = '\0'; tmp[0] = '\0';