Update bcachefs sources to 438696e03da7 bcachefs: rename version -> bversion for big endian builds

Author: Kent Overstreet
Date:   2024-09-22 01:13:01 -04:00
parent 7a98f526b5
commit 9f20109509

56 changed files with 692 additions and 460 deletions

View File

@@ -1 +1 @@
-82792a451950397b8594b399bed03cfda1f31299
+438696e03da7c2d1678bdcce98939ba642cbb467

View File

@@ -71,7 +71,7 @@ struct bch_inode_unpacked create_file(struct bch_fs *c,

 static const struct xattr_handler *xattr_resolve_name(char **name)
 {
-    const struct xattr_handler **handlers = bch2_xattr_handlers;
+    const struct xattr_handler * const *handlers = bch2_xattr_handlers;
     const struct xattr_handler *handler;

     for_each_xattr_handler(handlers, handler) {

View File

@@ -62,6 +62,29 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
         dst[k] = ~src[k];
 }

+static inline bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+            const unsigned long *bitmap2, unsigned int bits)
+{
+    unsigned int k;
+    unsigned int lim = bits/BITS_PER_LONG;
+    unsigned long result = 0;
+
+    for (k = 0; k < lim; k++)
+        result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+    if (bits % BITS_PER_LONG)
+        result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+               BITMAP_LAST_WORD_MASK(bits));
+    return result != 0;
+}
+
+static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+            const unsigned long *src2, unsigned int nbits)
+{
+    if (small_const_nbits(nbits))
+        return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+    return __bitmap_andnot(dst, src1, src2, nbits);
+}
+
 static inline void bitmap_zero(unsigned long *dst, int nbits)
 {
     memset(dst, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
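These helpers mirror the kernel's bitmap_andnot(): dst = src1 & ~src2, returning whether any result bit remains set. A minimal usage sketch (illustrative only; the real caller is the ec_stripe_head_devs_update() hunk further down, which detects devices leaving a stripe head):

    struct bch_devs_mask old_devs = h->devs, devs_leaving;

    /* after recomputing h->devs: bits set in the old mask but no longer in the new one */
    if (bitmap_andnot(devs_leaving.d, old_devs.d, h->devs.d, BCH_SB_MEMBERS_MAX))
        ec_stripe_new_cancel(c, h, -EINTR);     /* at least one device went away */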

View File

@@ -19,7 +19,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp)

 static inline bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 {
-    return false;
+    return true;
 }

 static inline unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp)
@@ -58,7 +58,7 @@ static inline void cleanup_srcu_struct(struct srcu_struct *ssp) {}
 static inline void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
                  rcu_callback_t func)
 {
-    func(rhp);
+    call_rcu(rhp, func);
 }

 static inline int init_srcu_struct(struct srcu_struct *ssp)
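The userspace shim has no real SRCU grace periods, so these stubs now model an immediately-expired grace period: poll_state_synchronize_srcu() reports every cookie as complete, and call_srcu() defers through call_rcu() rather than invoking the callback synchronously. The polling pattern this supports (a sketch of the standard kernel API usage, not code from this commit):

    unsigned long cookie = start_poll_synchronize_srcu(ssp);
    /* ... */
    if (poll_state_synchronize_srcu(ssp, cookie))
        kfree(obj);     /* grace period elapsed; always true in the shim */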

View File

@@ -137,7 +137,7 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans,
         return NULL;

     acl = allocate_dropping_locks(trans, ret,
-            posix_acl_alloc(count, GFP_KERNEL));
+            posix_acl_alloc(count, _gfp));
     if (!acl)
         return ERR_PTR(-ENOMEM);
     if (ret) {
@@ -427,8 +427,7 @@ int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
     if (ret)
         goto err;

-    ret = allocate_dropping_locks_errcode(trans,
-            __posix_acl_chmod(&acl, GFP_KERNEL, mode));
+    ret = allocate_dropping_locks_errcode(trans, __posix_acl_chmod(&acl, _gfp, mode));
     if (ret)
         goto err;

View File

@@ -2310,7 +2310,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
      * We clear the LRU and need_discard btrees first so that we don't race
      * with bch2_do_invalidates() and bch2_do_discards()
      */
-    ret = bch2_dev_remove_stripes(c, ca) ?:
+    ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?:
         bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
                     BTREE_TRIGGER_norun, NULL) ?:
         bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
@@ -2324,7 +2324,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
         bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
                     BTREE_TRIGGER_norun, NULL) ?:
         bch2_dev_usage_remove(c, ca->dev_idx);
-    bch_err_msg(c, ret, "removing dev alloc info");
+    bch_err_msg(ca, ret, "removing dev alloc info");
     return ret;
 }

View File

@@ -16,7 +16,7 @@ enum bch_validate_flags;
 static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
 {
     rcu_read_lock();
-    struct bch_dev *ca = bch2_dev_rcu(c, pos.inode);
+    struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
     bool ret = ca && bucket_valid(ca, pos.offset);
     rcu_read_unlock();
     return ret;

View File

@@ -3,7 +3,6 @@
 #include "bbpos.h"
 #include "alloc_background.h"
 #include "backpointers.h"
-#include "bbpos.h"
 #include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_update.h"
@@ -502,7 +501,7 @@ found:
     prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
     bch2_bkey_val_to_text(&buf, c, extent2);

-    struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+    struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
     struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
     if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
             trans, dup_backpointer_to_bad_csum_extent,

View File

@@ -134,26 +134,35 @@ static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
     }
 }

+static inline void __bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
+               enum btree_id btree_id, unsigned level,
+               struct bkey_s_c k, struct extent_ptr_decoded p,
+               const union bch_extent_entry *entry,
+               struct bpos *bucket_pos, struct bch_backpointer *bp,
+               u64 sectors)
+{
+    u32 bucket_offset;
+    *bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset);
+    *bp = (struct bch_backpointer) {
+        .btree_id      = btree_id,
+        .level         = level,
+        .data_type     = bch2_bkey_ptr_data_type(k, p, entry),
+        .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
+            p.crc.offset,
+        .bucket_len    = sectors,
+        .pos           = k.k->p,
+    };
+}
+
 static inline void bch2_extent_ptr_to_bp(struct bch_fs *c, struct bch_dev *ca,
                enum btree_id btree_id, unsigned level,
                struct bkey_s_c k, struct extent_ptr_decoded p,
                const union bch_extent_entry *entry,
                struct bpos *bucket_pos, struct bch_backpointer *bp)
 {
-    enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
-    s64 sectors = level ? btree_sectors(c) : k.k->size;
-    u32 bucket_offset;
-    *bucket_pos = PTR_BUCKET_POS_OFFSET(ca, &p.ptr, &bucket_offset);
-    *bp = (struct bch_backpointer) {
-        .btree_id      = btree_id,
-        .level         = level,
-        .data_type     = data_type,
-        .bucket_offset = ((u64) bucket_offset << MAX_EXTENT_COMPRESS_RATIO_SHIFT) +
-            p.crc.offset,
-        .bucket_len    = ptr_disk_sectors(sectors, p),
-        .pos           = k.k->p,
-    };
+    u64 sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
+
+    __bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, bucket_pos, bp, sectors);
 }

 int bch2_get_next_backpointer(struct btree_trans *, struct bch_dev *ca, struct bpos, int,

View File

@@ -594,6 +594,7 @@ struct bch_dev {
 #define BCH_FS_FLAGS()          \
     x(new_fs)                   \
     x(started)                  \
+    x(clean_recovery)           \
     x(btree_running)            \
     x(accounting_replay_done)   \
     x(may_go_rw)                \
@@ -776,7 +777,7 @@ struct bch_fs {
         unsigned      nsec_per_time_unit;
         u64           features;
         u64           compat;
-        unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+        unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)];
         u64           btrees_lost_data;
     } sb;

View File

@@ -217,13 +217,13 @@ struct bkey {
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
     __u8            pad[1];

-    struct bversion version;
+    struct bversion bversion;
     __u32           size;       /* extent size, in sectors */
     struct bpos     p;
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     struct bpos     p;
     __u32           size;       /* extent size, in sectors */
-    struct bversion version;
+    struct bversion bversion;

     __u8            pad[1];
 #endif
@@ -328,8 +328,8 @@ enum bch_bkey_fields {
         bkey_format_field(OFFSET,     p.offset),      \
         bkey_format_field(SNAPSHOT,   p.snapshot),    \
         bkey_format_field(SIZE,       size),          \
-        bkey_format_field(VERSION_HI, version.hi),    \
-        bkey_format_field(VERSION_LO, version.lo),    \
+        bkey_format_field(VERSION_HI, bversion.hi),   \
+        bkey_format_field(VERSION_LO, bversion.lo),   \
     },                                                \
 })

View File

@@ -214,9 +214,9 @@ static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
 #define ZERO_VERSION    ((struct bversion) { .hi = 0, .lo = 0 })
 #define MAX_VERSION     ((struct bversion) { .hi = ~0, .lo = ~0ULL })

-static __always_inline int bversion_zero(struct bversion v)
+static __always_inline bool bversion_zero(struct bversion v)
 {
-    return !bversion_cmp(v, ZERO_VERSION);
+    return bversion_cmp(v, ZERO_VERSION) == 0;
 }

 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,8 +554,8 @@ static inline void bch2_bkey_pack_test(void) {}
     x(BKEY_FIELD_OFFSET,     p.offset)      \
     x(BKEY_FIELD_SNAPSHOT,   p.snapshot)    \
     x(BKEY_FIELD_SIZE,       size)          \
-    x(BKEY_FIELD_VERSION_HI, version.hi)    \
-    x(BKEY_FIELD_VERSION_LO, version.lo)
+    x(BKEY_FIELD_VERSION_HI, bversion.hi)   \
+    x(BKEY_FIELD_VERSION_LO, bversion.lo)

 struct bkey_format_state {
     u64 field_min[BKEY_NR_FIELDS];

View File

@@ -289,7 +289,7 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k)
         bch2_bpos_to_text(out, k->p);

-        prt_printf(out, " len %u ver %llu", k->size, k->version.lo);
+        prt_printf(out, " len %u ver %llu", k->size, k->bversion.lo);
     } else {
         prt_printf(out, "(null)");
     }

View File

@@ -70,7 +70,7 @@ bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s);
 static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct bkey *r)
 {
     return l->type == r->type &&
-        !bversion_cmp(l->version, r->version) &&
+        !bversion_cmp(l->bversion, r->bversion) &&
         bpos_eq(l->p, bkey_start_pos(r));
 }

View File

@@ -804,8 +804,7 @@ got_node:
     mutex_unlock(&bc->lock);

-    if (memalloc_flags_do(PF_MEMALLOC_NORECLAIM,
-                          btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))) {
+    if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
         bch2_trans_unlock(trans);
         if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
             goto err;

View File

@@ -513,6 +513,8 @@ int bch2_check_topology(struct bch_fs *c)
     struct bpos pulled_from_scan = POS_MIN;
     int ret = 0;

+    bch2_trans_srcu_unlock(trans);
+
     for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
         struct btree_root *r = bch2_btree_id_root(c, i);
         bool reconstructed_root = false;
@@ -599,15 +601,15 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
     if (initial) {
         BUG_ON(bch2_journal_seq_verify &&
-               k.k->version.lo > atomic64_read(&c->journal.seq));
+               k.k->bversion.lo > atomic64_read(&c->journal.seq));

         if (fsck_err_on(btree_id != BTREE_ID_accounting &&
-                k.k->version.lo > atomic64_read(&c->key_version),
+                k.k->bversion.lo > atomic64_read(&c->key_version),
                 trans, bkey_version_in_future,
                 "key version number higher than recorded %llu\n  %s",
                 atomic64_read(&c->key_version),
                 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-            atomic64_set(&c->key_version, k.k->version.lo);
+            atomic64_set(&c->key_version, k.k->bversion.lo);
     }

     if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),

View File

@@ -1195,6 +1195,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
     set_btree_bset(b, b->set, &b->data->keys);

     b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter);
+    memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0,
+           btree_buf_bytes(b) -
+           sizeof(struct btree_node) -
+           b->nr.live_u64s * sizeof(u64));

     u64s = le16_to_cpu(sorted->keys.u64s);
     *sorted = *b->data;
@@ -1219,7 +1223,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
             ret = bch2_bkey_val_validate(c, u.s_c, READ);
             if (ret == -BCH_ERR_fsck_delete_bkey ||
                 (bch2_inject_invalid_keys &&
-                 !bversion_cmp(u.k->version, MAX_VERSION))) {
+                 !bversion_cmp(u.k->bversion, MAX_VERSION))) {
                 btree_keys_account_key_drop(&b->nr, 0, k);

                 i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);

View File

@@ -6,8 +6,6 @@
 #include "btree_types.h"
 #include "trace.h"

-#include <linux/sched/mm.h>
-
 void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
 void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
 void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
@@ -873,33 +871,29 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
     (_do) ?: bch2_trans_relock(_trans);                       \
 })

-#define memalloc_flags_do(_flags, _do)                        \
-({                                                            \
-    unsigned _saved_flags = memalloc_flags_save(_flags);      \
-    typeof(_do) _ret = _do;                                   \
-    memalloc_noreclaim_restore(_saved_flags);                 \
-    _ret;                                                     \
-})
-
 #define allocate_dropping_locks_errcode(_trans, _do)          \
 ({                                                            \
-    int _ret = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\
+    gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;                     \
+    int _ret = _do;                                           \
                                                               \
     if (bch2_err_matches(_ret, ENOMEM)) {                     \
+        _gfp = GFP_KERNEL;                                    \
         _ret = drop_locks_do(_trans, _do);                    \
     }                                                         \
     _ret;                                                     \
 })

 #define allocate_dropping_locks(_trans, _ret, _do)            \
 ({                                                            \
-    typeof(_do) _p = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN, _do);\
+    gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;                     \
+    typeof(_do) _p = _do;                                     \
                                                               \
     _ret = 0;                                                 \
     if (unlikely(!_p)) {                                      \
+        _gfp = GFP_KERNEL;                                    \
         _ret = drop_locks_do(_trans, ((_p = _do), 0));        \
     }                                                         \
     _p;                                                       \
 })

 #define bch2_trans_run(_c, _do)                               \
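The reworked macros drop the PF_MEMALLOC_NORECLAIM trick in favor of an explicit two-pass allocation: `_do` is first evaluated with `_gfp = GFP_NOWAIT|__GFP_NOWARN`, and only on failure are the btree locks dropped (via drop_locks_do()) and the allocation retried with `_gfp = GFP_KERNEL`. Callers are expected to reference the implicit `_gfp` variable, as in this sketch (mirroring the btree_key_cache.c hunk below):

    struct bkey_cached *ck =
        allocate_dropping_locks(trans, ret,
                    __bkey_cached_alloc(key_u64s, _gfp));
    if (ret) {
        /* allocation failed even after unlocking and using GFP_KERNEL */
    }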

View File

@@ -116,9 +116,9 @@ static void bkey_cached_free(struct btree_key_cache *bc,
     this_cpu_inc(*bc->nr_pending);
 }

-static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s)
+static struct bkey_cached *__bkey_cached_alloc(unsigned key_u64s, gfp_t gfp)
 {
-    gfp_t gfp = GFP_KERNEL|__GFP_ACCOUNT|__GFP_RECLAIMABLE;
+    gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE;

     struct bkey_cached *ck = kmem_cache_zalloc(bch2_key_cache, gfp);
     if (unlikely(!ck))
@@ -147,7 +147,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
         goto lock;

     ck = allocate_dropping_locks(trans, ret,
-                                 __bkey_cached_alloc(key_u64s));
+                                 __bkey_cached_alloc(key_u64s, _gfp));
     if (ret) {
         if (ck)
             kfree(ck->k);
@@ -241,7 +241,7 @@ static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *
     mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);

     struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
-                kmalloc(key_u64s * sizeof(u64), GFP_KERNEL));
+                kmalloc(key_u64s * sizeof(u64), _gfp));
     if (unlikely(!new_k)) {
         bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
             bch2_btree_id_str(ck->key.btree_id), key_u64s);

View File

@@ -275,7 +275,7 @@ static int read_btree_nodes(struct find_btree_nodes *f)
         w->ca = ca;

         t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
-        ret = IS_ERR_OR_NULL(t);
+        ret = PTR_ERR_OR_ZERO(t);
         if (ret) {
             percpu_ref_put(&ca->io_ref);
             closure_put(&cl);

View File

@@ -684,10 +684,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
         !(flags & BCH_TRANS_COMMIT_no_journal_res)) {
         if (bch2_journal_seq_verify)
             trans_for_each_update(trans, i)
-                i->k->k.version.lo = trans->journal_res.seq;
+                i->k->k.bversion.lo = trans->journal_res.seq;
         else if (bch2_inject_invalid_keys)
             trans_for_each_update(trans, i)
-                i->k->k.version = MAX_VERSION;
+                i->k->k.bversion = MAX_VERSION;
     }

     h = trans->hooks;
@@ -700,27 +700,31 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
     struct jset_entry *entry = trans->journal_entries;

-    if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
-        percpu_down_read(&c->mark_lock);
-        for (entry = trans->journal_entries;
-             entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
-             entry = vstruct_next(entry))
-            if (jset_entry_is_key(entry) && entry->start->k.type == KEY_TYPE_accounting) {
-                struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
-
-                a->k.version = journal_pos_to_bversion(&trans->journal_res,
-                                (u64 *) entry - (u64 *) trans->journal_entries);
-                BUG_ON(bversion_zero(a->k.version));
-                ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false);
+    percpu_down_read(&c->mark_lock);
+    for (entry = trans->journal_entries;
+         entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+         entry = vstruct_next(entry))
+        if (entry->type == BCH_JSET_ENTRY_write_buffer_keys &&
+            entry->start->k.type == KEY_TYPE_accounting) {
+            BUG_ON(!trans->journal_res.ref);
+
+            struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start);
+
+            a->k.bversion = journal_pos_to_bversion(&trans->journal_res,
+                            (u64 *) entry - (u64 *) trans->journal_entries);
+            BUG_ON(bversion_zero(a->k.bversion));
+
+            if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) {
+                ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal);
                 if (ret)
                     goto revert_fs_usage;
             }
-        percpu_up_read(&c->mark_lock);
+        }
+    percpu_up_read(&c->mark_lock);

-        /* XXX: we only want to run this if deltas are nonzero */
-        bch2_trans_account_disk_usage_change(trans);
-    }
+    /* XXX: we only want to run this if deltas are nonzero */
+    bch2_trans_account_disk_usage_change(trans);

     trans_for_each_update(trans, i)
         if (btree_node_type_has_atomic_triggers(i->bkey_type)) {
@@ -735,6 +739,40 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
             goto fatal_err;
     }

+    trans_for_each_update(trans, i) {
+        enum bch_validate_flags invalid_flags = 0;
+
+        if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+            invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+        ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
+                     i->bkey_type, invalid_flags);
+        if (unlikely(ret)){
+            bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
+                        trans->fn, (void *) i->ip_allocated);
+            goto fatal_err;
+        }
+        btree_insert_entry_checks(trans, i);
+    }
+
+    for (struct jset_entry *i = trans->journal_entries;
+         i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
+         i = vstruct_next(i)) {
+        enum bch_validate_flags invalid_flags = 0;
+
+        if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
+            invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
+
+        ret = bch2_journal_entry_validate(c, NULL, i,
+                          bcachefs_metadata_version_current,
+                          CPU_BIG_ENDIAN, invalid_flags);
+        if (unlikely(ret)) {
+            bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
+                        trans->fn);
+            goto fatal_err;
+        }
+    }
+
     if (likely(!(flags & BCH_TRANS_COMMIT_no_journal_res))) {
         struct journal *j = &c->journal;
         struct jset_entry *entry;
@@ -798,7 +836,7 @@ revert_fs_usage:
         struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);

         bch2_accounting_neg(a);
-        bch2_accounting_mem_mod_locked(trans, a.c, false, false);
+        bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal);
         bch2_accounting_neg(a);
     }
     percpu_up_read(&c->mark_lock);
@@ -1019,40 +1057,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
     if (ret)
         goto out_reset;

-    trans_for_each_update(trans, i) {
-        enum bch_validate_flags invalid_flags = 0;
-
-        if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
-            invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
-        ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
-                     i->bkey_type, invalid_flags);
-        if (unlikely(ret)){
-            bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
-                        trans->fn, (void *) i->ip_allocated);
-            return ret;
-        }
-        btree_insert_entry_checks(trans, i);
-    }
-
-    for (struct jset_entry *i = trans->journal_entries;
-         i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
-         i = vstruct_next(i)) {
-        enum bch_validate_flags invalid_flags = 0;
-
-        if (!(flags & BCH_TRANS_COMMIT_no_journal_res))
-            invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
-
-        ret = bch2_journal_entry_validate(c, NULL, i,
-                          bcachefs_metadata_version_current,
-                          CPU_BIG_ENDIAN, invalid_flags);
-        if (unlikely(ret)) {
-            bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n",
-                        trans->fn);
-            return ret;
-        }
-    }
-
     if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) {
         ret = do_bch2_trans_commit_to_journal_replay(trans);
         goto out_reset;

View File

@@ -220,7 +220,8 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
     if (type && k.k->type != type)
         return ERR_PTR(-ENOENT);

-    mut = bch2_trans_kmalloc_nomemzero(trans, bytes);
+    /* extra padding for varint_decode_fast... */
+    mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
     if (!IS_ERR(mut)) {
         bkey_reassemble(mut, k);
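The eight extra bytes are headroom for a fixed-width load: a fast varint decoder typically reads a whole u64 at once and masks off what it needs, so it can touch up to seven bytes past the last encoded byte. A sketch of the load pattern being guarded against (illustrative, not bcachefs's actual decoder):

    static inline u64 load_word(const void *p)
    {
        u64 v;
        memcpy(&v, p, sizeof(v));   /* may read past the value's true end */
        return v;
    }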

View File

@@ -281,7 +281,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
             goto err;

     rcu_read_lock();
-    bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_rcu(c, ptr->dev));
+    bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev));
     rcu_read_unlock();

     if (level) {
@@ -565,11 +565,14 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
             s64 *sectors,
             enum btree_iter_update_trigger_flags flags)
 {
+    struct bch_fs *c = trans->c;
     bool insert = !(flags & BTREE_TRIGGER_overwrite);
     struct printbuf buf = PRINTBUF;
     int ret = 0;

-    struct bch_fs *c = trans->c;
+    u64 abs_sectors = ptr_disk_sectors(level ? btree_sectors(c) : k.k->size, p);
+    *sectors = insert ? abs_sectors : -abs_sectors;
+
     struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
     if (unlikely(!ca)) {
         if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
@@ -579,8 +582,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
     struct bpos bucket;
     struct bch_backpointer bp;
-    bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp);
-    *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
+    __bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp, abs_sectors);

     if (flags & BTREE_TRIGGER_transactional) {
         struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0);

View File

@@ -639,7 +639,7 @@ int bch2_data_update_init(struct btree_trans *trans,
     bch2_write_op_init(&m->op, c, io_opts);
     m->op.pos         = bkey_start_pos(k.k);
-    m->op.version     = k.k->version;
+    m->op.version     = k.k->bversion;
     m->op.target      = data_opts.target;
     m->op.write_point = wp;
     m->op.nr_replicas = 0;

View File

@@ -134,6 +134,10 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
     void *end = &acc_k + 1;
     int ret = 0;

+    bkey_fsck_err_on(bversion_zero(k.k->bversion),
+             c, accounting_key_version_0,
+             "accounting key with version=0");
+
     switch (acc_k.type) {
     case BCH_DISK_ACCOUNTING_nr_inodes:
         end = field_end(acc_k, nr_inodes);
@@ -291,7 +295,7 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
     struct accounting_mem_entry n = {
         .pos         = a.k->p,
-        .version     = a.k->version,
+        .bversion    = a.k->bversion,
         .nr_counters = bch2_accounting_counters(a.k),
         .v[0]        = __alloc_percpu_gfp(n.nr_counters * sizeof(u64),
                           sizeof(u64), GFP_KERNEL),
@@ -319,11 +323,13 @@ err:
     return -BCH_ERR_ENOMEM_disk_accounting;
 }

-int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc)
+int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a,
+                   enum bch_accounting_mode mode)
 {
     struct bch_replicas_padded r;

-    if (accounting_to_replicas(&r.e, a.k->p) &&
+    if (mode != BCH_ACCOUNTING_read &&
+        accounting_to_replicas(&r.e, a.k->p) &&
         !bch2_replicas_marked_locked(c, &r.e))
         return -BCH_ERR_btree_insert_need_mark_replicas;
@@ -566,7 +572,9 @@ int bch2_gc_accounting_done(struct bch_fs *c)
             struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i;

             accounting_key_init(&k_i.k, &acc_k, src_v, nr);
-            bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false);
+            bch2_accounting_mem_mod_locked(trans,
+                               bkey_i_to_s_c_accounting(&k_i.k),
+                               BCH_ACCOUNTING_normal);

             preempt_disable();
             struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage);
@@ -589,30 +597,14 @@ fsck_err:
 static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
 {
     struct bch_fs *c = trans->c;
-    struct printbuf buf = PRINTBUF;

     if (k.k->type != KEY_TYPE_accounting)
         return 0;

     percpu_down_read(&c->mark_lock);
-    int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true);
+    int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k),
+                         BCH_ACCOUNTING_read);
     percpu_up_read(&c->mark_lock);
-
-    if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) &&
-        ret == -BCH_ERR_btree_insert_need_mark_replicas)
-        ret = 0;
-
-    struct disk_accounting_pos acc;
-    bpos_to_disk_accounting_pos(&acc, k.k->p);
-
-    if (fsck_err_on(ret == -BCH_ERR_btree_insert_need_mark_replicas,
-            trans, accounting_replicas_not_marked,
-            "accounting not marked in superblock replicas\n  %s",
-            (bch2_accounting_key_to_text(&buf, &acc),
-             buf.buf)))
-        ret = bch2_accounting_update_sb_one(c, k.k->p);
-fsck_err:
-    printbuf_exit(&buf);
     return ret;
 }
@@ -624,6 +616,7 @@ int bch2_accounting_read(struct bch_fs *c)
 {
     struct bch_accounting_mem *acc = &c->accounting;
     struct btree_trans *trans = bch2_trans_get(c);
+    struct printbuf buf = PRINTBUF;

     int ret = for_each_btree_key(trans, iter,
                 BTREE_ID_accounting, POS_MIN,
@@ -647,7 +640,7 @@ int bch2_accounting_read(struct bch_fs *c)
                     accounting_pos_cmp, &k.k->p);

         bool applied = idx < acc->k.nr &&
-            bversion_cmp(acc->k.data[idx].version, k.k->version) >= 0;
+            bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0;

         if (applied)
             continue;
@@ -655,7 +648,7 @@ int bch2_accounting_read(struct bch_fs *c)
         if (i + 1 < &darray_top(*keys) &&
             i[1].k->k.type == KEY_TYPE_accounting &&
             !journal_key_cmp(i, i + 1)) {
-            BUG_ON(bversion_cmp(i[0].k->k.version, i[1].k->k.version) >= 0);
+            WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0);

             i[1].journal_seq = i[0].journal_seq;
@@ -674,6 +667,45 @@ int bch2_accounting_read(struct bch_fs *c)
     keys->gap = keys->nr = dst - keys->data;

     percpu_down_read(&c->mark_lock);
+    for (unsigned i = 0; i < acc->k.nr; i++) {
+        u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+        bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
+
+        if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
+            continue;
+
+        struct bch_replicas_padded r;
+        if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
+            continue;
+
+        /*
+         * If the replicas entry is invalid it'll get cleaned up by
+         * check_allocations:
+         */
+        if (bch2_replicas_entry_validate(&r.e, c, &buf))
+            continue;
+
+        struct disk_accounting_pos k;
+        bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
+
+        if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
+                trans, accounting_replicas_not_marked,
+                "accounting not marked in superblock replicas\n  %s",
+                (printbuf_reset(&buf),
+                 bch2_accounting_key_to_text(&buf, &k),
+                 buf.buf))) {
+            /*
+             * We're not RW yet and still single threaded, dropping
+             * and retaking lock is ok:
+             */
+            percpu_up_read(&c->mark_lock);
+            ret = bch2_mark_replicas(c, &r.e);
+            if (ret)
+                goto fsck_err;
+            percpu_down_read(&c->mark_lock);
+        }
+    }
+
     preempt_disable();
     struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
@@ -709,8 +741,10 @@ int bch2_accounting_read(struct bch_fs *c)
         }
     }
     preempt_enable();
+fsck_err:
     percpu_up_read(&c->mark_lock);
 err:
+    printbuf_exit(&buf);
     bch2_trans_put(trans);
     bch_err_fn(c, ret);
     return ret;

View File

@@ -36,8 +36,8 @@ static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
     for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
         dst->v.d[i] += src.v->d[i];
-    if (bversion_cmp(dst->k.version, src.k->version) < 0)
-        dst->k.version = src.k->version;
+    if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
+        dst->k.bversion = src.k->bversion;
 }

 static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
@@ -103,23 +103,35 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r)
     return bpos_cmp(*l, *r);
 }

-int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool);
+enum bch_accounting_mode {
+    BCH_ACCOUNTING_normal,
+    BCH_ACCOUNTING_gc,
+    BCH_ACCOUNTING_read,
+};
+
+int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode);
 void bch2_accounting_mem_gc(struct bch_fs *);

 /*
  * Update in memory counters so they match the btree update we're doing; called
  * from transaction commit path
  */
-static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read)
+static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
+                         struct bkey_s_c_accounting a,
+                         enum bch_accounting_mode mode)
 {
     struct bch_fs *c = trans->c;
+    struct bch_accounting_mem *acc = &c->accounting;
     struct disk_accounting_pos acc_k;
     bpos_to_disk_accounting_pos(&acc_k, a.k->p);
+    bool gc = mode == BCH_ACCOUNTING_gc;
+
+    EBUG_ON(gc && !acc->gc_running);

     if (acc_k.type == BCH_DISK_ACCOUNTING_inum)
         return 0;

-    if (!gc && !read) {
+    if (mode == BCH_ACCOUNTING_normal) {
         switch (acc_k.type) {
         case BCH_DISK_ACCOUNTING_persistent_reserved:
             trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0];
@@ -140,14 +152,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru
         }
     }

-    struct bch_accounting_mem *acc = &c->accounting;
     unsigned idx;

-    EBUG_ON(gc && !acc->gc_running);
-
     while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
                       accounting_pos_cmp, &a.k->p)) >= acc->k.nr) {
-        int ret = bch2_accounting_mem_insert(c, a, gc);
+        int ret = bch2_accounting_mem_insert(c, a, mode);
         if (ret)
             return ret;
     }
@@ -164,7 +173,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru
 static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc)
 {
     percpu_down_read(&trans->c->mark_lock);
-    int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false);
+    int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal);
     percpu_up_read(&trans->c->mark_lock);
     return ret;
 }
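Replacing the (gc, read) bool pair with a three-valued mode makes call sites self-describing: BCH_ACCOUNTING_normal is the transaction commit path (updates the fs_usage deltas and requires replicas entries to be marked), BCH_ACCOUNTING_gc is used by bch2_gc_accounting_done(), and BCH_ACCOUNTING_read is journal replay via accounting_read_key(), which defers the replicas check to the single pass in bch2_accounting_read(). A hypothetical helper just to show the mapping (not part of the diff):

    static inline enum bch_accounting_mode accounting_mode(bool gc, bool journal_replay)
    {
        if (journal_replay)
            return BCH_ACCOUNTING_read;     /* replicas check deferred */
        return gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal;
    }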

View File

@@ -6,7 +6,7 @@
 struct accounting_mem_entry {
     struct bpos pos;
-    struct bversion version;
+    struct bversion bversion;
     unsigned nr_counters;
     u64 __percpu *v[2];
 };

View File

@@ -838,6 +838,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
     struct bch_stripe *v;
     unsigned i, offset;
     const char *msg = NULL;
+    struct printbuf msgbuf = PRINTBUF;
     int ret = 0;

     closure_init_stack(&cl);
@@ -896,7 +897,6 @@ out:
     kfree(buf);
     return ret;
 err:
-    struct printbuf msgbuf = PRINTBUF;
     bch2_bkey_val_to_text(&msgbuf, c, orig_k);
     bch_err_ratelimited(c,
                 "error doing reconstruct read: %s\n  %s", msg, msgbuf.buf);
@@ -907,12 +907,12 @@ err:

 /* stripe bucket accounting: */

-static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx)
+static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
 {
     ec_stripes_heap n, *h = &c->ec_stripes_heap;

     if (idx >= h->size) {
-        if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), GFP_KERNEL))
+        if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
             return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

         mutex_lock(&c->ec_stripes_heap_lock);
@@ -926,11 +926,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx)
         free_heap(&n);
     }

-    if (!genradix_ptr_alloc(&c->stripes, idx, GFP_KERNEL))
+    if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
         return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

     if (c->gc_pos.phase != GC_PHASE_not_running &&
-        !genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL))
+        !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
         return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;

     return 0;
@@ -940,7 +940,7 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
                    struct btree_iter *iter)
 {
     return allocate_dropping_locks_errcode(trans,
-            __ec_stripe_mem_alloc(trans->c, iter->pos.offset));
+            __ec_stripe_mem_alloc(trans->c, iter->pos.offset, _gfp));
 }

 /*
@@ -1721,6 +1721,8 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
 static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
 {
+    struct bch_devs_mask devs = h->devs;
+
     rcu_read_lock();
     h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
                  ? group_to_target(h->disk_label - 1)
@@ -1762,7 +1764,10 @@ static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *
                 h->nr_active_devs, h->redundancy + 2, err);
     }

-    if (h->s && !h->s->allocated)
+    struct bch_devs_mask devs_leaving;
+    bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);
+
+    if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
         ec_stripe_new_cancel(c, h, -EINTR);

     h->rw_devs_change_count = c->rw_devs_change_count;
@@ -1855,39 +1860,38 @@ err:
 static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
                     enum bch_watermark watermark, struct closure *cl)
 {
-    struct ec_stripe_new *s = h->s;
     struct bch_fs *c = trans->c;
     struct bch_devs_mask devs = h->devs;
     struct open_bucket *ob;
     struct open_buckets buckets;
-    struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+    struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
     unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
     bool have_cache = true;
     int ret = 0;

-    BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
-    BUG_ON(v->nr_redundant != s->nr_parity);
+    BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
+    BUG_ON(v->nr_redundant != h->s->nr_parity);

     /* * We bypass the sector allocator which normally does this: */
     bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);

-    for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
+    for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
         __clear_bit(v->ptrs[i].dev, devs.d);
-        if (i < s->nr_data)
+        if (i < h->s->nr_data)
             nr_have_data++;
         else
             nr_have_parity++;
     }

-    BUG_ON(nr_have_data > s->nr_data);
-    BUG_ON(nr_have_parity > s->nr_parity);
+    BUG_ON(nr_have_data > h->s->nr_data);
+    BUG_ON(nr_have_parity > h->s->nr_parity);

     buckets.nr = 0;
-    if (nr_have_parity < s->nr_parity) {
+    if (nr_have_parity < h->s->nr_parity) {
         ret = bch2_bucket_alloc_set_trans(trans, &buckets,
                         &h->parity_stripe,
                         &devs,
-                        s->nr_parity,
+                        h->s->nr_parity,
                         &nr_have_parity,
                         &have_cache, 0,
                         BCH_DATA_parity,
@@ -1895,14 +1899,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
                         cl);

         open_bucket_for_each(c, &buckets, ob, i) {
-            j = find_next_zero_bit(s->blocks_gotten,
-                           s->nr_data + s->nr_parity,
-                           s->nr_data);
-            BUG_ON(j >= s->nr_data + s->nr_parity);
+            j = find_next_zero_bit(h->s->blocks_gotten,
+                           h->s->nr_data + h->s->nr_parity,
+                           h->s->nr_data);
+            BUG_ON(j >= h->s->nr_data + h->s->nr_parity);

-            s->blocks[j] = buckets.v[i];
+            h->s->blocks[j] = buckets.v[i];
             v->ptrs[j] = bch2_ob_ptr(c, ob);
-            __set_bit(j, s->blocks_gotten);
+            __set_bit(j, h->s->blocks_gotten);
         }

         if (ret)
@@ -1910,11 +1914,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
     }

     buckets.nr = 0;
-    if (nr_have_data < s->nr_data) {
+    if (nr_have_data < h->s->nr_data) {
         ret = bch2_bucket_alloc_set_trans(trans, &buckets,
                         &h->block_stripe,
                         &devs,
-                        s->nr_data,
+                        h->s->nr_data,
                         &nr_have_data,
                         &have_cache, 0,
                         BCH_DATA_user,
@@ -1922,13 +1926,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
                         cl);

         open_bucket_for_each(c, &buckets, ob, i) {
-            j = find_next_zero_bit(s->blocks_gotten,
-                           s->nr_data, 0);
-            BUG_ON(j >= s->nr_data);
+            j = find_next_zero_bit(h->s->blocks_gotten,
+                           h->s->nr_data, 0);
+            BUG_ON(j >= h->s->nr_data);

-            s->blocks[j] = buckets.v[i];
+            h->s->blocks[j] = buckets.v[i];
             v->ptrs[j] = bch2_ob_ptr(c, ob);
-            __set_bit(j, s->blocks_gotten);
+            __set_bit(j, h->s->blocks_gotten);
         }

         if (ret)
@@ -1974,53 +1978,12 @@ static s64 get_existing_stripe(struct bch_fs *c,
     return ret;
 }

-static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s)
-{
-    struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
-    struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v;
-    unsigned i;
-
-    BUG_ON(existing_v->nr_redundant != s->nr_parity);
-    s->nr_data = existing_v->nr_blocks -
-        existing_v->nr_redundant;
-
-    int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
-    if (ret) {
-        bch2_stripe_close(c, s);
-        return ret;
-    }
-
-    BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
-
-    /*
-     * Free buckets we initially allocated - they might conflict with
-     * blocks from the stripe we're reusing:
-     */
-    for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
-        bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
-        s->blocks[i] = 0;
-    }
-    memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
-    memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated));
-
-    for (i = 0; i < existing_v->nr_blocks; i++) {
-        if (stripe_blockcount_get(existing_v, i)) {
-            __set_bit(i, s->blocks_gotten);
-            __set_bit(i, s->blocks_allocated);
-        }
-
-        ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone);
-    }
-
-    bkey_copy(&s->new_stripe.key, &s->existing_stripe.key);
-    s->have_existing_stripe = true;
-
-    return 0;
-}
-
 static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
 {
     struct bch_fs *c = trans->c;
+    struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+    struct bch_stripe *existing_v;
+    unsigned i;
     s64 idx;
     int ret;
@@ -2040,7 +2003,45 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
         return ret;
     }

-    return init_new_stripe_from_existing(c, h->s);
+    existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
+
+    BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
+    h->s->nr_data = existing_v->nr_blocks -
+        existing_v->nr_redundant;
+
+    ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+    if (ret) {
+        bch2_stripe_close(c, h->s);
+        return ret;
+    }
+
+    BUG_ON(h->s->existing_stripe.size != h->blocksize);
+    BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
+
+    /*
+     * Free buckets we initially allocated - they might conflict with
+     * blocks from the stripe we're reusing:
+     */
+    for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
+        bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
+        h->s->blocks[i] = 0;
+    }
+    memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
+    memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+
+    for (i = 0; i < existing_v->nr_blocks; i++) {
+        if (stripe_blockcount_get(existing_v, i)) {
+            __set_bit(i, h->s->blocks_gotten);
+            __set_bit(i, h->s->blocks_allocated);
+        }
+
+        ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+    }
+
+    bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
+    h->s->have_existing_stripe = true;
+
+    return 0;
 }

 static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
@@ -2238,14 +2239,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
     struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i));

     bkey_for_each_ptr(ptrs, ptr)
-        if (ptr->dev == k_a.k->p.inode) {
-            if (stripe_blockcount_get(&s->v, ptr - &ptrs.start->ptr)) {
-                bch_err(trans->c, "trying to invalidate device in stripe when stripe block not empty");
-                ret = -BCH_ERR_invalidate_stripe_to_dev;
-                goto err;
-            }
+        if (ptr->dev == k_a.k->p.inode)
             ptr->dev = BCH_SB_MEMBER_INVALID;
-        }

     sectors = -sectors;
@@ -2259,11 +2254,11 @@ err:
     return ret;
 }

-int bch2_dev_remove_stripes(struct bch_fs *c, struct bch_dev *ca)
+int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx)
 {
     return bch2_trans_run(c,
         for_each_btree_key_upto_commit(trans, iter,
-            BTREE_ID_alloc, POS(ca->dev_idx, 0), POS(ca->dev_idx, U64_MAX),
+            BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
             BTREE_ITER_intent, k,
             NULL, NULL, 0, ({
                 bch2_invalidate_stripe_to_dev(trans, k);
@@ -2338,7 +2333,7 @@ int bch2_stripes_read(struct bch_fs *c)
         if (k.k->type != KEY_TYPE_stripe)
             continue;

-        ret = __ec_stripe_mem_alloc(c, k.k->p.offset);
+        ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
         if (ret)
             break;

View File

@@ -254,7 +254,7 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s,
     }
 }

-int bch2_dev_remove_stripes(struct bch_fs *, struct bch_dev *);
+int bch2_dev_remove_stripes(struct bch_fs *, unsigned);

 void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
 void bch2_fs_ec_stop(struct bch_fs *);

View File

@@ -239,7 +239,19 @@ int __bch2_fsck_err(struct bch_fs *c,
     if (!c)
         c = trans->c;

-    WARN_ON(!trans && bch2_current_has_btree_trans(c));
+    /*
+     * Ugly: if there's a transaction in the current task it has to be
+     * passed in to unlock if we prompt for user input.
+     *
+     * But, plumbing a transaction and transaction restarts into
+     * bkey_validate() is problematic.
+     *
+     * So:
+     * - make all bkey errors AUTOFIX, they're simple anyways (we just
+     *   delete the key)
+     * - and we don't need to warn if we're not prompting
+     */
+    WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c));

     if ((flags & FSCK_CAN_FIX) &&
         test_bit(err, c->sb.errors_silent))

View File

@@ -184,7 +184,7 @@ do { \
         ret = -BCH_ERR_fsck_delete_bkey;                    \
         goto fsck_err;                                      \
     }                                                       \
-    int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX,     \
+    int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\
                     BCH_FSCK_ERR_##_err_type,               \
                     _err_msg, ##__VA_ARGS__);               \
     if (_ret != -BCH_ERR_fsck_fix &&                        \

View File

@@ -185,12 +185,17 @@ static void __wait_on_freeing_inode(struct inode *inode)
     finish_wait(wq, &wait.wq_entry);
 }

+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+    return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
+}
+
 static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
                            subvol_inum inum)
 {
     struct bch_inode_info *inode;
 repeat:
-    inode = rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
+    inode = __bch2_inode_hash_find(c, inum);
     if (inode) {
         spin_lock(&inode->v.i_lock);
         if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {
@@ -273,6 +278,14 @@ retry:
     }
 }

+#define memalloc_flags_do(_flags, _do)                        \
+({                                                            \
+    unsigned _saved_flags = memalloc_flags_save(_flags);      \
+    typeof(_do) _ret = _do;                                   \
+    memalloc_noreclaim_restore(_saved_flags);                 \
+    _ret;                                                     \
+})
+
 static struct inode *bch2_alloc_inode(struct super_block *sb)
 {
     BUG();
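memalloc_flags_do() moves here from btree_iter.h now that fs.c is its only remaining user: it ORs the given flags into the task's PF_* allocation flags around a single expression, then restores them. A hedged usage sketch (the allocation shown is hypothetical, not code from this commit):

    /* run one allocation with extra PF_MEMALLOC flags set: */
    void *p = memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN,
                    kmalloc(size, GFP_KERNEL));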
@@ -1698,12 +1711,17 @@ static void bch2_evict_inode(struct inode *vinode)
 {
     struct bch_fs *c = vinode->i_sb->s_fs_info;
     struct bch_inode_info *inode = to_bch_ei(vinode);
+    bool delete = !inode->v.i_nlink && !is_bad_inode(&inode->v);

     /*
      * evict() has waited for outstanding writeback, we'll do no more IO
      * through this inode: it's safe to remove from VFS inode hashtable here
+     *
+     * Do that now so that other threads aren't blocked from pulling it back
+     * in, there's no reason for them to be:
      */
-    bch2_inode_hash_remove(c, inode);
+    if (!delete)
+        bch2_inode_hash_remove(c, inode);

     truncate_inode_pages_final(&inode->v.i_data);
@@ -1711,12 +1729,18 @@ static void bch2_evict_inode(struct inode *vinode)
     BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);

-    if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
+    if (delete) {
         bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
                 KEY_TYPE_QUOTA_WARN);
         bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
                 KEY_TYPE_QUOTA_WARN);
         bch2_inode_rm(c, inode_inum(inode));
+
+        /*
+         * If we are deleting, we need it present in the vfs hash table
+         * so that fsck can check if unlinked inodes are still open:
+         */
+        bch2_inode_hash_remove(c, inode);
     }

     mutex_lock(&c->vfs_inodes_lock);
@@ -1910,6 +1934,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
     bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
               OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
+    printbuf_nul_terminate(&buf);
     seq_puts(seq, buf.buf);

     int ret = buf.allocation_failure ? -ENOMEM : 0;

View File

@@ -146,6 +146,8 @@ struct bch_inode_info *
 __bch2_create(struct mnt_idmap *, struct bch_inode_info *,
           struct dentry *, umode_t, dev_t, subvol_inum, unsigned);

+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
+
 int bch2_fs_quota_transfer(struct bch_fs *,
                struct bch_inode_info *,
                struct bch_qid,
@@ -196,6 +198,11 @@ int bch2_vfs_init(void);
 #define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields)    ({ do {} while (0); })

+static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+    return NULL;
+}
+
 static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
                            snapshot_id_list *s) {}

View File

@@ -8,6 +8,7 @@
 #include "darray.h"
 #include "dirent.h"
 #include "error.h"
+#include "fs.h"
 #include "fs-common.h"
 #include "fsck.h"
 #include "inode.h"
@@ -20,6 +21,49 @@
 #include <linux/bsearch.h>
 #include <linux/dcache.h> /* struct qstr */
 
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+				   struct bkey_s_c_dirent d)
+{
+	return inode->bi_dir == d.k->p.inode &&
+		inode->bi_dir_offset == d.k->p.offset;
+}
+
+static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
+					 struct bch_inode_unpacked *inode)
+{
+	if (d.v->d_type == DT_SUBVOL
+	    ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
+	    : le64_to_cpu(d.v->d_inum) == inode->bi_inum)
+		return 0;
+	return -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
+}
+
+static void dirent_inode_mismatch_msg(struct printbuf *out,
+				      struct bch_fs *c,
+				      struct bkey_s_c_dirent dirent,
+				      struct bch_inode_unpacked *inode)
+{
+	prt_str(out, "inode points to dirent that does not point back:");
+	prt_newline(out);
+	bch2_bkey_val_to_text(out, c, dirent.s_c);
+	prt_newline(out);
+	bch2_inode_unpacked_to_text(out, inode);
+}
+
+static int dirent_points_to_inode(struct bch_fs *c,
+				  struct bkey_s_c_dirent dirent,
+				  struct bch_inode_unpacked *inode)
+{
+	int ret = dirent_points_to_inode_nowarn(dirent, inode);
+	if (ret) {
+		struct printbuf buf = PRINTBUF;
+		dirent_inode_mismatch_msg(&buf, c, dirent, inode);
+		bch_warn(c, "%s", buf.buf);
+		printbuf_exit(&buf);
+	}
+	return ret;
+}
+
 /*
  * XXX: this is handling transaction restarts without returning
  * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
@@ -345,14 +389,17 @@ static int reattach_inode(struct btree_trans *trans,
 static int remove_backpointer(struct btree_trans *trans,
 			      struct bch_inode_unpacked *inode)
 {
-	struct btree_iter iter;
-	struct bkey_s_c_dirent d;
-	int ret;
+	if (!inode->bi_dir)
+		return 0;
 
-	d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
-			POS(inode->bi_dir, inode->bi_dir_offset), 0,
-			dirent);
-	ret = bkey_err(d) ?:
+	struct bch_fs *c = trans->c;
+	struct btree_iter iter;
+	struct bkey_s_c_dirent d =
+		bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
+				SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot), 0,
+				dirent);
+	int ret = bkey_err(d) ?:
+		dirent_points_to_inode(c, d, inode) ?:
 		__remove_dirent(trans, d.k->p);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
@@ -370,7 +417,8 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
 		return ret;
 
 	ret = remove_backpointer(trans, &inode);
-	bch_err_msg(c, ret, "removing dirent");
+	if (!bch2_err_matches(ret, ENOENT))
+		bch_err_msg(c, ret, "removing dirent");
 	if (ret)
 		return ret;
@@ -625,12 +673,12 @@ static int ref_visible2(struct bch_fs *c,
 struct inode_walker_entry {
 	struct bch_inode_unpacked inode;
 	u32 snapshot;
-	bool seen_this_pos;
 	u64 count;
 };
 
 struct inode_walker {
 	bool first_this_inode;
+	bool have_inodes;
 	bool recalculate_sums;
 	struct bpos last_pos;
@@ -668,6 +716,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 	struct bkey_s_c k;
 	int ret;
 
+	/*
+	 * We no longer have inodes for w->last_pos; clear this to avoid
+	 * screwing up check_i_sectors/check_subdir_count if we take a
+	 * transaction restart here:
+	 */
+	w->have_inodes = false;
 	w->recalculate_sums = false;
 	w->inodes.nr = 0;
 
@@ -685,6 +739,7 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 		return ret;
 
 	w->first_this_inode = true;
+	w->have_inodes = true;
 	return 0;
 }
@@ -739,9 +794,6 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
 		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
 		if (ret)
 			return ERR_PTR(ret);
-	} else if (bkey_cmp(w->last_pos, k.k->p)) {
-		darray_for_each(w->inodes, i)
-			i->seen_this_pos = false;
 	}
 
 	w->last_pos = k.k->p;
@@ -895,21 +947,6 @@ static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
 	return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
 }
 
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-				   struct bkey_s_c_dirent d)
-{
-	return inode->bi_dir == d.k->p.inode &&
-		inode->bi_dir_offset == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
-				   struct bch_inode_unpacked *inode)
-{
-	return d.v->d_type == DT_SUBVOL
-		? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
-		: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
-}
-
 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
 {
 	struct btree_iter iter;
@@ -919,13 +956,14 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
 	return ret;
 }
 
-static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+static int check_inode_dirent_inode(struct btree_trans *trans,
 				    struct bch_inode_unpacked *inode,
-				    u32 inode_snapshot, bool *write_inode)
+				    bool *write_inode)
 {
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
 
+	u32 inode_snapshot = inode->bi_snapshot;
 	struct btree_iter dirent_iter = {};
 	struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
 	int ret = bkey_err(d);
@@ -935,13 +973,13 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
 	if (fsck_err_on(ret,
 			trans, inode_points_to_missing_dirent,
 			"inode points to missing dirent\n%s",
-			(bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
-	    fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+			(bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
+	    fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
 			trans, inode_points_to_wrong_dirent,
-			"inode points to dirent that does not point back:\n%s",
-			(bch2_bkey_val_to_text(&buf, c, inode_k),
-			 prt_newline(&buf),
-			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+			"%s",
+			(printbuf_reset(&buf),
+			 dirent_inode_mismatch_msg(&buf, c, d, inode),
+			 buf.buf))) {
 		/*
 		 * We just clear the backpointer fields for now. If we find a
 		 * dirent that points to this inode in check_dirents(), we'll
@@ -962,6 +1000,22 @@ fsck_err:
 	return ret;
 }
 
+static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
+{
+	subvol_inum inum = {
+		.subvol = snapshot_t(c, p.snapshot)->subvol,
+		.inum = p.offset,
+	};
+
+	/* snapshot tree corruption, can't safely delete */
+	if (!inum.subvol) {
+		bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
+		return true;
+	}
+
+	return __bch2_inode_hash_find(c, inum) != NULL;
+}
+
 static int check_inode(struct btree_trans *trans,
 		       struct btree_iter *iter,
 		       struct bkey_s_c k,
@@ -1028,29 +1082,44 @@ static int check_inode(struct btree_trans *trans,
 	}
 
 	if (u.bi_flags & BCH_INODE_unlinked) {
-		ret = check_inode_deleted_list(trans, k.k->p);
-		if (ret < 0)
-			return ret;
+		if (!test_bit(BCH_FS_started, &c->flags)) {
+			/*
+			 * If we're not in online fsck, don't delete unlinked
+			 * inodes, just make sure they're on the deleted list.
+			 *
+			 * They might be referred to by a logged operation -
+			 * i.e. we might have crashed in the middle of a
+			 * truncate on an unlinked but open file - so we want to
+			 * let the delete_dead_inodes kill it after resuming
+			 * logged ops.
+			 */
+			ret = check_inode_deleted_list(trans, k.k->p);
+			if (ret < 0)
+				return ret;
 
-		fsck_err_on(!ret,
-			    trans, unlinked_inode_not_on_deleted_list,
-			    "inode %llu:%u unlinked, but not on deleted list",
-			    u.bi_inum, k.k->p.snapshot);
-		ret = 0;
-	}
+			fsck_err_on(!ret,
+				    trans, unlinked_inode_not_on_deleted_list,
+				    "inode %llu:%u unlinked, but not on deleted list",
+				    u.bi_inum, k.k->p.snapshot);
 
-	if (u.bi_flags & BCH_INODE_unlinked &&
-	    (!c->sb.clean ||
-	     fsck_err(trans, inode_unlinked_but_clean,
-		      "filesystem marked clean, but inode %llu unlinked",
-		      u.bi_inum))) {
-		ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
-		bch_err_msg(c, ret, "in fsck deleting inode");
-		return ret;
+			ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1);
+			if (ret)
+				goto err;
+		} else {
+			if (fsck_err_on(bch2_inode_is_open(c, k.k->p),
+					trans, inode_unlinked_and_not_open,
+					"inode %llu%u unlinked and not open",
+					u.bi_inum, u.bi_snapshot)) {
+				ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
+				bch_err_msg(c, ret, "in fsck deleting inode");
+				return ret;
+			}
+		}
 	}
 
+	/* i_size_dirty is vestigal, since we now have logged ops for truncate * */
 	if (u.bi_flags & BCH_INODE_i_size_dirty &&
-	    (!c->sb.clean ||
+	    (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
 	     fsck_err(trans, inode_i_size_dirty_but_clean,
 		      "filesystem marked clean, but inode %llu has i_size dirty",
 		      u.bi_inum))) {
@@ -1079,8 +1148,9 @@ static int check_inode(struct btree_trans *trans,
 		do_update = true;
 	}
 
+	/* i_sectors_dirty is vestigal, i_sectors is always updated transactionally */
 	if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
-	    (!c->sb.clean ||
+	    (!test_bit(BCH_FS_clean_recovery, &c->flags) ||
 	     fsck_err(trans, inode_i_sectors_dirty_but_clean,
 		      "filesystem marked clean, but inode %llu has i_sectors dirty",
 		      u.bi_inum))) {
@@ -1108,7 +1178,7 @@ static int check_inode(struct btree_trans *trans,
 	}
 
 	if (u.bi_dir || u.bi_dir_offset) {
-		ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+		ret = check_inode_dirent_inode(trans, &u, &do_update);
 		if (ret)
 			goto err;
 	}
@@ -1537,10 +1607,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 			struct bkey_s_c k,
 			struct inode_walker *inode,
 			struct snapshots_seen *s,
-			struct extent_ends *extent_ends)
+			struct extent_ends *extent_ends,
+			struct disk_reservation *res)
 {
 	struct bch_fs *c = trans->c;
-	struct inode_walker_entry *i;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
@@ -1550,7 +1620,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 		goto out;
 	}
 
-	if (inode->last_pos.inode != k.k->p.inode) {
+	if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) {
 		ret = check_i_sectors(trans, inode);
 		if (ret)
 			goto err;
@@ -1560,12 +1630,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	if (ret)
 		goto err;
 
-	i = walk_inode(trans, inode, k);
-	ret = PTR_ERR_OR_ZERO(i);
+	struct inode_walker_entry *extent_i = walk_inode(trans, inode, k);
+	ret = PTR_ERR_OR_ZERO(extent_i);
 	if (ret)
 		goto err;
 
-	ret = check_key_has_inode(trans, iter, inode, i, k);
+	ret = check_key_has_inode(trans, iter, inode, extent_i, k);
 	if (ret)
 		goto err;
@@ -1574,24 +1644,19 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 						&inode->recalculate_sums);
 		if (ret)
 			goto err;
-	}
 
-	/*
-	 * Check inodes in reverse order, from oldest snapshots to newest,
-	 * starting from the inode that matches this extent's snapshot. If we
-	 * didn't have one, iterate over all inodes:
-	 */
-	if (!i)
-		i = &darray_last(inode->inodes);
-
-	for (;
-	     inode->inodes.data && i >= inode->inodes.data;
-	     --i) {
-		if (i->snapshot > k.k->p.snapshot ||
-		    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
-			continue;
-
-		if (k.k->type != KEY_TYPE_whiteout) {
+		/*
+		 * Check inodes in reverse order, from oldest snapshots to
+		 * newest, starting from the inode that matches this extent's
+		 * snapshot. If we didn't have one, iterate over all inodes:
+		 */
+		for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+		     inode->inodes.data && i >= inode->inodes.data;
+		     --i) {
+			if (i->snapshot > k.k->p.snapshot ||
+			    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+				continue;
+
 			if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
 					k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
 					!bkey_extent_is_reservation(k),
@@ -1611,13 +1676,25 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 					goto err;
 
 				iter->k.type = KEY_TYPE_whiteout;
+				break;
 			}
-
-			if (bkey_extent_is_allocation(k.k))
-				i->count += k.k->size;
 		}
+	}
 
-		i->seen_this_pos = true;
-	}
+	ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc);
+	if (ret)
+		goto err;
+
+	if (bkey_extent_is_allocation(k.k)) {
+		for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes);
+		     inode->inodes.data && i >= inode->inodes.data;
+		     --i) {
+			if (i->snapshot > k.k->p.snapshot ||
+			    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
+				continue;
+
+			i->count += k.k->size;
+		}
+	}
 
 	if (k.k->type != KEY_TYPE_whiteout) {
@@ -1648,13 +1725,11 @@ int bch2_check_extents(struct bch_fs *c)
 	extent_ends_init(&extent_ends);
 
 	int ret = bch2_trans_run(c,
-		for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
+		for_each_btree_key(trans, iter, BTREE_ID_extents,
 				POS(BCACHEFS_ROOT_INO, 0),
-				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
-				&res, NULL,
-				BCH_TRANS_COMMIT_no_enospc, ({
+				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
 			bch2_disk_reservation_put(c, &res);
-			check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
+			check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?:
 			check_extent_overbig(trans, &iter, k);
 		})) ?:
 		check_i_sectors_notnested(trans, &w));
@@ -1740,6 +1815,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct printbuf buf = PRINTBUF;
+	struct btree_iter bp_iter = { NULL };
 	int ret = 0;
 
 	if (inode_points_to_dirent(target, d))
@@ -1752,7 +1828,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
 			 prt_printf(&buf, "\n "),
 			 bch2_inode_unpacked_to_text(&buf, target),
 			 buf.buf)))
-		goto out_noiter;
+		goto err;
 
 	if (!target->bi_dir &&
 	    !target->bi_dir_offset) {
@@ -1761,7 +1837,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
 		return __bch2_fsck_write_inode(trans, target, target_snapshot);
 	}
 
-	struct btree_iter bp_iter = { NULL };
 	struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
 			SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
 	ret = bkey_err(bp_dirent);
@@ -1822,7 +1897,6 @@ out:
 err:
 fsck_err:
 	bch2_trans_iter_exit(trans, &bp_iter);
-out_noiter:
 	printbuf_exit(&buf);
 	bch_err_fn(c, ret);
 	return ret;
@@ -2057,7 +2131,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 	if (k.k->type == KEY_TYPE_whiteout)
 		goto out;
 
-	if (dir->last_pos.inode != k.k->p.inode) {
+	if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
 		ret = check_subdir_count(trans, dir);
 		if (ret)
 			goto err;
@@ -2119,11 +2193,15 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 			if (ret)
 				goto err;
 		}
-
-		if (d.v->d_type == DT_DIR)
-			for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
-				i->count++;
 	}
 
+	ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+	if (ret)
+		goto err;
+
+	if (d.v->d_type == DT_DIR)
+		for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+			i->count++;
+
 out:
 err:
 fsck_err:
@@ -2146,12 +2224,9 @@ int bch2_check_dirents(struct bch_fs *c)
 	snapshots_seen_init(&s);
 
 	int ret = bch2_trans_run(c,
-		for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
+		for_each_btree_key(trans, iter, BTREE_ID_dirents,
 				POS(BCACHEFS_ROOT_INO, 0),
-				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
-				k,
-				NULL, NULL,
-				BCH_TRANS_COMMIT_no_enospc,
+				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
 			check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
 		check_subdir_count_notnested(trans, &dir));
@@ -2296,22 +2371,6 @@ static bool darray_u32_has(darray_u32 *d, u32 v)
 	return false;
 }
 
-/*
- * We've checked that inode backpointers point to valid dirents; here, it's
- * sufficient to check that the subvolume root has a dirent:
- */
-static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
-{
-	struct bch_inode_unpacked inode;
-	int ret = bch2_inode_find_by_inum_trans(trans,
-			(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
-			&inode);
-	if (ret)
-		return ret;
-
-	return inode.bi_dir != 0;
-}
-
 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
@@ -2330,14 +2389,24 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
 		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
 
-		ret = subvol_has_dirent(trans, s);
-		if (ret < 0)
+		struct bch_inode_unpacked subvol_root;
+		ret = bch2_inode_find_by_inum_trans(trans,
+				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
+				&subvol_root);
+		if (ret)
 			break;
 
-		if (fsck_err_on(!ret,
+		/*
+		 * We've checked that inode backpointers point to valid dirents;
+		 * here, it's sufficient to check that the subvolume root has a
+		 * dirent:
+		 */
+		if (fsck_err_on(!subvol_root.bi_dir,
 				trans, subvol_unreachable,
 				"unreachable subvolume %s",
 				(bch2_bkey_val_to_text(&buf, c, s.s_c),
+				 prt_newline(&buf),
+				 bch2_inode_unpacked_to_text(&buf, &subvol_root),
 				 buf.buf))) {
 			ret = reattach_subvol(trans, s);
 			break;
@@ -2432,10 +2501,8 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
 		if (ret && !bch2_err_matches(ret, ENOENT))
 			break;
 
-		if (!ret && !dirent_points_to_inode(d, &inode)) {
+		if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
 			bch2_trans_iter_exit(trans, &dirent_iter);
-			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
-		}
 
 		if (bch2_err_matches(ret, ENOENT)) {
 			ret = 0;

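Much of the fsck.c churn above consolidates two back-pointer predicates: a dirent records which inode it names (d_inum), and the inode records which dirent owns it (bi_dir/bi_dir_offset); the filesystem is consistent only when both directions agree. A standalone toy model of the two checks (illustration only, not bcachefs code; the toy_* types are invented):

/* backptr_check.c - toy model of the dirent/inode back-pointer checks */
#include <stdbool.h>
#include <stdio.h>

struct toy_dirent {
	unsigned long	pos_inode;	/* directory inode (key position) */
	unsigned long	pos_offset;	/* hash offset (key position) */
	unsigned long	d_inum;		/* target inode number */
};

struct toy_inode {
	unsigned long	bi_inum;
	unsigned long	bi_dir;		/* back-pointer: owning directory */
	unsigned long	bi_dir_offset;	/* back-pointer: dirent offset */
};

static bool inode_points_to_dirent(const struct toy_inode *inode,
				   const struct toy_dirent *d)
{
	return inode->bi_dir == d->pos_inode &&
	       inode->bi_dir_offset == d->pos_offset;
}

static bool dirent_points_to_inode(const struct toy_dirent *d,
				   const struct toy_inode *inode)
{
	return d->d_inum == inode->bi_inum;
}

int main(void)
{
	struct toy_dirent d = { .pos_inode = 1, .pos_offset = 77, .d_inum = 42 };
	struct toy_inode i = { .bi_inum = 42, .bi_dir = 1, .bi_dir_offset = 99 };

	/* consistent only if both directions agree */
	printf("dirent -> inode: %d\n", dirent_points_to_inode(&d, &i)); /* 1 */
	printf("inode -> dirent: %d\n", inode_points_to_dirent(&i, &d)); /* 0: stale offset */
	return 0;
}

In the hunks above, a mismatch simply clears the inode's back-pointer fields and lets check_dirents() re-establish them.
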
@@ -320,9 +320,11 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
 int bch2_inode_unpack(struct bkey_s_c k,
 		      struct bch_inode_unpacked *unpacked)
 {
-	if (likely(k.k->type == KEY_TYPE_inode_v3))
-		return bch2_inode_unpack_v3(k, unpacked);
-	return bch2_inode_unpack_slowpath(k, unpacked);
+	unpacked->bi_snapshot = k.k->p.snapshot;
+
+	return likely(k.k->type == KEY_TYPE_inode_v3)
+		? bch2_inode_unpack_v3(k, unpacked)
+		: bch2_inode_unpack_slowpath(k, unpacked);
 }
 
 int bch2_inode_peek_nowarn(struct btree_trans *trans,
@@ -557,7 +559,7 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
 
 void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
 {
-	prt_printf(out, "inum: %llu ", inode->bi_inum);
+	prt_printf(out, "inum: %llu:%u ", inode->bi_inum, inode->bi_snapshot);
 	__bch2_inode_unpacked_to_text(out, inode);
 }
 
@@ -1111,7 +1113,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
 			pos.offset, pos.snapshot))
 		goto delete;
 
-	if (c->sb.clean &&
+	if (test_bit(BCH_FS_clean_recovery, &c->flags) &&
 	    !fsck_err(trans, deleted_inode_but_clean,
 		      "filesystem marked as clean but have deleted inode %llu:%u",
 		      pos.offset, pos.snapshot)) {

@@ -69,6 +69,7 @@ typedef u64 u96;
 struct bch_inode_unpacked {
 	u64 bi_inum;
+	u32 bi_snapshot;
 	u64 bi_journal_seq;
 	__le64 bi_hash_seed;
 	u64 bi_size;

@@ -517,7 +517,7 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
 	if ((ret = bkey_err(k)))
 		goto out;
 
-	if (bversion_cmp(k.k->version, rbio->version) ||
+	if (bversion_cmp(k.k->bversion, rbio->version) ||
 	    !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
 		goto out;
 
@@ -1031,7 +1031,7 @@ get_bio:
 	rbio->read_pos		= read_pos;
 	rbio->data_btree	= data_btree;
 	rbio->data_pos		= data_pos;
-	rbio->version		= k.k->version;
+	rbio->version		= k.k->bversion;
 	rbio->promote		= promote;
 	INIT_WORK(&rbio->work, NULL);

@@ -697,7 +697,7 @@ static void init_append_extent(struct bch_write_op *op,
 	e = bkey_extent_init(op->insert_keys.top);
 	e->k.p = op->pos;
 	e->k.size = crc.uncompressed_size;
-	e->k.version = version;
+	e->k.bversion = version;
 
 	if (crc.csum_type ||
 	    crc.compression_type ||
@@ -1544,7 +1544,7 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 	id = bkey_inline_data_init(op->insert_keys.top);
 	id->k.p = op->pos;
-	id->k.version = op->version;
+	id->k.bversion = op->version;
 	id->k.size = sectors;
 
 	iter = bio->bi_iter;

@@ -605,7 +605,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
 		goto out;
 	}
 
-	if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+	if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err),
 				 c, version, jset, entry,
 				 journal_entry_data_usage_bad_size,
 				 "invalid journal entry usage: %s", err.buf)) {

@@ -37,6 +37,14 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
 	const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
 	struct bkey_buf sk;
 	u32 restart_count = trans->restart_count;
+	struct printbuf buf = PRINTBUF;
+	int ret = 0;
+
+	fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags),
+		    trans, logged_op_but_clean,
+		    "filesystem marked as clean but have logged op\n%s",
+		    (bch2_bkey_val_to_text(&buf, c, k),
+		     buf.buf));
 
 	if (!fn)
 		return 0;
@@ -47,8 +55,9 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
 	fn->resume(trans, sk.k);
 
 	bch2_bkey_buf_exit(&sk, c);
-
-	return trans_was_restarted(trans, restart_count);
+fsck_err:
+	printbuf_exit(&buf);
+	return ret ?: trans_was_restarted(trans, restart_count);
 }
 
 int bch2_resume_logged_ops(struct bch_fs *c)

@@ -478,7 +478,9 @@ start_gp:
 		 */
 		if (!p->cb_armed) {
 			p->cb_armed = true;
+			spin_unlock_irqrestore(&p->lock, flags);
 			__call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
+			goto free_node;
 		} else {
 			__start_poll_synchronize_rcu(pending->srcu);
 		}

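The rcu_pending fix arms the callback, then drops p->lock before calling __call_rcu(). A plausible reading: if the callback, or anything the call re-enters, takes the same lock, invoking it with the lock held invites deadlock. A pthread-based sketch of the general unlock-before-callback pattern (a simplified model with invented names, not the kernel code):

/* callback_unlock.c - build with: cc callback_unlock.c -lpthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int cb_armed;

/* the callback takes the same lock, like rcu_pending_rcu_cb() above */
static void callback(void)
{
	pthread_mutex_lock(&lock);
	cb_armed = 0;
	pthread_mutex_unlock(&lock);
}

/* stands in for __call_rcu(): may invoke the callback synchronously */
static void arm_callback(void (*fn)(void))
{
	fn();
}

static void process(void)
{
	pthread_mutex_lock(&lock);
	if (!cb_armed) {
		cb_armed = 1;
		/*
		 * Drop the lock first: if arm_callback() runs the callback
		 * in-line, taking the lock again would deadlock.
		 */
		pthread_mutex_unlock(&lock);
		arm_callback(callback);
		return;
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	process();
	printf("no deadlock: cb_armed=%d\n", cb_armed);
	return 0;
}
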
@@ -151,7 +151,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
 	struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);
 
 	/* Has this delta already been applied to the btree? */
-	if (bversion_cmp(old.k->version, k->k->k.version) >= 0) {
+	if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) {
 		ret = 0;
 		goto out;
 	}
@@ -717,6 +717,8 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (c->opts.fsck)
 		set_bit(BCH_FS_fsck_running, &c->flags);
+	if (c->sb.clean)
+		set_bit(BCH_FS_clean_recovery, &c->flags);
 
 	ret = bch2_blacklist_table_initialize(c);
 	if (ret) {
@@ -862,6 +864,9 @@ use_clean:
 	clear_bit(BCH_FS_fsck_running, &c->flags);
 
+	/* in case we don't run journal replay, i.e. norecovery mode */
+	set_bit(BCH_FS_accounting_replay_done, &c->flags);
+
 	/* fsync if we fixed errors */
 	if (test_bit(BCH_FS_errors_fixed, &c->flags) &&
 	    bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) {

@@ -50,7 +50,7 @@
 	x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
 	x(check_nlinks, 31, PASS_FSCK) \
 	x(resume_logged_ops, 23, PASS_ALWAYS) \
-	x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
+	x(delete_dead_inodes, 32, PASS_ALWAYS) \
 	x(fix_reflink_p, 33, 0) \
 	x(set_fs_needs_rebalance, 34, 0) \

@@ -367,7 +367,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 	r_v->k.type = bkey_type_to_indirect(&orig->k);
 	r_v->k.p = reflink_iter.pos;
 	bch2_key_resize(&r_v->k, orig->k.size);
-	r_v->k.version = orig->k.version;
+	r_v->k.bversion = orig->k.bversion;
 
 	set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));

@@ -66,9 +66,9 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
 	prt_printf(out, "]");
 }
 
-int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r,
 				 struct bch_sb *sb,
 				 struct printbuf *err)
 {
 	if (!r->nr_devs) {
 		prt_printf(err, "no devices in entry ");
@@ -94,6 +94,16 @@ bad:
 	return -BCH_ERR_invalid_replicas_entry;
 }
 
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+				 struct bch_fs *c,
+				 struct printbuf *err)
+{
+	mutex_lock(&c->sb_lock);
+	int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err);
+	mutex_unlock(&c->sb_lock);
+	return ret;
+}
+
 void bch2_cpu_replicas_to_text(struct printbuf *out,
 			       struct bch_replicas_cpu *r)
 {
@@ -676,7 +686,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 		struct bch_replicas_entry_v1 *e =
 			cpu_replicas_entry(cpu_r, i);
 
-		int ret = bch2_replicas_entry_validate(e, sb, err);
+		int ret = bch2_replicas_entry_validate_locked(e, sb, err);
 		if (ret)
 			return ret;
@@ -795,7 +805,7 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
 		for (unsigned i = 0; i < e->nr_devs; i++) {
 			nr_online += test_bit(e->devs[i], devs.d);
 
-			struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
+			struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
 			nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
 		}
 		rcu_read_unlock();

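bch2_replicas_entry_validate() is split here into a public wrapper that takes sb_lock and a _locked worker for callers that already hold it, a common kernel convention. A compact userspace sketch of that convention (invented struct fs and field names):

/* locked_wrapper.c - sketch of the foo()/foo_locked() convention */
#include <pthread.h>
#include <stdio.h>

struct fs {
	pthread_mutex_t	sb_lock;
	int		sb_value;	/* state guarded by sb_lock */
};

/* callers that already hold sb_lock use the _locked variant directly */
static int validate_locked(struct fs *c)
{
	return c->sb_value >= 0 ? 0 : -1;
}

/* public wrapper: takes the lock, delegates, drops it */
static int validate(struct fs *c)
{
	pthread_mutex_lock(&c->sb_lock);
	int ret = validate_locked(c);
	pthread_mutex_unlock(&c->sb_lock);
	return ret;
}

int main(void)
{
	struct fs c = { .sb_lock = PTHREAD_MUTEX_INITIALIZER, .sb_value = 1 };
	printf("validate: %d\n", validate(&c));
	return 0;
}

In the hunks above, bch2_cpu_replicas_validate() already holds sb_lock and so switches to the _locked variant, while external callers such as the journal-entry validation go through the locking wrapper.
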
@@ -10,7 +10,7 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
 void bch2_replicas_entry_to_text(struct printbuf *,
 				 struct bch_replicas_entry_v1 *);
 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
-				 struct bch_sb *, struct printbuf *);
+				 struct bch_fs *, struct printbuf *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 static inline struct bch_replicas_entry_v1 *

@@ -155,7 +155,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
 		SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
 		c->sb.clean = false;
 		mutex_unlock(&c->sb_lock);
-		return NULL;
+		return ERR_PTR(-BCH_ERR_invalid_sb_clean);
 	}
 
 	clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
@@ -167,6 +167,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
 	ret = bch2_sb_clean_validate_late(c, clean, READ);
 	if (ret) {
+		kfree(clean);
 		mutex_unlock(&c->sb_lock);
 		return ERR_PTR(ret);
 	}

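The second sb-clean hunk plugs a leak: the kmemdup()'d copy was not freed when late validation failed. The shape of the fix, as a small userspace sketch (memdup() here is a stand-in for kmemdup(); all names invented):

/* dup_validate.c - freeing a duplicated buffer on the error path */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);
	if (p)
		memcpy(p, src, len);
	return p;
}

static int validate(const char *buf, size_t len)
{
	return len && buf[len - 1] == '\0' ? 0 : -1;
}

static char *read_clean_section(const char *src, size_t len, int *err)
{
	char *clean = memdup(src, len);
	if (!clean) {
		*err = -1;
		return NULL;
	}

	*err = validate(clean, len);
	if (*err) {
		free(clean);	/* the fix above: don't leak on failure */
		return NULL;
	}
	return clean;
}

int main(void)
{
	int err;
	char *s = read_clean_section("ok", 3, &err);
	printf("err=%d value=%s\n", err, s ? s : "(null)");
	free(s);
	return 0;
}

The first hunk is related hygiene: a corrupt clean section now returns ERR_PTR(-BCH_ERR_invalid_sb_clean) instead of a silent NULL, so callers see an explicit error.
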
@@ -312,8 +312,7 @@ static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
 			if (!first)
 				prt_char(out, ',');
 			first = false;
-			unsigned e = le16_to_cpu(i->errors[j]);
-			prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
+			bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
 		}
 		prt_newline(out);
 	}
@@ -353,7 +352,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
 		for (unsigned i = 0; i < src->nr_errors; i++)
 			dst->errors[i] = cpu_to_le16(src->errors[i]);
 
-		downgrade_table_extra(c, &table);
+		ret = downgrade_table_extra(c, &table);
+		if (ret)
+			goto out;
 
 		if (!dst->recovery_passes[0] &&
 		    !dst->recovery_passes[1] &&
@@ -399,7 +400,7 @@ void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_mi
 		for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
 			unsigned e = le16_to_cpu(i->errors[j]);
 
-			if (e < BCH_SB_ERR_MAX)
+			if (e < BCH_FSCK_ERR_MAX)
 				__set_bit(e, c->sb.errors_silent);
 
 			if (e < sizeof(ext->errors_silent) * 8)
 				__set_bit_le64(e, ext->errors_silent);

@@ -7,12 +7,12 @@
 const char * const bch2_sb_error_strs[] = {
 #define x(t, n, ...) [n] = #t,
 	BCH_SB_ERRS()
-	NULL
+#undef x
 };
 
-static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
+void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id id)
 {
-	if (id < BCH_SB_ERR_MAX)
+	if (id < BCH_FSCK_ERR_MAX)
 		prt_str(out, bch2_sb_error_strs[id]);
 	else
 		prt_printf(out, "(unknown error %u)", id);

@@ -6,6 +6,8 @@
 extern const char * const bch2_sb_error_strs[];
 
+void bch2_sb_error_id_to_text(struct printbuf *, enum bch_sb_error_id);
+
 extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
 
 void bch2_sb_error_count(struct bch_fs *, enum bch_sb_error_id);

@@ -210,22 +210,23 @@ enum bch_fsck_flags {
 	x(inode_snapshot_mismatch, 196, 0) \
 	x(inode_unlinked_but_clean, 197, 0) \
 	x(inode_unlinked_but_nlink_nonzero, 198, 0) \
+	x(inode_unlinked_and_not_open, 281, 0) \
 	x(inode_checksum_type_invalid, 199, 0) \
 	x(inode_compression_type_invalid, 200, 0) \
 	x(inode_subvol_root_but_not_dir, 201, 0) \
-	x(inode_i_size_dirty_but_clean, 202, 0) \
-	x(inode_i_sectors_dirty_but_clean, 203, 0) \
-	x(inode_i_sectors_wrong, 204, 0) \
-	x(inode_dir_wrong_nlink, 205, 0) \
-	x(inode_dir_multiple_links, 206, 0) \
-	x(inode_multiple_links_but_nlink_0, 207, 0) \
-	x(inode_wrong_backpointer, 208, 0) \
-	x(inode_wrong_nlink, 209, 0) \
-	x(inode_unreachable, 210, 0) \
-	x(deleted_inode_but_clean, 211, 0) \
-	x(deleted_inode_missing, 212, 0) \
-	x(deleted_inode_is_dir, 213, 0) \
-	x(deleted_inode_not_unlinked, 214, 0) \
+	x(inode_i_size_dirty_but_clean, 202, FSCK_AUTOFIX) \
+	x(inode_i_sectors_dirty_but_clean, 203, FSCK_AUTOFIX) \
+	x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \
+	x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \
+	x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \
+	x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
+	x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
+	x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
+	x(inode_unreachable, 210, FSCK_AUTOFIX) \
+	x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
+	x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
+	x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
+	x(deleted_inode_not_unlinked, 214, FSCK_AUTOFIX) \
 	x(extent_overlapping, 215, 0) \
 	x(key_in_missing_inode, 216, 0) \
 	x(key_in_wrong_inode_type, 217, 0) \
@@ -255,7 +256,7 @@ enum bch_fsck_flags {
 	x(dir_loop, 241, 0) \
 	x(hash_table_key_duplicate, 242, 0) \
 	x(hash_table_key_wrong_offset, 243, 0) \
-	x(unlinked_inode_not_on_deleted_list, 244, 0) \
+	x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \
 	x(reflink_p_front_pad_bad, 245, 0) \
 	x(journal_entry_dup_same_device, 246, 0) \
 	x(inode_bi_subvol_missing, 247, 0) \
@@ -270,7 +271,7 @@ enum bch_fsck_flags {
 	x(subvol_children_not_set, 256, 0) \
 	x(subvol_children_bad, 257, 0) \
 	x(subvol_loop, 258, 0) \
-	x(subvol_unreachable, 259, 0) \
+	x(subvol_unreachable, 259, FSCK_AUTOFIX) \
 	x(btree_node_bkey_bad_u64s, 260, 0) \
 	x(btree_node_topology_empty_interior_node, 261, 0) \
 	x(btree_ptr_v2_min_key_bad, 262, 0) \
@@ -282,8 +283,8 @@ enum bch_fsck_flags {
 	x(btree_ptr_v2_written_0, 268, 0) \
 	x(subvol_snapshot_bad, 269, 0) \
 	x(subvol_inode_bad, 270, 0) \
-	x(alloc_key_stripe_sectors_wrong, 271, 0) \
-	x(accounting_mismatch, 272, 0) \
+	x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
+	x(accounting_mismatch, 272, FSCK_AUTOFIX) \
 	x(accounting_replicas_not_marked, 273, 0) \
 	x(invalid_btree_id, 274, 0) \
 	x(alloc_key_io_time_bad, 275, 0) \
@@ -292,12 +293,14 @@ enum bch_fsck_flags {
 	x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
 	x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
 	x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
+	x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
+	x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
+	x(MAX, 284, 0)
 
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
 	BCH_SB_ERRS()
 #undef x
-	BCH_SB_ERR_MAX
 };
 
 struct bch_sb_field_errors {

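The BCH_SB_ERRS() changes lean on the X-macro pattern: one master list expands once into enum values with stable numbers (now including BCH_FSCK_ERR_MAX as an x() entry) and once into the id-to-string table, with #undef x ending each expansion instead of a stray NULL sentinel. A self-contained sketch of the same pattern (invented error names):

/* xmacro.c - sketch of the x() table technique used by BCH_SB_ERRS() */
#include <stdio.h>

/* one master list: name, stable numeric id, flags */
#define ERR_TABLE()			\
	x(shiny_error,      0, 0)	\
	x(dull_error,       1, 1)	\
	x(mysterious_error, 2, 0)

/* expansion 1: an enum with fixed values */
enum err_id {
#define x(t, n, ...) ERR_##t = n,
	ERR_TABLE()
#undef x
	ERR_MAX
};

/* expansion 2: an id -> string table generated from the same list */
static const char * const err_strs[] = {
#define x(t, n, ...) [n] = #t,
	ERR_TABLE()
#undef x
};

int main(void)
{
	for (unsigned i = 0; i < ERR_MAX; i++)
		printf("%u: %s\n", i, err_strs[i]);
	return 0;
}

Each entry carries an explicit, stable number rather than relying on enum auto-increment, since renumbering would change its on-disk meaning.
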
@@ -169,11 +169,17 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
 			ret = -1 - SIX_LOCK_write;
 		}
 	} else if (type == SIX_LOCK_write && lock->readers) {
-		if (try) {
+		if (try)
 			atomic_add(SIX_LOCK_HELD_write, &lock->state);
-			smp_mb__after_atomic();
-		}
 
+		/*
+		 * Make sure atomic_add happens before pcpu_read_count and
+		 * six_set_bitmask in slow path happens before pcpu_read_count.
+		 *
+		 * Paired with the smp_mb() in read lock fast path (per-cpu mode)
+		 * and the one before atomic_read in read unlock path.
+		 */
+		smp_mb();
 		ret = !pcpu_read_count(lock);
 
 		if (try && !ret) {

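The six.c change makes the barrier unconditional: the slow path also needs its six_set_bitmask() store ordered before reading the per-cpu reader count, not just the try path's atomic_add. This is the classic store-buffering (Dekker) pattern; a C11 sketch with the paired full fences (a simplified model, not the six lock itself):

/* barrier_pair.c - the store-buffering pattern behind the smp_mb() above */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int want_write;	/* like SIX_LOCK_HELD_write in lock->state */
static atomic_int readers;	/* like the per-cpu reader count */

static bool try_write_lock(void)
{
	atomic_store_explicit(&want_write, 1, memory_order_relaxed);
	/* full fence: my store must be visible before I read 'readers' */
	atomic_thread_fence(memory_order_seq_cst);
	return atomic_load_explicit(&readers, memory_order_relaxed) == 0;
}

static bool try_read_lock(void)
{
	atomic_fetch_add_explicit(&readers, 1, memory_order_relaxed);
	/* paired full fence on the reader side */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load_explicit(&want_write, memory_order_relaxed)) {
		atomic_fetch_sub_explicit(&readers, 1, memory_order_relaxed);
		return false;
	}
	return true;
}

int main(void)
{
	/*
	 * With both fences in place, at least one side must observe the
	 * other's store: a reader and a writer can never both succeed.
	 */
	printf("read:  %d\n", try_read_lock());
	printf("write: %d\n", try_write_lock());
	return 0;
}
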
@@ -469,6 +469,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
 	u32 id = snapshot_root;
 	u32 subvol = 0, s;
 
+	rcu_read_lock();
 	while (id) {
 		s = snapshot_t(c, id)->subvol;
 
@@ -477,6 +478,7 @@ static u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root)
 		id = bch2_snapshot_tree_next(c, id);
 	}
+	rcu_read_unlock();
 
 	return subvol;
 }
@@ -1782,6 +1784,7 @@ static int bch2_propagate_key_to_snapshot_leaf(struct btree_trans *trans,
 	new->k.p.snapshot = leaf_id;
 	ret = bch2_trans_update(trans, &iter, new, 0);
 out:
+	bch2_set_btree_iter_dontneed(&iter);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }

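bch2_snapshot_tree_oldest_subvol() walks snapshot_t() entries that are reclaimed via RCU, so the whole walk now sits inside rcu_read_lock()/rcu_read_unlock(). A userspace sketch of the same read-side rule, assuming liburcu (link with -lurcu; exact headers and flavor selection vary by liburcu version):

/* rcu_walk.c - read-side critical section sketch (assumes liburcu) */
#include <urcu.h>
#include <stdio.h>

struct node {
	int		val;
	struct node	*next;
};

static struct node *head;	/* published with rcu_assign_pointer() */

static int sum_list(void)
{
	int sum = 0;

	/*
	 * The entire traversal is one read-side section, so a concurrent
	 * writer's synchronize_rcu() cannot complete, and thus cannot free
	 * nodes, until we are done; the snapshot-tree walk above relies on
	 * the same guarantee.
	 */
	rcu_read_lock();
	for (struct node *n = rcu_dereference(head); n; n = rcu_dereference(n->next))
		sum += n->val;
	rcu_read_unlock();
	return sum;
}

int main(void)
{
	rcu_register_thread();

	struct node b = { .val = 2, .next = NULL };
	struct node a = { .val = 1, .next = &b };
	rcu_assign_pointer(head, &a);

	printf("sum: %d\n", sum_list());

	rcu_unregister_thread();
	return 0;
}
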
@@ -92,34 +92,32 @@ static int check_subvol(struct btree_trans *trans,
 	}
 
 	struct bch_inode_unpacked inode;
-	struct btree_iter inode_iter = {};
-	ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
-			(subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
-			0);
-	bch2_trans_iter_exit(trans, &inode_iter);
-
-	if (ret && !bch2_err_matches(ret, ENOENT))
-		return ret;
-
-	if (fsck_err_on(ret,
-			trans, subvol_to_missing_root,
-			"subvolume %llu points to missing subvolume root %llu:%u",
-			k.k->p.offset, le64_to_cpu(subvol.v->inode),
-			le32_to_cpu(subvol.v->snapshot))) {
-		ret = bch2_subvolume_delete(trans, iter->pos.offset);
-		bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
-		return ret ?: -BCH_ERR_transaction_restart_nested;
-	}
-
-	if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
-			trans, subvol_root_wrong_bi_subvol,
-			"subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
-			inode.bi_inum, inode_iter.k.p.snapshot,
-			inode.bi_subvol, subvol.k->p.offset)) {
-		inode.bi_subvol = subvol.k->p.offset;
-		ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
-		if (ret)
+	ret = bch2_inode_find_by_inum_nowarn_trans(trans,
+			(subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
+			&inode);
+	if (!ret) {
+		if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
+				trans, subvol_root_wrong_bi_subvol,
+				"subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
+				inode.bi_inum, inode.bi_snapshot,
+				inode.bi_subvol, subvol.k->p.offset)) {
+			inode.bi_subvol = subvol.k->p.offset;
+			ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
+			if (ret)
+				goto err;
+		}
+	} else if (bch2_err_matches(ret, ENOENT)) {
+		if (fsck_err(trans, subvol_to_missing_root,
+			     "subvolume %llu points to missing subvolume root %llu:%u",
+			     k.k->p.offset, le64_to_cpu(subvol.v->inode),
+			     le32_to_cpu(subvol.v->snapshot))) {
+			ret = bch2_subvolume_delete(trans, iter->pos.offset);
+			bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
+			ret = ret ?: -BCH_ERR_transaction_restart_nested;
 			goto err;
+		}
+	} else {
+		goto err;
 	}
 
 	if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
@@ -137,7 +135,7 @@ static int check_subvol(struct btree_trans *trans,
 			"%s: snapshot tree %u not found", __func__, snapshot_tree);
 
 		if (ret)
-			return ret;
+			goto err;
 
 		if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
 				trans, subvol_not_master_and_not_snapshot,
@@ -147,7 +145,7 @@ static int check_subvol(struct btree_trans *trans,
 			bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
 		ret = PTR_ERR_OR_ZERO(s);
 		if (ret)
-			return ret;
+			goto err;
 
 		SET_BCH_SUBVOLUME_SNAP(&s->v, true);
 	}

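check_subvol() now funnels every failure through `goto err` and preserves the first error with `ret = ret ?: -BCH_ERR_transaction_restart_nested`. The `a ?: b` form is GNU C's conditional with omitted middle operand; a tiny demonstration of the error-chaining idiom (GCC/Clang extension):

/* elvis.c - the a ?: b idiom used for error chaining */
#include <stdio.h>

static int step1(void) { return 0; }	/* success */
static int step2(void) { return -5; }	/* first failure wins */

int main(void)
{
	/*
	 * x ?: y evaluates x once and yields it if nonzero, else y.
	 * "ret = ret ?: -RESTART;" therefore keeps an earlier error and
	 * only substitutes the new code when ret was 0.
	 */
	int ret = step1() ?: step2();
	printf("ret = %d\n", ret);	/* -5 */

	ret = -1;
	ret = ret ?: -99;
	printf("ret = %d\n", ret);	/* still -1 */
	return 0;
}

The same idiom chains the whole check in remove_backpointer() above: bkey_err() ?: dirent_points_to_inode() ?: __remove_dirent(), stopping at the first nonzero result.
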
@@ -799,8 +799,10 @@ retry:
 	     i < layout.sb_offset + layout.nr_superblocks; i++) {
 		offset = le64_to_cpu(*i);
 
-		if (offset == opt_get(*opts, sb))
+		if (offset == opt_get(*opts, sb)) {
+			ret = -BCH_ERR_invalid;
 			continue;
+		}
 
 		ret = read_one_super(sb, offset, &err);
 		if (!ret)
@@ -1188,7 +1190,8 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
 		le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
 
 		prt_printf(out, "Errors to silently fix:\t");
-		prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+		prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
+				    min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
 		prt_newline(out);
 
 		kfree(errors_silent);

@@ -394,7 +394,7 @@ static int insert_test_extent(struct bch_fs *c,
 	k.k_i.k.p.offset = end;
 	k.k_i.k.p.snapshot = U32_MAX;
 	k.k_i.k.size = end - start;
-	k.k_i.k.version.lo = test_version++;
+	k.k_i.k.bversion.lo = test_version++;
 
 	ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0);
 	bch_err_fn(c, ret);