mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00
Update bcachefs sources to f026e4e024
This commit is contained in:
parent
03bc9d71b1
commit
819f2dde79
@ -1 +1 @@
|
||||
3b4024f94489e4d8dc8eb7f1278754a2545f8026
|
||||
f026e4e0243cc10e721504a8bfaa131ea8aa4c91
|
||||
|
2
Makefile
2
Makefile
@ -78,7 +78,7 @@ SRCS=bcachefs.c \
|
||||
libbcachefs/dirent.c \
|
||||
libbcachefs/error.c \
|
||||
libbcachefs/extents.c \
|
||||
libbcachefs/fs-gc.c \
|
||||
libbcachefs/fsck.c \
|
||||
libbcachefs/inode.c \
|
||||
libbcachefs/io.c \
|
||||
libbcachefs/journal.c \
|
||||
|
@ -288,8 +288,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * bio_copy_data_iter() takes both iterators by pointer.  Exactly one
 * prototype is declared: a second, by-value declaration of the same name
 * would be a conflicting redeclaration.
 */
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
			       struct bio *src, struct bvec_iter *src_iter);
|
||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||
extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
|
||||
|
||||
|
@ -458,6 +458,7 @@ enum {
|
||||
BCH_FS_BDEV_MOUNTED,
|
||||
BCH_FS_ERROR,
|
||||
BCH_FS_FSCK_FIXED_ERRORS,
|
||||
BCH_FS_FSCK_DONE,
|
||||
BCH_FS_FIXED_GENS,
|
||||
};
|
||||
|
||||
@ -724,6 +725,11 @@ struct bch_fs {
|
||||
struct work_struct read_retry_work;
|
||||
spinlock_t read_retry_lock;
|
||||
|
||||
/* ERRORS */
|
||||
struct list_head fsck_errors;
|
||||
struct mutex fsck_error_lock;
|
||||
bool fsck_alloc_err;
|
||||
|
||||
/* FILESYSTEM */
|
||||
wait_queue_head_t writeback_wait;
|
||||
atomic_t writeback_pages;
|
||||
|
@ -89,18 +89,20 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
||||
ops->key_debugcheck(c, b, k);
|
||||
}
|
||||
|
||||
void bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
char *bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||
ops->val_to_text)
|
||||
ops->val_to_text(c, buf, size, k);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
char *bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
char *buf, size_t size, struct bkey_s_c k)
|
||||
{
|
||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||
char *out = buf, *end = buf + size;
|
||||
@ -109,9 +111,11 @@ void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||
|
||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||
ops->val_to_text) {
|
||||
out += scnprintf(out, end - out, " -> ");
|
||||
out += scnprintf(out, end - out, ": ");
|
||||
ops->val_to_text(c, out, end - out, k);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
void bch2_bkey_swab(enum bkey_type type,
|
||||
|
@ -67,10 +67,10 @@ const char *bch2_btree_bkey_invalid(struct bch_fs *, struct btree *,
|
||||
struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
||||
/*
 * Text formatters for keys; each takes an output buffer and its size and
 * returns a char pointer.
 */
char *bch2_val_to_text(struct bch_fs *, enum bkey_type,
		       char *, size_t, struct bkey_s_c);
char *bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
			    char *, size_t, struct bkey_s_c);
|
||||
|
||||
void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
|
||||
struct bkey_packed *);
|
||||
|
@ -91,6 +91,7 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
|
||||
six_lock_init(&b->lock);
|
||||
INIT_LIST_HEAD(&b->list);
|
||||
INIT_LIST_HEAD(&b->write_blocked);
|
||||
INIT_LIST_HEAD(&b->reachable);
|
||||
|
||||
mca_data_alloc(c, b, gfp);
|
||||
return b->data ? b : NULL;
|
||||
|
@ -605,10 +605,12 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
|
||||
bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]);
|
||||
|
||||
/* Repack everything with @new_format and sort down to one bset */
|
||||
for (i = 0; i < nr_old_nodes; i++)
|
||||
for (i = 0; i < nr_old_nodes; i++) {
|
||||
new_nodes[i] =
|
||||
__bch2_btree_node_alloc_replacement(c, old_nodes[i],
|
||||
new_format, res);
|
||||
list_add(&new_nodes[i]->reachable, &as->reachable_list);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conceptually we concatenate the nodes together and slice them
|
||||
@ -645,6 +647,7 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
|
||||
|
||||
set_btree_bset_end(n1, n1->set);
|
||||
|
||||
list_del_init(&n2->reachable);
|
||||
six_unlock_write(&n2->lock);
|
||||
bch2_btree_node_free_never_inserted(c, n2);
|
||||
six_unlock_intent(&n2->lock);
|
||||
|
@ -872,32 +872,57 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce)
|
||||
vstruct_end(i) - (void *) i->_data);
|
||||
}
|
||||
|
||||
/*
 * btree_node_error - report a problem found while validating a bset
 *
 * @c:   filesystem
 * @b:   btree node being validated
 * @ptr: extent pointer the node was read from / is written to
 * @msg: printf-style format string literal, followed by its arguments
 *
 * This macro is not self-contained.  Besides its arguments it uses these
 * names from the function it is expanded in:
 *   write    - READ or WRITE; selects the reporting path
 *   i        - the bset being validated (its u64s count is printed)
 *   ret      - assigned BCH_FSCK_ERRORS_NOT_FIXED on the hard-error path
 *   fsck_err - label jumped to on the hard-error path
 *
 * When reading and BCH_FS_INITIAL_GC_DONE is not yet set, the problem is
 * passed to mustfix_fsck_err().
 * NOTE(review): mustfix_fsck_err() presumably also relies on ret/fsck_err
 * in the caller - confirm against its definition.
 *
 * In every other case the message is logged with bch_err(), ret is set and
 * control jumps to fsck_err.
 */
#define btree_node_error(c, b, ptr, msg, ...)				\
do {									\
	if (write == READ &&						\
	    !test_bit(BCH_FS_INITIAL_GC_DONE, &(c)->flags)) {		\
		mustfix_fsck_err(c,					\
			"btree node read error at btree %u level %u/%u\n"\
			"sector %llu node offset %u bset u64s %u: " msg,\
			(b)->btree_id, (b)->level,			\
			(c)->btree_roots[(b)->btree_id].level,		\
			(u64) (ptr)->offset, (b)->written,		\
			le16_to_cpu((i)->u64s), ##__VA_ARGS__);		\
	} else {							\
		bch_err(c, "%s at btree %u level %u/%u\n"		\
			"sector %llu node offset %u bset u64s %u: " msg,\
			write == WRITE					\
			? "corrupt metadata in btree node write"	\
			: "btree node error",				\
			(b)->btree_id, (b)->level,			\
			(c)->btree_roots[(b)->btree_id].level,		\
			(u64) (ptr)->offset, (b)->written,		\
			le16_to_cpu((i)->u64s), ##__VA_ARGS__);		\
		ret = BCH_FSCK_ERRORS_NOT_FIXED;			\
		goto fsck_err;						\
	}								\
} while (0)
|
||||
|
||||
static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
struct bch_dev *ca,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
struct bset *i, unsigned sectors,
|
||||
unsigned *whiteout_u64s)
|
||||
static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||
const struct bch_extent_ptr *ptr,
|
||||
struct bset *i, unsigned sectors,
|
||||
unsigned *whiteout_u64s,
|
||||
int write)
|
||||
{
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
struct bpos prev_pos = POS_MIN;
|
||||
bool seen_non_whiteout = false;
|
||||
int ret = 0;
|
||||
|
||||
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION)
|
||||
return "unsupported bset version";
|
||||
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) {
|
||||
btree_node_error(c, b, ptr, "unsupported bset version");
|
||||
i->u64s = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (b->written + sectors > c->sb.btree_node_size)
|
||||
return "bset past end of btree node";
|
||||
if (b->written + sectors > c->sb.btree_node_size) {
|
||||
btree_node_error(c, b, ptr, "bset past end of btree node");
|
||||
i->u64s = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i != &b->data->keys && !i->u64s)
|
||||
btree_node_error(b, c, ptr, "empty set");
|
||||
if (b->written && !i->u64s)
|
||||
btree_node_error(c, b, ptr, "empty set");
|
||||
|
||||
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
||||
seen_non_whiteout = true;
|
||||
@ -911,7 +936,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
const char *invalid;
|
||||
|
||||
if (!k->u64s) {
|
||||
btree_node_error(b, c, ptr,
|
||||
btree_node_error(c, b, ptr,
|
||||
"KEY_U64s 0: %zu bytes of metadata lost",
|
||||
vstruct_end(i) - (void *) k);
|
||||
|
||||
@ -920,7 +945,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
if (bkey_next(k) > vstruct_last(i)) {
|
||||
btree_node_error(b, c, ptr,
|
||||
btree_node_error(c, b, ptr,
|
||||
"key extends past end of bset");
|
||||
|
||||
i->u64s = cpu_to_le16((u64 *) k - i->_data);
|
||||
@ -928,7 +953,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
if (k->format > KEY_FORMAT_CURRENT) {
|
||||
btree_node_error(b, c, ptr,
|
||||
btree_node_error(c, b, ptr,
|
||||
"invalid bkey format %u", k->format);
|
||||
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
@ -947,8 +972,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
char buf[160];
|
||||
|
||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||
buf, sizeof(buf), u);
|
||||
btree_node_error(b, c, ptr,
|
||||
buf, sizeof(buf), u);
|
||||
btree_node_error(c, b, ptr,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
|
||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||
@ -969,7 +994,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
*whiteout_u64s = k->_data - i->_data;
|
||||
seen_non_whiteout = true;
|
||||
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
|
||||
btree_node_error(b, c, ptr,
|
||||
btree_node_error(c, b, ptr,
|
||||
"keys out of order: %llu:%llu > %llu:%llu",
|
||||
prev_pos.inode,
|
||||
prev_pos.offset,
|
||||
@ -984,7 +1009,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
|
||||
return NULL;
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool extent_contains_ptr(struct bkey_s_c_extent e,
|
||||
@ -1012,7 +1038,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
||||
const char *err;
|
||||
struct bch_csum csum;
|
||||
struct nonce nonce;
|
||||
int ret;
|
||||
int ret, write = READ;
|
||||
|
||||
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
|
||||
__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
|
||||
@ -1115,9 +1141,10 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
||||
sectors = vstruct_sectors(bne, c->block_bits);
|
||||
}
|
||||
|
||||
err = validate_bset(c, b, ca, ptr, i, sectors, &whiteout_u64s);
|
||||
if (err)
|
||||
goto err;
|
||||
ret = validate_bset(c, b, ptr, i, sectors,
|
||||
&whiteout_u64s, READ);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
|
||||
b->written += sectors;
|
||||
|
||||
@ -1172,8 +1199,10 @@ out:
|
||||
mempool_free(iter, &c->fill_iter);
|
||||
return;
|
||||
err:
|
||||
btree_node_error(c, b, ptr, "%s", err);
|
||||
fsck_err:
|
||||
bch2_inconsistent_error(c);
|
||||
set_btree_node_read_error(b);
|
||||
btree_node_error(b, c, ptr, "%s", err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1309,6 +1338,23 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
struct bset *i, unsigned sectors)
|
||||
{
|
||||
const struct bch_extent_ptr *ptr;
|
||||
unsigned whiteout_u64s = 0;
|
||||
int ret;
|
||||
|
||||
extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
|
||||
break;
|
||||
|
||||
ret = validate_bset(c, b, ptr, i, sectors, &whiteout_u64s, WRITE);
|
||||
if (ret)
|
||||
bch2_fatal_error(c);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct closure *parent,
|
||||
enum six_lock_type lock_type_held)
|
||||
@ -1343,18 +1389,24 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
if (!(old & (1 << BTREE_NODE_dirty)))
|
||||
return;
|
||||
|
||||
if (b->written &&
|
||||
!btree_node_may_write(b))
|
||||
return;
|
||||
|
||||
if (old & (1 << BTREE_NODE_write_in_flight)) {
|
||||
btree_node_wait_on_io(b);
|
||||
continue;
|
||||
}
|
||||
|
||||
new &= ~(1 << BTREE_NODE_dirty);
|
||||
new &= ~(1 << BTREE_NODE_need_write);
|
||||
new |= (1 << BTREE_NODE_write_in_flight);
|
||||
new |= (1 << BTREE_NODE_just_written);
|
||||
new ^= (1 << BTREE_NODE_write_idx);
|
||||
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
||||
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON(!list_empty_careful(&b->reachable) != !b->written);
|
||||
|
||||
BUG_ON(b->written >= c->sb.btree_node_size);
|
||||
BUG_ON(bset_written(b, btree_bset_last(b)));
|
||||
@ -1430,13 +1482,17 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
clear_needs_whiteout(i);
|
||||
|
||||
if (b->written && !i->u64s) {
|
||||
/* Nothing to write: */
|
||||
btree_bounce_free(c, order, used_mempool, data);
|
||||
btree_node_write_done(c, b);
|
||||
return;
|
||||
}
|
||||
/* do we have data to write? */
|
||||
if (b->written && !i->u64s)
|
||||
goto nowrite;
|
||||
|
||||
bytes_to_write = vstruct_end(i) - data;
|
||||
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
|
||||
|
||||
memset(data + bytes_to_write, 0,
|
||||
(sectors_to_write << 9) - bytes_to_write);
|
||||
|
||||
BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size);
|
||||
BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
|
||||
BUG_ON(i->seq != b->data->keys.seq);
|
||||
|
||||
@ -1445,6 +1501,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
nonce = btree_nonce(b, i, b->written << 9);
|
||||
|
||||
/* if we're going to be encrypting, check metadata validity first: */
|
||||
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||
validate_bset_for_write(c, b, i, sectors_to_write))
|
||||
goto err;
|
||||
|
||||
if (bn) {
|
||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
||||
&bn->flags,
|
||||
@ -1464,15 +1525,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||
}
|
||||
|
||||
bytes_to_write = vstruct_end(i) - data;
|
||||
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
|
||||
|
||||
memset(data + bytes_to_write, 0,
|
||||
(sectors_to_write << 9) - bytes_to_write);
|
||||
|
||||
BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size);
|
||||
|
||||
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
||||
/* if we're not encrypting, check metadata after checksumming: */
|
||||
if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||
validate_bset_for_write(c, b, i, sectors_to_write))
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* We handle btree write errors by immediately halting the journal -
|
||||
@ -1488,14 +1544,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
* break:
|
||||
*/
|
||||
if (bch2_journal_error(&c->journal) ||
|
||||
c->opts.nochanges) {
|
||||
set_btree_node_noevict(b);
|
||||
b->written += sectors_to_write;
|
||||
c->opts.nochanges)
|
||||
goto err;
|
||||
|
||||
btree_bounce_free(c, order, used_mempool, data);
|
||||
btree_node_write_done(c, b);
|
||||
return;
|
||||
}
|
||||
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write);
|
||||
|
||||
@ -1543,6 +1595,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
b->written += sectors_to_write;
|
||||
|
||||
bch2_submit_wbio_replicas(wbio, c, &k.key);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
b->written += sectors_to_write;
|
||||
nowrite:
|
||||
btree_bounce_free(c, order, used_mempool, data);
|
||||
btree_node_write_done(c, b);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -27,7 +27,8 @@ static inline void btree_node_wait_on_io(struct btree *b)
|
||||
|
||||
static inline bool btree_node_may_write(struct btree *b)
|
||||
{
|
||||
return list_empty_careful(&b->write_blocked);
|
||||
return list_empty_careful(&b->write_blocked) &&
|
||||
list_empty_careful(&b->reachable);
|
||||
}
|
||||
|
||||
enum compact_mode {
|
||||
@ -80,6 +81,8 @@ void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
||||
#define bch2_btree_node_write_dirty(_c, _b, _cl, cond) \
|
||||
do { \
|
||||
while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
|
||||
set_btree_node_need_write(_b); \
|
||||
\
|
||||
if (!btree_node_may_write(_b)) \
|
||||
break; \
|
||||
\
|
||||
|
@ -1109,6 +1109,26 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
|
||||
prefetch(c->btree_roots[btree_id].b);
|
||||
}
|
||||
|
||||
void bch2_btree_iter_unlink(struct btree_iter *iter)
|
||||
{
|
||||
struct btree_iter *linked;
|
||||
|
||||
__bch2_btree_iter_unlock(iter);
|
||||
|
||||
if (!btree_iter_linked(iter))
|
||||
return;
|
||||
|
||||
for_each_linked_btree_iter(iter, linked) {
|
||||
|
||||
if (linked->next == iter) {
|
||||
linked->next = iter->next;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BUG();
|
||||
}
|
||||
|
||||
void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
|
||||
{
|
||||
BUG_ON(btree_iter_linked(new));
|
||||
@ -1128,7 +1148,7 @@ void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
|
||||
|
||||
void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
|
||||
{
|
||||
bch2_btree_iter_unlock(dst);
|
||||
__bch2_btree_iter_unlock(dst);
|
||||
memcpy(dst, src, offsetof(struct btree_iter, next));
|
||||
dst->nodes_locked = dst->nodes_intent_locked = 0;
|
||||
}
|
||||
|
@ -185,6 +185,7 @@ static inline void bch2_btree_iter_init_intent(struct btree_iter *iter,
|
||||
}
|
||||
|
||||
void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *);
|
||||
void bch2_btree_iter_unlink(struct btree_iter *);
|
||||
void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *);
|
||||
|
||||
static inline struct bpos btree_type_successor(enum btree_id id,
|
||||
|
@ -110,6 +110,14 @@ struct btree {
|
||||
*/
|
||||
struct list_head write_blocked;
|
||||
|
||||
/*
|
||||
* Also for asynchronous splits/interior node updates:
|
||||
* If a btree node isn't reachable yet, we don't want to kick off
|
||||
* another write - because that write also won't yet be reachable and
|
||||
* marking it as completed before it's reachable would be incorrect:
|
||||
*/
|
||||
struct list_head reachable;
|
||||
|
||||
struct open_bucket *ob;
|
||||
|
||||
/* lru list */
|
||||
@ -136,6 +144,7 @@ enum btree_flags {
|
||||
BTREE_NODE_read_error,
|
||||
BTREE_NODE_write_error,
|
||||
BTREE_NODE_dirty,
|
||||
BTREE_NODE_need_write,
|
||||
BTREE_NODE_noevict,
|
||||
BTREE_NODE_write_idx,
|
||||
BTREE_NODE_accessed,
|
||||
@ -146,6 +155,7 @@ enum btree_flags {
|
||||
BTREE_FLAG(read_error);
|
||||
BTREE_FLAG(write_error);
|
||||
BTREE_FLAG(dirty);
|
||||
BTREE_FLAG(need_write);
|
||||
BTREE_FLAG(noevict);
|
||||
BTREE_FLAG(write_idx);
|
||||
BTREE_FLAG(accessed);
|
||||
|
@ -162,9 +162,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
|
||||
trace_btree_node_free(c, b);
|
||||
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
BUG_ON(btree_node_need_write(b));
|
||||
BUG_ON(b == btree_node_root(c, b));
|
||||
BUG_ON(b->ob);
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON(!list_empty(&b->reachable));
|
||||
|
||||
clear_btree_node_noevict(b);
|
||||
|
||||
@ -589,7 +591,6 @@ struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
|
||||
unsigned nr_nodes = btree_reserve_required_nodes(depth) + extra_nodes;
|
||||
|
||||
return __bch2_btree_reserve_get(c, nr_nodes, flags, cl);
|
||||
|
||||
}
|
||||
|
||||
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
@ -598,6 +599,7 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
struct closure cl;
|
||||
struct btree_reserve *reserve;
|
||||
struct btree *b;
|
||||
LIST_HEAD(reachable_list);
|
||||
|
||||
closure_init_stack(&cl);
|
||||
|
||||
@ -614,11 +616,14 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
}
|
||||
|
||||
b = __btree_root_alloc(c, 0, id, reserve);
|
||||
list_add(&b->reachable, &reachable_list);
|
||||
|
||||
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
|
||||
|
||||
bch2_btree_set_root_initial(c, b, reserve);
|
||||
bch2_btree_open_bucket_put(c, b);
|
||||
|
||||
list_del_init(&b->reachable);
|
||||
six_unlock_intent(&b->lock);
|
||||
|
||||
bch2_btree_reserve_put(c, reserve);
|
||||
@ -659,6 +664,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_iter *iter,
|
||||
|
||||
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
|
||||
set_btree_node_dirty(b);
|
||||
set_btree_node_need_write(b);
|
||||
}
|
||||
|
||||
/* Inserting into a given leaf node (last stage of insert): */
|
||||
@ -798,12 +804,6 @@ void bch2_btree_journal_key(struct btree_insert *trans,
|
||||
u64 seq = trans->journal_res.seq;
|
||||
bool needs_whiteout = insert->k.needs_whiteout;
|
||||
|
||||
/*
|
||||
* have a bug where we're seeing an extent with an invalid crc
|
||||
* entry in the journal, trying to track it down:
|
||||
*/
|
||||
BUG_ON(bch2_bkey_invalid(c, b->btree_id, bkey_i_to_s_c(insert)));
|
||||
|
||||
/* ick */
|
||||
insert->k.needs_whiteout = false;
|
||||
bch2_journal_add_keys(j, &trans->journal_res,
|
||||
@ -878,6 +878,8 @@ bch2_btree_interior_update_alloc(struct bch_fs *c)
|
||||
closure_init(&as->cl, &c->cl);
|
||||
as->c = c;
|
||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||
INIT_LIST_HEAD(&as->write_blocked_list);
|
||||
INIT_LIST_HEAD(&as->reachable_list);
|
||||
|
||||
bch2_keylist_init(&as->parent_keys, as->inline_keys,
|
||||
ARRAY_SIZE(as->inline_keys));
|
||||
@ -908,6 +910,18 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
|
||||
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
|
||||
while (!list_empty(&as->reachable_list)) {
|
||||
struct btree *b = list_first_entry(&as->reachable_list,
|
||||
struct btree, reachable);
|
||||
list_del_init(&b->reachable);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
six_lock_read(&b->lock);
|
||||
bch2_btree_node_write_dirty(c, b, NULL, btree_node_need_write(b));
|
||||
six_unlock_read(&b->lock);
|
||||
mutex_lock(&c->btree_interior_update_lock);
|
||||
}
|
||||
|
||||
for (i = 0; i < as->nr_pending; i++)
|
||||
bch2_btree_node_free_ondisk(c, &as->pending[i]);
|
||||
as->nr_pending = 0;
|
||||
@ -929,6 +943,7 @@ static void btree_interior_update_nodes_written(struct closure *cl)
|
||||
|
||||
if (bch2_journal_error(&c->journal)) {
|
||||
/* XXX what? */
|
||||
/* we don't want to free the nodes on disk, that's what */
|
||||
}
|
||||
|
||||
/* XXX: missing error handling, damnit */
|
||||
@ -962,7 +977,8 @@ retry:
|
||||
list_del(&as->write_blocked_list);
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
|
||||
bch2_btree_node_write_dirty(c, b, NULL, true);
|
||||
bch2_btree_node_write_dirty(c, b, NULL,
|
||||
btree_node_need_write(b));
|
||||
six_unlock_read(&b->lock);
|
||||
break;
|
||||
|
||||
@ -1135,6 +1151,7 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
||||
}
|
||||
|
||||
clear_btree_node_dirty(b);
|
||||
clear_btree_node_need_write(b);
|
||||
w = btree_current_write(b);
|
||||
|
||||
llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list)
|
||||
@ -1152,6 +1169,8 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
||||
&as->journal, interior_update_flush);
|
||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||
|
||||
if (!list_empty(&b->reachable))
|
||||
list_del_init(&b->reachable);
|
||||
|
||||
mutex_unlock(&c->btree_interior_update_lock);
|
||||
}
|
||||
@ -1265,7 +1284,8 @@ bch2_btree_insert_keys_interior(struct btree *b,
|
||||
* node)
|
||||
*/
|
||||
static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n1,
|
||||
struct btree_reserve *reserve)
|
||||
struct btree_reserve *reserve,
|
||||
struct btree_interior_update *as)
|
||||
{
|
||||
size_t nr_packed = 0, nr_unpacked = 0;
|
||||
struct btree *n2;
|
||||
@ -1273,6 +1293,8 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
|
||||
struct bkey_packed *k, *prev = NULL;
|
||||
|
||||
n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
|
||||
list_add(&n2->reachable, &as->reachable_list);
|
||||
|
||||
n2->data->max_key = n1->data->max_key;
|
||||
n2->data->format = n1->format;
|
||||
n2->key.k.p = n1->key.k.p;
|
||||
@ -1421,13 +1443,15 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||
|
||||
n1 = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||
list_add(&n1->reachable, &as->reachable_list);
|
||||
|
||||
if (b->level)
|
||||
btree_split_insert_keys(iter, n1, insert_keys, reserve);
|
||||
|
||||
if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) {
|
||||
trace_btree_node_split(c, b, b->nr.live_u64s);
|
||||
|
||||
n2 = __btree_split_node(iter, n1, reserve);
|
||||
n2 = __btree_split_node(iter, n1, reserve, as);
|
||||
|
||||
bch2_btree_build_aux_trees(n2);
|
||||
bch2_btree_build_aux_trees(n1);
|
||||
@ -1449,6 +1473,8 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
||||
n3 = __btree_root_alloc(c, b->level + 1,
|
||||
iter->btree_id,
|
||||
reserve);
|
||||
list_add(&n3->reachable, &as->reachable_list);
|
||||
|
||||
n3->sib_u64s[0] = U16_MAX;
|
||||
n3->sib_u64s[1] = U16_MAX;
|
||||
|
||||
@ -1748,6 +1774,8 @@ retry:
|
||||
bch2_btree_interior_update_will_free_node(c, as, m);
|
||||
|
||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
|
||||
list_add(&n->reachable, &as->reachable_list);
|
||||
|
||||
n->data->min_key = prev->data->min_key;
|
||||
n->data->max_key = next->data->max_key;
|
||||
n->data->format = new_f;
|
||||
@ -1914,8 +1942,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
|
||||
int ret;
|
||||
|
||||
trans_for_each_entry(trans, i) {
|
||||
EBUG_ON(i->iter->level);
|
||||
EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
|
||||
BUG_ON(i->iter->level);
|
||||
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
|
||||
}
|
||||
|
||||
sort(trans->entries, trans->nr, sizeof(trans->entries[0]),
|
||||
@ -2076,6 +2104,19 @@ err:
|
||||
goto out;
|
||||
}
|
||||
|
||||
int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
|
||||
{
|
||||
struct bkey_i k;
|
||||
|
||||
bkey_init(&k.k);
|
||||
k.k.p = iter->pos;
|
||||
|
||||
return bch2_btree_insert_at(iter->c, NULL, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_USE_RESERVE|flags,
|
||||
BTREE_INSERT_ENTRY(iter, &k));
|
||||
}
|
||||
|
||||
int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||
struct keylist *keys,
|
||||
struct disk_reservation *disk_res,
|
||||
@ -2104,45 +2145,6 @@ int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_btree_insert_check_key - insert dummy key into btree
|
||||
*
|
||||
* We insert a random key on a cache miss, then compare exchange on it
|
||||
* once the cache promotion or backing device read completes. This
|
||||
* ensures that if this key is written to after the read, the read will
|
||||
* lose and not overwrite the key with stale data.
|
||||
*
|
||||
* Return values:
|
||||
* -EAGAIN: @iter->cl was put on a waitlist waiting for btree node allocation
|
||||
* -EINTR: btree node was changed while upgrading to write lock
|
||||
*/
|
||||
int bch2_btree_insert_check_key(struct btree_iter *iter,
|
||||
struct bkey_i *check_key)
|
||||
{
|
||||
struct bpos saved_pos = iter->pos;
|
||||
struct bkey_i_cookie *cookie;
|
||||
BKEY_PADDED(key) tmp;
|
||||
int ret;
|
||||
|
||||
BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&check_key->k)));
|
||||
|
||||
check_key->k.type = KEY_TYPE_COOKIE;
|
||||
set_bkey_val_bytes(&check_key->k, sizeof(struct bch_cookie));
|
||||
|
||||
cookie = bkey_i_to_cookie(check_key);
|
||||
get_random_bytes(&cookie->v, sizeof(cookie->v));
|
||||
|
||||
bkey_copy(&tmp.key, check_key);
|
||||
|
||||
ret = bch2_btree_insert_at(iter->c, NULL, NULL, NULL,
|
||||
BTREE_INSERT_ATOMIC,
|
||||
BTREE_INSERT_ENTRY(iter, &tmp.key));
|
||||
|
||||
bch2_btree_iter_rewind(iter, saved_pos);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch2_btree_insert - insert keys into the extent btree
|
||||
* @c: pointer to struct bch_fs
|
||||
@ -2310,6 +2312,7 @@ int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b,
|
||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||
|
||||
n = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||
list_add(&n->reachable, &as->reachable_list);
|
||||
|
||||
bch2_btree_build_aux_trees(n);
|
||||
six_unlock_write(&n->lock);
|
||||
|
@ -64,7 +64,7 @@ struct pending_btree_node_free {
|
||||
*/
|
||||
struct btree_interior_update {
|
||||
struct closure cl;
|
||||
struct bch_fs *c;
|
||||
struct bch_fs *c;
|
||||
|
||||
struct list_head list;
|
||||
|
||||
@ -86,6 +86,7 @@ struct btree_interior_update {
|
||||
*/
|
||||
struct btree *b;
|
||||
struct list_head write_blocked_list;
|
||||
struct list_head reachable_list;
|
||||
|
||||
/*
|
||||
* BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
|
||||
@ -317,7 +318,6 @@ struct btree_insert {
|
||||
|
||||
int __bch2_btree_insert_at(struct btree_insert *);
|
||||
|
||||
|
||||
#define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
|
||||
#define COUNT_ARGS(...) _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
@ -380,6 +380,8 @@ int __bch2_btree_insert_at(struct btree_insert *);
|
||||
*/
|
||||
#define BTREE_INSERT_JOURNAL_REPLAY (1 << 3)
|
||||
|
||||
int bch2_btree_delete_at(struct btree_iter *, unsigned);
|
||||
|
||||
int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *, u64 *, unsigned);
|
||||
@ -403,7 +405,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
|
||||
return u64s <= trans->journal_res.u64s;
|
||||
}
|
||||
|
||||
int bch2_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
|
||||
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
|
||||
struct disk_reservation *,
|
||||
struct extent_insert_hook *, u64 *, int flags);
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include "debug.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "fs-gc.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "super.h"
|
||||
|
@ -20,6 +20,11 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
|
||||
return len;
|
||||
}
|
||||
|
||||
static unsigned dirent_val_u64s(unsigned len)
|
||||
{
|
||||
return DIV_ROUND_UP(sizeof(struct bch_dirent) + len, sizeof(u64));
|
||||
}
|
||||
|
||||
static u64 bch2_dirent_hash(const struct bch_hash_info *info,
|
||||
const struct qstr *name)
|
||||
{
|
||||
@ -64,7 +69,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
|
||||
return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
|
||||
}
|
||||
|
||||
static const struct bch_hash_desc dirent_hash_desc = {
|
||||
const struct bch_hash_desc bch2_dirent_hash_desc = {
|
||||
.btree_id = BTREE_ID_DIRENTS,
|
||||
.key_type = BCH_DIRENT,
|
||||
.whiteout_type = BCH_DIRENT_WHITEOUT,
|
||||
@ -77,12 +82,30 @@ static const struct bch_hash_desc dirent_hash_desc = {
|
||||
static const char *bch2_dirent_invalid(const struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_dirent d;
|
||||
unsigned len;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_DIRENT:
|
||||
return bkey_val_bytes(k.k) < sizeof(struct bch_dirent)
|
||||
? "value too small"
|
||||
: NULL;
|
||||
if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
|
||||
return "value too small";
|
||||
|
||||
d = bkey_s_c_to_dirent(k);
|
||||
len = bch2_dirent_name_bytes(d);
|
||||
|
||||
if (!len)
|
||||
return "empty name";
|
||||
|
||||
if (bkey_val_u64s(k.k) > dirent_val_u64s(len))
|
||||
return "value too big";
|
||||
|
||||
if (len > NAME_MAX)
|
||||
return "dirent name too big";
|
||||
|
||||
if (memchr(d.v->d_name, '/', len))
|
||||
return "dirent name has invalid characters";
|
||||
|
||||
return NULL;
|
||||
case BCH_DIRENT_WHITEOUT:
|
||||
return bkey_val_bytes(k.k) != 0
|
||||
? "value size should be zero"
|
||||
@ -97,21 +120,15 @@ static void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_dirent d;
|
||||
size_t n = 0;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_DIRENT:
|
||||
d = bkey_s_c_to_dirent(k);
|
||||
|
||||
if (size) {
|
||||
unsigned n = min_t(unsigned, size,
|
||||
bch2_dirent_name_bytes(d));
|
||||
memcpy(buf, d.v->d_name, n);
|
||||
buf[size - 1] = '\0';
|
||||
buf += n;
|
||||
size -= n;
|
||||
}
|
||||
|
||||
scnprintf(buf, size, " -> %llu", d.v->d_inum);
|
||||
n += bch_scnmemcpy(buf + n, size - n, d.v->d_name,
|
||||
bch2_dirent_name_bytes(d));
|
||||
n += scnprintf(buf + n, size - n, " -> %llu", d.v->d_inum);
|
||||
break;
|
||||
case BCH_DIRENT_WHITEOUT:
|
||||
scnprintf(buf, size, "whiteout");
|
||||
@ -128,9 +145,7 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
|
||||
const struct qstr *name, u64 dst)
|
||||
{
|
||||
struct bkey_i_dirent *dirent;
|
||||
unsigned u64s = BKEY_U64s +
|
||||
DIV_ROUND_UP(sizeof(struct bch_dirent) + name->len,
|
||||
sizeof(u64));
|
||||
unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
|
||||
|
||||
dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS);
|
||||
if (!dirent)
|
||||
@ -163,7 +178,7 @@ int bch2_dirent_create(struct bch_fs *c, u64 dir_inum,
|
||||
if (!dirent)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = bch2_hash_set(dirent_hash_desc, hash_info, c, dir_inum,
|
||||
ret = bch2_hash_set(bch2_dirent_hash_desc, hash_info, c, dir_inum,
|
||||
journal_seq, &dirent->k_i, flags);
|
||||
kfree(dirent);
|
||||
|
||||
@ -223,13 +238,13 @@ retry:
|
||||
* from the original hashed position (like we do when creating dirents,
|
||||
* in bch_hash_set) - we never move existing dirents to different slot:
|
||||
*/
|
||||
old_src = bch2_hash_lookup_at(dirent_hash_desc,
|
||||
old_src = bch2_hash_lookup_at(bch2_dirent_hash_desc,
|
||||
&src_ei->str_hash,
|
||||
&src_iter, src_name);
|
||||
if ((ret = btree_iter_err(old_src)))
|
||||
goto err;
|
||||
|
||||
ret = bch2_hash_needs_whiteout(dirent_hash_desc,
|
||||
ret = bch2_hash_needs_whiteout(bch2_dirent_hash_desc,
|
||||
&src_ei->str_hash,
|
||||
&whiteout_iter, &src_iter);
|
||||
if (ret < 0)
|
||||
@ -242,8 +257,8 @@ retry:
|
||||
* to do that check for us for correctness:
|
||||
*/
|
||||
old_dst = mode == BCH_RENAME
|
||||
? bch2_hash_hole_at(dirent_hash_desc, &dst_iter)
|
||||
: bch2_hash_lookup_at(dirent_hash_desc,
|
||||
? bch2_hash_hole_at(bch2_dirent_hash_desc, &dst_iter)
|
||||
: bch2_hash_lookup_at(bch2_dirent_hash_desc,
|
||||
&dst_ei->str_hash,
|
||||
&dst_iter, dst_name);
|
||||
if ((ret = btree_iter_err(old_dst)))
|
||||
@ -330,7 +345,7 @@ int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum,
|
||||
const struct qstr *name,
|
||||
u64 *journal_seq)
|
||||
{
|
||||
return bch2_hash_delete(dirent_hash_desc, hash_info,
|
||||
return bch2_hash_delete(bch2_dirent_hash_desc, hash_info,
|
||||
c, dir_inum, journal_seq, name);
|
||||
}
|
||||
|
||||
@ -342,7 +357,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
|
||||
struct bkey_s_c k;
|
||||
u64 inum;
|
||||
|
||||
k = bch2_hash_lookup(dirent_hash_desc, hash_info, c,
|
||||
k = bch2_hash_lookup(bch2_dirent_hash_desc, hash_info, c,
|
||||
dir_inum, &iter, name);
|
||||
if (IS_ERR(k.k)) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
@ -1,6 +1,9 @@
|
||||
#ifndef _BCACHE_DIRENT_H
|
||||
#define _BCACHE_DIRENT_H
|
||||
|
||||
#include "str_hash.h"
|
||||
|
||||
extern const struct bch_hash_desc bch2_dirent_hash_desc;
|
||||
extern const struct bkey_ops bch2_bkey_dirent_ops;
|
||||
|
||||
struct qstr;
|
||||
|
@ -49,3 +49,102 @@ void bch2_nonfatal_io_error(struct bch_dev *ca)
|
||||
{
|
||||
queue_work(system_long_wq, &ca->io_error_work);
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#define ask_yn() false
|
||||
#else
|
||||
#include "tools-util.h"
|
||||
#endif
|
||||
|
||||
enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct fsck_err_state *s;
|
||||
va_list args;
|
||||
bool fix = false, print = true, suppressing = false;
|
||||
char _buf[sizeof(s->buf)], *buf = _buf;
|
||||
|
||||
mutex_lock(&c->fsck_error_lock);
|
||||
|
||||
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||
goto print;
|
||||
|
||||
list_for_each_entry(s, &c->fsck_errors, list)
|
||||
if (s->fmt == fmt)
|
||||
goto found;
|
||||
|
||||
s = kzalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (!s) {
|
||||
if (!c->fsck_alloc_err)
|
||||
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
|
||||
c->fsck_alloc_err = true;
|
||||
buf = _buf;
|
||||
goto print;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&s->list);
|
||||
s->fmt = fmt;
|
||||
found:
|
||||
list_move(&s->list, &c->fsck_errors);
|
||||
s->nr++;
|
||||
suppressing = s->nr == 10;
|
||||
print = s->nr <= 10;
|
||||
buf = s->buf;
|
||||
print:
|
||||
va_start(args, fmt);
|
||||
vscnprintf(buf, sizeof(_buf), fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (flags & FSCK_CAN_FIX) {
|
||||
if (c->opts.fix_errors == FSCK_ERR_ASK) {
|
||||
printk(KERN_ERR "%s: fix?", buf);
|
||||
fix = ask_yn();
|
||||
} else if (c->opts.fix_errors == FSCK_ERR_YES ||
|
||||
(c->opts.nochanges &&
|
||||
!(flags & FSCK_CAN_IGNORE))) {
|
||||
if (print)
|
||||
bch_err(c, "%s, fixing", buf);
|
||||
fix = true;
|
||||
} else {
|
||||
if (print)
|
||||
bch_err(c, "%s, not fixing", buf);
|
||||
fix = false;
|
||||
}
|
||||
} else if (flags & FSCK_NEED_FSCK) {
|
||||
if (print)
|
||||
bch_err(c, "%s (run fsck to correct)", buf);
|
||||
} else {
|
||||
if (print)
|
||||
bch_err(c, "%s (repair unimplemented)", buf);
|
||||
}
|
||||
|
||||
if (suppressing)
|
||||
bch_err(c, "Ratelimiting new instances of previous error");
|
||||
|
||||
mutex_unlock(&c->fsck_error_lock);
|
||||
|
||||
if (fix)
|
||||
set_bit(BCH_FS_FSCK_FIXED_ERRORS, &c->flags);
|
||||
|
||||
return fix ? FSCK_ERR_FIX
|
||||
: flags & FSCK_CAN_IGNORE ? FSCK_ERR_IGNORE
|
||||
: FSCK_ERR_EXIT;
|
||||
}
|
||||
|
||||
void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||
{
|
||||
struct fsck_err_state *s, *n;
|
||||
|
||||
mutex_lock(&c->fsck_error_lock);
|
||||
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||
|
||||
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
|
||||
if (s->nr > 10)
|
||||
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
|
||||
|
||||
list_del(&s->list);
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
mutex_unlock(&c->fsck_error_lock);
|
||||
}
|
||||
|
@ -95,62 +95,38 @@ enum {
|
||||
BCH_FSCK_UNKNOWN_VERSION = 4,
|
||||
};
|
||||
|
||||
/* These macros return true if error should be fixed: */
|
||||
|
||||
/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
|
||||
|
||||
/*
 * Values of the fix_errors option, as consulted by bch2_fsck_err() for
 * errors that are flagged FSCK_CAN_FIX.
 */
enum fsck_err_opts {
	FSCK_ERR_NO,	/* report the error as "not fixing" */
	FSCK_ERR_YES,	/* fix without asking */
	FSCK_ERR_ASK,	/* prompt via ask_yn() for each error */
};
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#define __fsck_err_should_fix(c, msg, ...) \
|
||||
({ \
|
||||
bool _fix = (c)->opts.fix_errors; \
|
||||
bch_err(c, msg ", %sfixing", ##__VA_ARGS__, _fix ? "" : "not ");\
|
||||
_fix; \
|
||||
})
|
||||
#else
|
||||
#include "tools-util.h"
|
||||
/* Result of bch2_fsck_err(): what the caller should do about the error */
enum fsck_err_ret {
	/* not being fixed, but the error was flagged FSCK_CAN_IGNORE */
	FSCK_ERR_IGNORE	= 0,
	/* caller should repair the error */
	FSCK_ERR_FIX	= 1,
	/* not being fixed and cannot be ignored: caller must bail out */
	FSCK_ERR_EXIT	= 2,
};
|
||||
|
||||
#define __fsck_err_should_fix(c, msg, ...) \
|
||||
({ \
|
||||
bool _fix = false; \
|
||||
switch ((c)->opts.fix_errors) { \
|
||||
case FSCK_ERR_ASK: \
|
||||
printf(msg ": fix?", ##__VA_ARGS__); \
|
||||
_fix = ask_yn(); \
|
||||
break; \
|
||||
case FSCK_ERR_YES: \
|
||||
bch_err(c, msg ", fixing", ##__VA_ARGS__); \
|
||||
_fix = true; \
|
||||
break; \
|
||||
case FSCK_ERR_NO: \
|
||||
bch_err(c, msg, ##__VA_ARGS__); \
|
||||
_fix = false; \
|
||||
break; \
|
||||
} \
|
||||
_fix; \
|
||||
})
|
||||
#endif
|
||||
struct fsck_err_state {
|
||||
struct list_head list;
|
||||
const char *fmt;
|
||||
u64 nr;
|
||||
char buf[512];
|
||||
};
|
||||
|
||||
#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \
|
||||
#define FSCK_CAN_FIX (1 << 0)
|
||||
#define FSCK_CAN_IGNORE (1 << 1)
|
||||
#define FSCK_NEED_FSCK (1 << 2)
|
||||
|
||||
enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
|
||||
unsigned, const char *, ...);
|
||||
void bch2_flush_fsck_errs(struct bch_fs *);
|
||||
|
||||
#define __fsck_err(c, _flags, msg, ...) \
|
||||
({ \
|
||||
bool _fix; \
|
||||
int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\
|
||||
\
|
||||
if (_can_fix) { \
|
||||
_fix = __fsck_err_should_fix(c, msg, ##__VA_ARGS__); \
|
||||
} else { \
|
||||
bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \
|
||||
_fix = false; \
|
||||
} \
|
||||
\
|
||||
if (_fix) \
|
||||
set_bit(BCH_FS_FSCK_FIXED_ERRORS, &(c)->flags); \
|
||||
\
|
||||
if (!_fix && !_can_ignore) { \
|
||||
if (_fix == FSCK_ERR_EXIT) { \
|
||||
bch_err(c, "Unable to continue, halting"); \
|
||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||
goto fsck_err; \
|
||||
@ -159,24 +135,27 @@ enum fsck_err_opts {
|
||||
_fix; \
|
||||
})
|
||||
|
||||
#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \
|
||||
((cond) ? __fsck_err(c, _can_fix, _can_ignore, \
|
||||
_nofix_msg, ##__VA_ARGS__) : false)
|
||||
/* These macros return true if error should be fixed: */
|
||||
|
||||
/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
|
||||
|
||||
#define __fsck_err_on(cond, c, _flags, ...) \
|
||||
((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false)
|
||||
|
||||
#define unfixable_fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__)
|
||||
__fsck_err_on(cond, c, FSCK_CAN_IGNORE, ##__VA_ARGS__)
|
||||
|
||||
#define need_fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__)
|
||||
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
|
||||
|
||||
#define mustfix_fsck_err(c, ...) \
|
||||
__fsck_err(c, true, false, "not fixing", ##__VA_ARGS__)
|
||||
__fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
|
||||
|
||||
#define mustfix_fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__)
|
||||
__fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
|
||||
|
||||
#define fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__)
|
||||
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
|
||||
|
||||
/*
|
||||
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
|
||||
|
@ -5,8 +5,8 @@
|
||||
#include "clock.h"
|
||||
#include "error.h"
|
||||
#include "fs.h"
|
||||
#include "fs-gc.h"
|
||||
#include "fs-io.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "journal.h"
|
||||
#include "io.h"
|
||||
|
@ -7,8 +7,8 @@
|
||||
#include "dirent.h"
|
||||
#include "extents.h"
|
||||
#include "fs.h"
|
||||
#include "fs-gc.h"
|
||||
#include "fs-io.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
|
@ -4,10 +4,11 @@
|
||||
#include "dirent.h"
|
||||
#include "error.h"
|
||||
#include "fs.h"
|
||||
#include "fs-gc.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "keylist.h"
|
||||
#include "super.h"
|
||||
#include "xattr.h"
|
||||
|
||||
#include <linux/dcache.h> /* struct qstr */
|
||||
#include <linux/generic-radix-tree.h>
|
||||
@ -37,12 +38,16 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
|
||||
bch2_btree_iter_unlock(iter);
|
||||
|
||||
ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
dir_hash_info = bch2_hash_info_init(c, &dir_inode);
|
||||
|
||||
ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
|
||||
if (ret)
|
||||
bch_err(c, "remove_dirent: err %i deleting dirent", ret);
|
||||
err:
|
||||
kfree(buf);
|
||||
return ret;
|
||||
@ -108,6 +113,118 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct hash_check {
|
||||
struct bch_hash_info info;
|
||||
struct btree_iter chain;
|
||||
struct btree_iter iter;
|
||||
u64 next;
|
||||
};
|
||||
|
||||
static void hash_check_init(const struct bch_hash_desc desc,
|
||||
struct hash_check *h, struct bch_fs *c)
|
||||
{
|
||||
bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN);
|
||||
bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN);
|
||||
}
|
||||
|
||||
static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
|
||||
const struct bch_inode_unpacked *bi)
|
||||
{
|
||||
h->info = bch2_hash_info_init(c, bi);
|
||||
h->next = -1;
|
||||
}
|
||||
|
||||
static int hash_redo_key(const struct bch_hash_desc desc,
|
||||
struct hash_check *h, struct bch_fs *c,
|
||||
struct btree_iter *k_iter, struct bkey_s_c k,
|
||||
u64 hashed)
|
||||
{
|
||||
struct bkey_i *tmp;
|
||||
int ret = 0;
|
||||
|
||||
tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
bkey_reassemble(tmp, k);
|
||||
|
||||
ret = bch2_btree_delete_at(k_iter, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_btree_iter_unlock(k_iter);
|
||||
|
||||
bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL,
|
||||
tmp, BCH_HASH_SET_MUST_CREATE);
|
||||
err:
|
||||
kfree(tmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int hash_check_key(const struct bch_hash_desc desc,
|
||||
struct hash_check *h, struct bch_fs *c,
|
||||
struct btree_iter *k_iter, struct bkey_s_c k)
|
||||
{
|
||||
char buf[200];
|
||||
u64 hashed;
|
||||
int ret = 0;
|
||||
|
||||
if (k.k->type != desc.whiteout_type &&
|
||||
k.k->type != desc.key_type)
|
||||
return 0;
|
||||
|
||||
if (k.k->p.offset != h->next) {
|
||||
if (!btree_iter_linked(&h->chain)) {
|
||||
bch2_btree_iter_link(k_iter, &h->chain);
|
||||
bch2_btree_iter_link(k_iter, &h->iter);
|
||||
}
|
||||
bch2_btree_iter_copy(&h->chain, k_iter);
|
||||
}
|
||||
h->next = k.k->p.offset + 1;
|
||||
|
||||
if (k.k->type != desc.key_type)
|
||||
return 0;
|
||||
|
||||
hashed = desc.hash_bkey(&h->info, k);
|
||||
|
||||
if (fsck_err_on(hashed < h->chain.pos.offset ||
|
||||
hashed > k.k->p.offset, c,
|
||||
"hash table key at wrong offset: %llu, "
|
||||
"hashed to %llu chain starts at %llu\n%s",
|
||||
k.k->p.offset, hashed, h->chain.pos.offset,
|
||||
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
|
||||
if (ret) {
|
||||
bch_err(c, "hash_redo_key err %i", ret);
|
||||
return ret;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!bkey_cmp(h->chain.pos, k_iter->pos))
|
||||
return 0;
|
||||
|
||||
bch2_btree_iter_copy(&h->iter, &h->chain);
|
||||
while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) {
|
||||
struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter);
|
||||
|
||||
if (fsck_err_on(k2.k->type == desc.key_type &&
|
||||
!desc.cmp_bkey(k, k2), c,
|
||||
"duplicate hash table keys:\n%s",
|
||||
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
return 1;
|
||||
}
|
||||
bch2_btree_iter_advance_pos(&h->iter);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
|
||||
* that i_size and i_sectors are consistent
|
||||
@ -130,14 +247,18 @@ static int check_extents(struct bch_fs *c)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
unfixable_fsck_err_on(!w.have_inode, c,
|
||||
if (fsck_err_on(!w.have_inode, c,
|
||||
"extent type %u for missing inode %llu",
|
||||
k.k->type, k.k->p.inode);
|
||||
|
||||
unfixable_fsck_err_on(w.have_inode &&
|
||||
k.k->type, k.k->p.inode) ||
|
||||
fsck_err_on(w.have_inode &&
|
||||
!S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
|
||||
"extent type %u for non regular file, inode %llu mode %o",
|
||||
k.k->type, k.k->p.inode, w.inode.i_mode);
|
||||
k.k->type, k.k->p.inode, w.inode.i_mode)) {
|
||||
ret = bch2_btree_delete_at(&iter, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
|
||||
unfixable_fsck_err_on(w.first_this_inode &&
|
||||
w.have_inode &&
|
||||
@ -154,6 +275,7 @@ static int check_extents(struct bch_fs *c)
|
||||
"extent type %u offset %llu past end of inode %llu, i_size %llu",
|
||||
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
@ -166,10 +288,15 @@ noinline_for_stack
|
||||
static int check_dirents(struct bch_fs *c)
|
||||
{
|
||||
struct inode_walker w = inode_walker_init();
|
||||
struct hash_check h;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
unsigned name_len;
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
|
||||
hash_check_init(bch2_dirent_hash_desc, &h, c);
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
|
||||
POS(BCACHE_ROOT_INO, 0), k) {
|
||||
struct bkey_s_c_dirent d;
|
||||
@ -181,13 +308,32 @@ static int check_dirents(struct bch_fs *c)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
unfixable_fsck_err_on(!w.have_inode, c,
|
||||
"dirent in nonexisting directory %llu",
|
||||
k.k->p.inode);
|
||||
if (fsck_err_on(!w.have_inode, c,
|
||||
"dirent in nonexisting directory:\n%s",
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k)) ||
|
||||
fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
|
||||
"dirent in non directory inode type %u:\n%s",
|
||||
mode_to_type(w.inode.i_mode),
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = bch2_btree_delete_at(&iter, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
|
||||
unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
|
||||
"dirent in non directory inode %llu, type %u",
|
||||
k.k->p.inode, mode_to_type(w.inode.i_mode));
|
||||
if (w.first_this_inode && w.have_inode)
|
||||
hash_check_set_inode(&h, c, &w.inode);
|
||||
|
||||
ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k);
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
|
||||
if (k.k->type != BCH_DIRENT)
|
||||
continue;
|
||||
@ -195,8 +341,25 @@ static int check_dirents(struct bch_fs *c)
|
||||
d = bkey_s_c_to_dirent(k);
|
||||
d_inum = le64_to_cpu(d.v->d_inum);
|
||||
|
||||
name_len = bch2_dirent_name_bytes(d);
|
||||
|
||||
if (fsck_err_on(!name_len, c, "empty dirent") ||
|
||||
fsck_err_on(name_len == 1 &&
|
||||
!memcmp(d.v->d_name, ".", 1), c,
|
||||
". dirent") ||
|
||||
fsck_err_on(name_len == 2 &&
|
||||
!memcmp(d.v->d_name, "..", 2), c,
|
||||
".. dirent")) {
|
||||
ret = remove_dirent(c, &iter, d);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fsck_err_on(d_inum == d.k->p.inode, c,
|
||||
"dirent points to own directory")) {
|
||||
"dirent points to own directory:\n%s",
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = remove_dirent(c, &iter, d);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -211,8 +374,9 @@ static int check_dirents(struct bch_fs *c)
|
||||
ret = 0;
|
||||
|
||||
if (fsck_err_on(!have_target, c,
|
||||
"dirent points to missing inode %llu, type %u filename %s",
|
||||
d_inum, d.v->d_type, d.v->d_name)) {
|
||||
"dirent points to missing inode:\n%s",
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k))) {
|
||||
ret = remove_dirent(c, &iter, d);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -222,10 +386,10 @@ static int check_dirents(struct bch_fs *c)
|
||||
if (fsck_err_on(have_target &&
|
||||
d.v->d_type !=
|
||||
mode_to_type(le16_to_cpu(target.i_mode)), c,
|
||||
"incorrect d_type: got %u should be %u, filename %s",
|
||||
d.v->d_type,
|
||||
"incorrect d_type: should be %u:\n%s",
|
||||
mode_to_type(le16_to_cpu(target.i_mode)),
|
||||
d.v->d_name)) {
|
||||
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||
buf, sizeof(buf), k))) {
|
||||
struct bkey_i_dirent *n;
|
||||
|
||||
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
|
||||
@ -248,6 +412,8 @@ static int check_dirents(struct bch_fs *c)
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_btree_iter_unlock(&h.chain);
|
||||
bch2_btree_iter_unlock(&h.iter);
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
@ -258,21 +424,39 @@ noinline_for_stack
|
||||
static int check_xattrs(struct bch_fs *c)
|
||||
{
|
||||
struct inode_walker w = inode_walker_init();
|
||||
struct hash_check h;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
hash_check_init(bch2_xattr_hash_desc, &h, c);
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
|
||||
POS(BCACHE_ROOT_INO, 0), k) {
|
||||
ret = walk_inode(c, &w, k.k->p.inode);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
unfixable_fsck_err_on(!w.have_inode, c,
|
||||
"xattr for missing inode %llu",
|
||||
k.k->p.inode);
|
||||
if (fsck_err_on(!w.have_inode, c,
|
||||
"xattr for missing inode %llu",
|
||||
k.k->p.inode)) {
|
||||
ret = bch2_btree_delete_at(&iter, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (w.first_this_inode && w.have_inode)
|
||||
hash_check_set_inode(&h, c, &w.inode);
|
||||
|
||||
ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k);
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_btree_iter_unlock(&h.chain);
|
||||
bch2_btree_iter_unlock(&h.iter);
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
@ -445,6 +629,8 @@ static int check_directory_structure(struct bch_fs *c,
|
||||
|
||||
/* DFS: */
|
||||
restart_dfs:
|
||||
had_unreachable = false;
|
||||
|
||||
ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -478,7 +664,8 @@ next:
|
||||
d_inum = le64_to_cpu(dirent.v->d_inum);
|
||||
|
||||
if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
|
||||
"directory with multiple hardlinks")) {
|
||||
"directory %llu has multiple hardlinks",
|
||||
d_inum)) {
|
||||
ret = remove_dirent(c, &iter, dirent);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -503,8 +690,6 @@ up:
|
||||
path.nr--;
|
||||
}
|
||||
|
||||
had_unreachable = false;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
|
||||
if (k.k->type != BCH_INODE_FS ||
|
||||
!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
|
||||
@ -640,7 +825,7 @@ static int bch2_gc_do_inode(struct bch_fs *c,
|
||||
|
||||
ret = bch2_inode_unpack(inode, &u);
|
||||
if (bch2_fs_inconsistent_on(ret, c,
|
||||
"error unpacking inode %llu in fs-gc",
|
||||
"error unpacking inode %llu in fsck",
|
||||
inode.k->p.inode))
|
||||
return ret;
|
||||
|
||||
@ -894,36 +1079,59 @@ int bch2_fsck(struct bch_fs *c, bool full_fsck)
|
||||
struct bch_inode_unpacked root_inode, lostfound_inode;
|
||||
int ret;
|
||||
|
||||
ret = check_root(c, &root_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (full_fsck) {
|
||||
bch_verbose(c, "checking extents");
|
||||
ret = check_extents(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
bch_verbose(c, "checking dirents");
|
||||
ret = check_dirents(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!full_fsck)
|
||||
goto check_nlinks;
|
||||
bch_verbose(c, "checking xattrs");
|
||||
ret = check_xattrs(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_extents(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
bch_verbose(c, "checking root directory");
|
||||
ret = check_root(c, &root_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_dirents(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
bch_verbose(c, "checking lost+found");
|
||||
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_xattrs(c);
|
||||
if (ret)
|
||||
return ret;
|
||||
bch_verbose(c, "checking directory structure");
|
||||
ret = check_directory_structure(c, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = check_directory_structure(c, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
check_nlinks:
|
||||
ret = check_inode_nlinks(c, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
bch_verbose(c, "checking inode nlinks");
|
||||
ret = check_inode_nlinks(c, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
bch_verbose(c, "checking root directory");
|
||||
ret = check_root(c, &root_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch_verbose(c, "checking lost+found");
|
||||
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch_verbose(c, "checking inode nlinks");
|
||||
ret = check_inode_nlinks(c, &lostfound_inode);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_flush_fsck_errs(c);
|
||||
|
||||
return 0;
|
||||
}
|
@ -25,14 +25,12 @@ static const u8 bits_table[8] = {
|
||||
13 * 8 - 8,
|
||||
};
|
||||
|
||||
static int inode_encode_field(u8 *out, u8 *end, const u64 in[2])
|
||||
static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo)
|
||||
{
|
||||
unsigned bytes, bits, shift;
|
||||
|
||||
if (likely(!in[1]))
|
||||
bits = fls64(in[0]);
|
||||
else
|
||||
bits = fls64(in[1]) + 64;
|
||||
__be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), };
|
||||
unsigned shift, bytes, bits = likely(!hi)
|
||||
? fls64(lo)
|
||||
: fls64(hi) + 64;
|
||||
|
||||
for (shift = 1; shift <= 8; shift++)
|
||||
if (bits < bits_table[shift - 1])
|
||||
@ -44,17 +42,7 @@ got_shift:
|
||||
|
||||
BUG_ON(out + bytes > end);
|
||||
|
||||
if (likely(bytes <= 8)) {
|
||||
u64 b = cpu_to_be64(in[0]);
|
||||
|
||||
memcpy(out, (void *) &b + 8 - bytes, bytes);
|
||||
} else {
|
||||
u64 b = cpu_to_be64(in[1]);
|
||||
|
||||
memcpy(out, (void *) &b + 16 - bytes, bytes);
|
||||
put_unaligned_be64(in[0], out + bytes - 8);
|
||||
}
|
||||
|
||||
memcpy(out, (u8 *) in + 16 - bytes, bytes);
|
||||
*out |= (1 << 8) >> shift;
|
||||
|
||||
return bytes;
|
||||
@ -63,7 +51,9 @@ got_shift:
|
||||
static int inode_decode_field(const u8 *in, const u8 *end,
|
||||
u64 out[2], unsigned *out_bits)
|
||||
{
|
||||
unsigned bytes, bits, shift;
|
||||
__be64 be[2] = { 0, 0 };
|
||||
unsigned bytes, shift;
|
||||
u8 *p;
|
||||
|
||||
if (in >= end)
|
||||
return -1;
|
||||
@ -77,29 +67,18 @@ static int inode_decode_field(const u8 *in, const u8 *end,
|
||||
*/
|
||||
shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
|
||||
bytes = byte_table[shift - 1];
|
||||
bits = bytes * 8 - shift;
|
||||
|
||||
if (in + bytes > end)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* we're assuming it's safe to deref up to 7 bytes < in; this will work
|
||||
* because keys always start quite a bit more than 7 bytes after the
|
||||
* start of the btree node header:
|
||||
*/
|
||||
if (likely(bytes <= 8)) {
|
||||
out[0] = get_unaligned_be64(in + bytes - 8);
|
||||
out[0] <<= 64 - bits;
|
||||
out[0] >>= 64 - bits;
|
||||
out[1] = 0;
|
||||
} else {
|
||||
out[0] = get_unaligned_be64(in + bytes - 8);
|
||||
out[1] = get_unaligned_be64(in + bytes - 16);
|
||||
out[1] <<= 128 - bits;
|
||||
out[1] >>= 128 - bits;
|
||||
}
|
||||
p = (u8 *) be + 16 - bytes;
|
||||
memcpy(p, in, bytes);
|
||||
*p ^= (1 << 8) >> shift;
|
||||
|
||||
out[0] = be64_to_cpu(be[0]);
|
||||
out[1] = be64_to_cpu(be[1]);
|
||||
*out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
|
||||
|
||||
*out_bits = out[1] ? 64 + fls64(out[1]) : fls64(out[0]);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -109,7 +88,6 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
|
||||
u8 *out = packed->inode.v.fields;
|
||||
u8 *end = (void *) &packed[1];
|
||||
u8 *last_nonzero_field = out;
|
||||
u64 field[2];
|
||||
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
|
||||
|
||||
bkey_inode_init(&packed->inode.k_i);
|
||||
@ -119,12 +97,10 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
|
||||
packed->inode.v.i_mode = cpu_to_le16(inode->i_mode);
|
||||
|
||||
#define BCH_INODE_FIELD(_name, _bits) \
|
||||
field[0] = inode->_name; \
|
||||
field[1] = 0; \
|
||||
out += inode_encode_field(out, end, field); \
|
||||
out += inode_encode_field(out, end, 0, inode->_name); \
|
||||
nr_fields++; \
|
||||
\
|
||||
if (field[0] | field[1]) { \
|
||||
if (inode->_name) { \
|
||||
last_nonzero_field = out; \
|
||||
last_nonzero_fieldnr = nr_fields; \
|
||||
}
|
||||
@ -187,7 +163,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
|
||||
if (field_bits > sizeof(unpacked->_name) * 8) \
|
||||
return -1; \
|
||||
\
|
||||
unpacked->_name = field[0]; \
|
||||
unpacked->_name = field[1]; \
|
||||
in += ret;
|
||||
|
||||
BCH_INODE_FIELDS()
|
||||
@ -449,3 +425,32 @@ int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_inode_pack_test(void)
|
||||
{
|
||||
struct bch_inode_unpacked *u, test_inodes[] = {
|
||||
{
|
||||
.i_atime = U64_MAX,
|
||||
.i_ctime = U64_MAX,
|
||||
.i_mtime = U64_MAX,
|
||||
.i_otime = U64_MAX,
|
||||
.i_size = U64_MAX,
|
||||
.i_sectors = U64_MAX,
|
||||
.i_uid = U32_MAX,
|
||||
.i_gid = U32_MAX,
|
||||
.i_nlink = U32_MAX,
|
||||
.i_generation = U32_MAX,
|
||||
.i_dev = U32_MAX,
|
||||
},
|
||||
};
|
||||
|
||||
for (u = test_inodes;
|
||||
u < test_inodes + ARRAY_SIZE(test_inodes);
|
||||
u++) {
|
||||
struct bkey_inode_buf p;
|
||||
|
||||
bch2_inode_pack(&p, u);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -54,4 +54,10 @@ static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec ts)
|
||||
return div_s64(ns, c->sb.time_precision);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||
void bch2_inode_pack_test(void);
|
||||
#else
|
||||
static inline void bch2_inode_pack_test(void) {}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -910,8 +910,8 @@ static int bio_checksum_uncompress(struct bch_fs *c,
|
||||
bch2_encrypt_bio(c, rbio->crc.csum_type,
|
||||
nonce, src);
|
||||
|
||||
bio_copy_data_iter(dst, dst_iter,
|
||||
src, src->bi_iter);
|
||||
bio_copy_data_iter(dst, &dst_iter,
|
||||
src, &src->bi_iter);
|
||||
} else {
|
||||
bch2_encrypt_bio(c, rbio->crc.csum_type, nonce, src);
|
||||
}
|
||||
|
@ -527,62 +527,34 @@ fsck_err:
|
||||
#define JOURNAL_ENTRY_NONE 6
|
||||
#define JOURNAL_ENTRY_BAD 7
|
||||
|
||||
static int journal_entry_validate(struct bch_fs *c,
|
||||
struct jset *j, u64 sector,
|
||||
unsigned bucket_sectors_left,
|
||||
unsigned sectors_read)
|
||||
#define journal_entry_err(c, msg, ...) \
|
||||
({ \
|
||||
if (write == READ) { \
|
||||
mustfix_fsck_err(c, msg, ##__VA_ARGS__); \
|
||||
} else { \
|
||||
bch_err(c, "detected corrupt metadata before write:\n" \
|
||||
msg, ##__VA_ARGS__); \
|
||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||
goto fsck_err; \
|
||||
} \
|
||||
true; \
|
||||
})
|
||||
|
||||
/*
 * journal_entry_err_on - conditional form of journal_entry_err().
 *
 * Evaluates @cond; if it is true, expands journal_entry_err() with the given
 * message and yields its value, otherwise yields false.
 */
#define journal_entry_err_on(cond, c, msg, ...) \
|
||||
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
|
||||
|
||||
static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||
int write)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
size_t bytes = vstruct_bytes(j);
|
||||
struct bch_csum csum;
|
||||
int ret = 0;
|
||||
|
||||
if (le64_to_cpu(j->magic) != jset_magic(c))
|
||||
return JOURNAL_ENTRY_NONE;
|
||||
|
||||
if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
|
||||
bch_err(c, "unknown journal entry version %u",
|
||||
le32_to_cpu(j->version));
|
||||
return BCH_FSCK_UNKNOWN_VERSION;
|
||||
}
|
||||
|
||||
if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9, c,
|
||||
"journal entry too big (%zu bytes), sector %lluu",
|
||||
bytes, sector)) {
|
||||
/* XXX: note we might have missing journal entries */
|
||||
return JOURNAL_ENTRY_BAD;
|
||||
}
|
||||
|
||||
if (bytes > sectors_read << 9)
|
||||
return JOURNAL_ENTRY_REREAD;
|
||||
|
||||
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
|
||||
"journal entry with unknown csum type %llu sector %lluu",
|
||||
JSET_CSUM_TYPE(j), sector))
|
||||
return JOURNAL_ENTRY_BAD;
|
||||
|
||||
csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
|
||||
if (mustfix_fsck_err_on(bch2_crc_cmp(csum, j->csum), c,
|
||||
"journal checksum bad, sector %llu", sector)) {
|
||||
/* XXX: retry IO, when we start retrying checksum errors */
|
||||
/* XXX: note we might have missing journal entries */
|
||||
return JOURNAL_ENTRY_BAD;
|
||||
}
|
||||
|
||||
bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
|
||||
j->encrypted_start,
|
||||
vstruct_end(j) - (void *) j->encrypted_start);
|
||||
|
||||
if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
|
||||
"invalid journal entry: last_seq > seq"))
|
||||
j->last_seq = j->seq;
|
||||
|
||||
vstruct_for_each(j, entry) {
|
||||
struct bkey_i *k;
|
||||
|
||||
if (mustfix_fsck_err_on(vstruct_next(entry) >
|
||||
vstruct_last(j), c,
|
||||
"journal entry extents past end of jset")) {
|
||||
if (journal_entry_err_on(vstruct_next(entry) >
|
||||
vstruct_last(j), c,
|
||||
"journal entry extends past end of jset")) {
|
||||
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
|
||||
break;
|
||||
}
|
||||
@ -602,7 +574,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
||||
case JOURNAL_ENTRY_BTREE_ROOT:
|
||||
k = entry->start;
|
||||
|
||||
if (mustfix_fsck_err_on(!entry->u64s ||
|
||||
if (journal_entry_err_on(!entry->u64s ||
|
||||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
|
||||
"invalid btree root journal entry: wrong number of keys")) {
|
||||
journal_entry_null_range(entry,
|
||||
@ -620,7 +592,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
||||
break;
|
||||
|
||||
case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED:
|
||||
if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
|
||||
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c,
|
||||
"invalid journal seq blacklist entry: bad size")) {
|
||||
journal_entry_null_range(entry,
|
||||
vstruct_next(entry));
|
||||
@ -628,7 +600,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
||||
|
||||
break;
|
||||
default:
|
||||
mustfix_fsck_err(c, "invalid journal entry type %llu",
|
||||
journal_entry_err(c, "invalid journal entry type %llu",
|
||||
JOURNAL_ENTRY_TYPE(entry));
|
||||
journal_entry_null_range(entry, vstruct_next(entry));
|
||||
break;
|
||||
@ -639,6 +611,61 @@ fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Validate the header of a journal entry, then hand the entry to
 * __journal_entry_validate() for the per-entry checks.
 *
 * @c:				filesystem the journal belongs to
 * @j:				journal entry (jset) to validate
 * @sector:			sector the entry was found at; used only in
 *				error messages
 * @bucket_sectors_left:	sectors remaining in the bucket; an entry
 *				larger than this (in bytes) is reported as bad
 * @sectors_read:		sectors read into the buffer so far
 * @write:			READ or WRITE; consumed by the
 *				journal_entry_err*() macros and passed on to
 *				__journal_entry_validate()
 *
 * Returns:
 *   JOURNAL_ENTRY_NONE		magic does not match jset_magic(c)
 *   BCH_FSCK_UNKNOWN_VERSION	version is not BCACHE_JSET_VERSION
 *   JOURNAL_ENTRY_BAD		entry too big for the bucket, unknown checksum
 *				type, or checksum mismatch
 *   JOURNAL_ENTRY_REREAD	entry is larger than what has been read
 *   otherwise the return value of __journal_entry_validate(), or whatever
 *   `ret` holds when one of the error macros jumps to fsck_err.
 *
 * `ret` and the `fsck_err` label are referenced by the journal_entry_err*()
 * macros and must stay in place.
 */
static int journal_entry_validate(struct bch_fs *c,
				  struct jset *j, u64 sector,
				  unsigned bucket_sectors_left,
				  unsigned sectors_read,
				  int write)
{
	size_t bytes = vstruct_bytes(j);
	struct bch_csum csum;
	int ret = 0;

	if (le64_to_cpu(j->magic) != jset_magic(c))
		return JOURNAL_ENTRY_NONE;

	if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
		bch_err(c, "unknown journal entry version %u",
			le32_to_cpu(j->version));
		return BCH_FSCK_UNKNOWN_VERSION;
	}

	/* Format fixed: "%lluu" printed a stray 'u' after the sector number */
	if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
			"journal entry too big (%zu bytes), sector %llu",
			bytes, sector)) {
		/* XXX: note we might have missing journal entries */
		return JOURNAL_ENTRY_BAD;
	}

	/* Caller has not read the whole entry yet */
	if (bytes > sectors_read << 9)
		return JOURNAL_ENTRY_REREAD;

	/* Format fixed: "%lluu" printed a stray 'u' after the sector number */
	if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
			"journal entry with unknown csum type %llu sector %llu",
			JSET_CSUM_TYPE(j), sector))
		return JOURNAL_ENTRY_BAD;

	csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
	if (journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c,
			"journal checksum bad, sector %llu", sector)) {
		/* XXX: retry IO, when we start retrying checksum errors */
		/* XXX: note we might have missing journal entries */
		return JOURNAL_ENTRY_BAD;
	}

	/*
	 * The checksum is verified above, before bch2_encrypt() is run over
	 * the region from encrypted_start to the end of the jset.
	 */
	bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
		     j->encrypted_start,
		     vstruct_end(j) - (void *) j->encrypted_start);

	/* Repair: clamp last_seq so it never exceeds seq */
	if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
			"invalid journal entry: last_seq > seq"))
		j->last_seq = j->seq;

	return __journal_entry_validate(c, j, write);
fsck_err:
	return ret;
}
|
||||
|
||||
struct journal_read_buf {
|
||||
void *data;
|
||||
size_t size;
|
||||
@ -705,7 +732,8 @@ reread: sectors_read = min_t(unsigned,
|
||||
}
|
||||
|
||||
ret = journal_entry_validate(c, j, offset,
|
||||
end - offset, sectors_read);
|
||||
end - offset, sectors_read,
|
||||
READ);
|
||||
switch (ret) {
|
||||
case BCH_FSCK_OK:
|
||||
break;
|
||||
@ -2274,6 +2302,10 @@ static void journal_write(struct closure *cl)
|
||||
SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
|
||||
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
|
||||
|
||||
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||
__journal_entry_validate(c, jset, WRITE))
|
||||
goto err;
|
||||
|
||||
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||
jset->encrypted_start,
|
||||
vstruct_end(jset) - (void *) jset->encrypted_start);
|
||||
@ -2281,6 +2313,10 @@ static void journal_write(struct closure *cl)
|
||||
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
|
||||
journal_nonce(jset), jset);
|
||||
|
||||
if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||
__journal_entry_validate(c, jset, WRITE))
|
||||
goto err;
|
||||
|
||||
sectors = vstruct_sectors(jset, c->block_bits);
|
||||
BUG_ON(sectors > j->prev_buf_sectors);
|
||||
|
||||
@ -2349,6 +2385,9 @@ no_io:
|
||||
ptr->offset += sectors;
|
||||
|
||||
closure_return_with_destructor(cl, journal_write_done);
|
||||
err:
|
||||
bch2_fatal_error(c);
|
||||
closure_return_with_destructor(cl, journal_write_done);
|
||||
}
|
||||
|
||||
static void journal_write_work(struct work_struct *work)
|
||||
|
@ -2,7 +2,9 @@
|
||||
#define _BCACHE_STR_HASH_H
|
||||
|
||||
#include "btree_iter.h"
|
||||
#include "btree_update.h"
|
||||
#include "checksum.h"
|
||||
#include "error.h"
|
||||
#include "inode.h"
|
||||
#include "siphash.h"
|
||||
#include "super.h"
|
||||
@ -341,6 +343,36 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_hash_delete_at(const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
struct btree_iter *iter,
|
||||
u64 *journal_seq)
|
||||
{
|
||||
struct btree_iter whiteout_iter;
|
||||
struct bkey_i delete;
|
||||
int ret = -ENOENT;
|
||||
|
||||
bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id,
|
||||
iter->pos);
|
||||
bch2_btree_iter_link(iter, &whiteout_iter);
|
||||
|
||||
ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
bkey_init(&delete.k);
|
||||
delete.k.p = iter->pos;
|
||||
delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
|
||||
|
||||
ret = bch2_btree_insert_at(iter->c, NULL, NULL, journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_ATOMIC,
|
||||
BTREE_INSERT_ENTRY(iter, &delete));
|
||||
err:
|
||||
bch2_btree_iter_unlink(&whiteout_iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int bch2_hash_delete(const struct bch_hash_desc desc,
|
||||
const struct bch_hash_info *info,
|
||||
struct bch_fs *c, u64 inode,
|
||||
@ -348,7 +380,6 @@ static inline int bch2_hash_delete(const struct bch_hash_desc desc,
|
||||
{
|
||||
struct btree_iter iter, whiteout_iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_i delete;
|
||||
int ret = -ENOENT;
|
||||
|
||||
bch2_btree_iter_init_intent(&iter, c, desc.btree_id,
|
||||
@ -361,18 +392,7 @@ retry:
|
||||
if ((ret = btree_iter_err(k)))
|
||||
goto err;
|
||||
|
||||
ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, &iter);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
bkey_init(&delete.k);
|
||||
delete.k.p = k.k->p;
|
||||
delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_ATOMIC,
|
||||
BTREE_INSERT_ENTRY(&iter, &delete));
|
||||
ret = bch2_hash_delete_at(desc, info, &iter, journal_seq);
|
||||
err:
|
||||
if (ret == -EINTR)
|
||||
goto retry;
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "debug.h"
|
||||
#include "error.h"
|
||||
#include "fs.h"
|
||||
#include "fs-gc.h"
|
||||
#include "fsck.h"
|
||||
#include "inode.h"
|
||||
#include "io.h"
|
||||
#include "journal.h"
|
||||
@ -513,6 +513,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
INIT_WORK(&c->read_retry_work, bch2_read_retry_work);
|
||||
mutex_init(&c->zlib_workspace_lock);
|
||||
|
||||
INIT_LIST_HEAD(&c->fsck_errors);
|
||||
mutex_init(&c->fsck_error_lock);
|
||||
|
||||
seqcount_init(&c->gc_pos_lock);
|
||||
|
||||
c->prio_clock[READ].hand = 1;
|
||||
@ -875,12 +878,12 @@ err:
|
||||
switch (ret) {
|
||||
case BCH_FSCK_ERRORS_NOT_FIXED:
|
||||
bch_err(c, "filesystem contains errors: please report this to the developers");
|
||||
pr_cont("mount with -o fix_errors to repair");
|
||||
pr_cont("mount with -o fix_errors to repair\n");
|
||||
err = "fsck error";
|
||||
break;
|
||||
case BCH_FSCK_REPAIR_UNIMPLEMENTED:
|
||||
bch_err(c, "filesystem contains errors: please report this to the developers");
|
||||
pr_cont("repair unimplemented: inform the developers so that it can be added");
|
||||
pr_cont("repair unimplemented: inform the developers so that it can be added\n");
|
||||
err = "fsck error";
|
||||
break;
|
||||
case BCH_FSCK_REPAIR_IMPOSSIBLE:
|
||||
@ -979,8 +982,8 @@ static void bch2_dev_free(struct bch_dev *ca)
|
||||
kvpfree(ca->disk_buckets, bucket_bytes(ca));
|
||||
kfree(ca->prio_buckets);
|
||||
kfree(ca->bio_prio);
|
||||
vfree(ca->buckets);
|
||||
vfree(ca->oldest_gens);
|
||||
kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket));
|
||||
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
|
||||
free_heap(&ca->heap);
|
||||
free_fifo(&ca->free_inc);
|
||||
|
||||
@ -1140,10 +1143,12 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
||||
!init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
||||
!init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) ||
|
||||
!init_heap(&ca->heap, heap_size, GFP_KERNEL) ||
|
||||
!(ca->oldest_gens = vzalloc(sizeof(u8) *
|
||||
ca->mi.nbuckets)) ||
|
||||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
|
||||
ca->mi.nbuckets)) ||
|
||||
!(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets *
|
||||
sizeof(u8),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(ca->buckets = kvpmalloc(ca->mi.nbuckets *
|
||||
sizeof(struct bucket),
|
||||
GFP_KERNEL|__GFP_ZERO)) ||
|
||||
!(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) *
|
||||
2, GFP_KERNEL)) ||
|
||||
!(ca->disk_buckets = kvpmalloc(bucket_bytes(ca), GFP_KERNEL)) ||
|
||||
@ -1871,6 +1876,7 @@ static void bcachefs_exit(void)
|
||||
static int __init bcachefs_init(void)
|
||||
{
|
||||
bch2_bkey_pack_test();
|
||||
bch2_inode_pack_test();
|
||||
|
||||
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
|
||||
bch2_chardev_init() ||
|
||||
|
@ -512,7 +512,7 @@ STORE(bch2_fs_opts_dir)
|
||||
{
|
||||
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
|
||||
const struct bch_option *opt;
|
||||
enum bch_opt_id id;
|
||||
int id;
|
||||
u64 v;
|
||||
|
||||
id = bch2_parse_sysfs_opt(attr->name, buf, &v);
|
||||
|
@ -417,3 +417,17 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
|
||||
dst += bv.bv_len;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copy up to @len bytes from @src into @buf, never writing more than @size
 * bytes in total, and always NUL-terminate the result when @size is non-zero.
 *
 * Returns the number of bytes copied, not counting the terminating NUL.
 * With @size == 0 nothing is written and 0 is returned.
 */
size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len)
{
	size_t copied;

	if (size == 0)
		return 0;

	/* Reserve one byte of the buffer for the terminator */
	copied = size - 1;
	if (len < copied)
		copied = len;

	memcpy(buf, src, copied);
	buf[copied] = '\0';

	return copied;
}
|
||||
|
@ -93,7 +93,8 @@ static inline void kvpfree(void *p, size_t size)
|
||||
static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
|
||||
{
|
||||
return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
|
||||
: (void *) __get_free_pages(gfp_mask, get_order(size))
|
||||
: (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
|
||||
get_order(size))
|
||||
?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
|
||||
}
|
||||
|
||||
@ -750,4 +751,6 @@ static inline struct bio_vec next_contig_bvec(struct bio *bio,
|
||||
#define bio_for_each_contig_segment(bv, bio, iter) \
|
||||
__bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
|
||||
|
||||
size_t bch_scnmemcpy(char *, size_t, const char *, size_t);
|
||||
|
||||
#endif /* _BCACHE_UTIL_H */
|
||||
|
@ -11,6 +11,16 @@
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/xattr.h>
|
||||
|
||||
static unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
|
||||
{
|
||||
return DIV_ROUND_UP(sizeof(struct bch_xattr) +
|
||||
name_len + val_len, sizeof(u64));
|
||||
}
|
||||
|
||||
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
|
||||
|
||||
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
|
||||
|
||||
struct xattr_search_key {
|
||||
u8 type;
|
||||
struct qstr name;
|
||||
@ -31,8 +41,6 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info,
|
||||
return bch2_str_hash_end(&ctx, info);
|
||||
}
|
||||
|
||||
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
|
||||
|
||||
static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
|
||||
{
|
||||
return bch2_xattr_hash(info, key);
|
||||
@ -66,7 +74,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
|
||||
memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
|
||||
}
|
||||
|
||||
static const struct bch_hash_desc xattr_hash_desc = {
|
||||
const struct bch_hash_desc bch2_xattr_hash_desc = {
|
||||
.btree_id = BTREE_ID_XATTRS,
|
||||
.key_type = BCH_XATTR,
|
||||
.whiteout_type = BCH_XATTR_WHITEOUT,
|
||||
@ -79,12 +87,33 @@ static const struct bch_hash_desc xattr_hash_desc = {
|
||||
static const char *bch2_xattr_invalid(const struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
const struct xattr_handler *handler;
|
||||
struct bkey_s_c_xattr xattr;
|
||||
unsigned u64s;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_XATTR:
|
||||
return bkey_val_bytes(k.k) < sizeof(struct bch_xattr)
|
||||
? "value too small"
|
||||
: NULL;
|
||||
if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
|
||||
return "value too small";
|
||||
|
||||
xattr = bkey_s_c_to_xattr(k);
|
||||
u64s = xattr_val_u64s(xattr.v->x_name_len,
|
||||
le16_to_cpu(xattr.v->x_val_len));
|
||||
|
||||
if (bkey_val_u64s(k.k) < u64s)
|
||||
return "value too small";
|
||||
|
||||
if (bkey_val_u64s(k.k) > u64s)
|
||||
return "value too big";
|
||||
|
||||
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
|
||||
if (!handler)
|
||||
return "invalid type";
|
||||
|
||||
if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
|
||||
return "xattr name has invalid characters";
|
||||
|
||||
return NULL;
|
||||
case BCH_XATTR_WHITEOUT:
|
||||
return bkey_val_bytes(k.k) != 0
|
||||
? "value size should be zero"
|
||||
@ -98,34 +127,29 @@ static const char *bch2_xattr_invalid(const struct bch_fs *c,
|
||||
static void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
const struct xattr_handler *handler;
|
||||
struct bkey_s_c_xattr xattr;
|
||||
int n;
|
||||
size_t n = 0;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_XATTR:
|
||||
xattr = bkey_s_c_to_xattr(k);
|
||||
|
||||
if (size) {
|
||||
n = min_t(unsigned, size, xattr.v->x_name_len);
|
||||
memcpy(buf, xattr.v->x_name, n);
|
||||
buf[size - 1] = '\0';
|
||||
buf += n;
|
||||
size -= n;
|
||||
}
|
||||
|
||||
n = scnprintf(buf, size, " -> ");
|
||||
buf += n;
|
||||
size -= n;
|
||||
|
||||
if (size) {
|
||||
n = min_t(unsigned, size,
|
||||
le16_to_cpu(xattr.v->x_val_len));
|
||||
memcpy(buf, xattr_val(xattr.v), n);
|
||||
buf[size - 1] = '\0';
|
||||
buf += n;
|
||||
size -= n;
|
||||
}
|
||||
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
|
||||
if (handler && handler->prefix)
|
||||
n += scnprintf(buf + n, size - n, "%s", handler->prefix);
|
||||
else if (handler)
|
||||
n += scnprintf(buf + n, size - n, "(type %u)",
|
||||
xattr.v->x_type);
|
||||
else
|
||||
n += scnprintf(buf + n, size - n, "(unknown type %u)",
|
||||
xattr.v->x_type);
|
||||
|
||||
n += bch_scnmemcpy(buf + n, size - n, xattr.v->x_name,
|
||||
xattr.v->x_name_len);
|
||||
n += scnprintf(buf + n, size - n, ":");
|
||||
n += bch_scnmemcpy(buf + n, size - n, xattr_val(xattr.v),
|
||||
le16_to_cpu(xattr.v->x_val_len));
|
||||
break;
|
||||
case BCH_XATTR_WHITEOUT:
|
||||
scnprintf(buf, size, "whiteout");
|
||||
@ -147,7 +171,7 @@ int bch2_xattr_get(struct bch_fs *c, struct inode *inode,
|
||||
struct bkey_s_c_xattr xattr;
|
||||
int ret;
|
||||
|
||||
k = bch2_hash_lookup(xattr_hash_desc, &ei->str_hash, c,
|
||||
k = bch2_hash_lookup(bch2_xattr_hash_desc, &ei->str_hash, c,
|
||||
ei->vfs_inode.i_ino, &iter,
|
||||
&X_SEARCH(type, name, strlen(name)));
|
||||
if (IS_ERR(k.k))
|
||||
@ -175,15 +199,13 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
int ret;
|
||||
|
||||
if (!value) {
|
||||
ret = bch2_hash_delete(xattr_hash_desc, hash_info,
|
||||
ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
|
||||
c, inum,
|
||||
journal_seq, &search);
|
||||
} else {
|
||||
struct bkey_i_xattr *xattr;
|
||||
unsigned u64s = BKEY_U64s +
|
||||
DIV_ROUND_UP(sizeof(struct bch_xattr) +
|
||||
search.name.len + size,
|
||||
sizeof(u64));
|
||||
xattr_val_u64s(search.name.len, size);
|
||||
|
||||
if (u64s > U8_MAX)
|
||||
return -ERANGE;
|
||||
@ -200,7 +222,7 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
||||
memcpy(xattr->v.x_name, search.name.name, search.name.len);
|
||||
memcpy(xattr_val(&xattr->v), value, size);
|
||||
|
||||
ret = bch2_hash_set(xattr_hash_desc, hash_info, c,
|
||||
ret = bch2_hash_set(bch2_xattr_hash_desc, hash_info, c,
|
||||
inum, journal_seq,
|
||||
&xattr->k_i,
|
||||
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
|
||||
@ -225,8 +247,6 @@ int bch2_xattr_set(struct bch_fs *c, struct inode *inode,
|
||||
&ei->journal_seq);
|
||||
}
|
||||
|
||||
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
|
||||
|
||||
static size_t bch2_xattr_emit(struct dentry *dentry,
|
||||
const struct bch_xattr *xattr,
|
||||
char *buffer, size_t buffer_size)
|
||||
|
@ -1,6 +1,9 @@
|
||||
#ifndef _BCACHE_XATTR_H
|
||||
#define _BCACHE_XATTR_H
|
||||
|
||||
#include "str_hash.h"
|
||||
|
||||
extern const struct bch_hash_desc bch2_xattr_hash_desc;
|
||||
extern const struct bkey_ops bch2_bkey_xattr_ops;
|
||||
|
||||
struct dentry;
|
||||
|
46
linux/bio.c
46
linux/bio.c
@ -21,32 +21,16 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter,
|
||||
struct bio *src, struct bvec_iter src_iter)
|
||||
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||
struct bio *src, struct bvec_iter *src_iter)
|
||||
{
|
||||
struct bio_vec src_bv, dst_bv;
|
||||
void *src_p, *dst_p;
|
||||
unsigned bytes;
|
||||
|
||||
while (1) {
|
||||
if (!src_iter.bi_size) {
|
||||
src = src->bi_next;
|
||||
if (!src)
|
||||
break;
|
||||
|
||||
src_iter = src->bi_iter;
|
||||
}
|
||||
|
||||
if (!dst_iter.bi_size) {
|
||||
dst = dst->bi_next;
|
||||
if (!dst)
|
||||
break;
|
||||
|
||||
dst_iter = dst->bi_iter;
|
||||
}
|
||||
|
||||
src_bv = bio_iter_iovec(src, src_iter);
|
||||
dst_bv = bio_iter_iovec(dst, dst_iter);
|
||||
while (src_iter->bi_size && dst_iter->bi_size) {
|
||||
src_bv = bio_iter_iovec(src, *src_iter);
|
||||
dst_bv = bio_iter_iovec(dst, *dst_iter);
|
||||
|
||||
bytes = min(src_bv.bv_len, dst_bv.bv_len);
|
||||
|
||||
@ -60,15 +44,27 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter,
|
||||
kunmap_atomic(dst_p);
|
||||
kunmap_atomic(src_p);
|
||||
|
||||
bio_advance_iter(src, &src_iter, bytes);
|
||||
bio_advance_iter(dst, &dst_iter, bytes);
|
||||
flush_dcache_page(dst_bv.bv_page);
|
||||
|
||||
bio_advance_iter(src, src_iter, bytes);
|
||||
bio_advance_iter(dst, dst_iter, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_copy_data - copy contents of data buffers from one bio to another
|
||||
* @src: source bio
|
||||
* @dst: destination bio
|
||||
*
|
||||
* Stops when it reaches the end of either @src or @dst - that is, copies
|
||||
* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
|
||||
*/
|
||||
void bio_copy_data(struct bio *dst, struct bio *src)
|
||||
{
|
||||
bio_copy_data_iter(dst, dst->bi_iter,
|
||||
src, src->bi_iter);
|
||||
struct bvec_iter src_iter = src->bi_iter;
|
||||
struct bvec_iter dst_iter = dst->bi_iter;
|
||||
|
||||
bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
|
||||
}
|
||||
|
||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
||||
|
Loading…
Reference in New Issue
Block a user