mirror of
https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00
Update bcachefs sources to f026e4e024
This commit is contained in:
parent
03bc9d71b1
commit
819f2dde79
@ -1 +1 @@
|
|||||||
3b4024f94489e4d8dc8eb7f1278754a2545f8026
|
f026e4e0243cc10e721504a8bfaa131ea8aa4c91
|
||||||
|
2
Makefile
2
Makefile
@ -78,7 +78,7 @@ SRCS=bcachefs.c \
|
|||||||
libbcachefs/dirent.c \
|
libbcachefs/dirent.c \
|
||||||
libbcachefs/error.c \
|
libbcachefs/error.c \
|
||||||
libbcachefs/extents.c \
|
libbcachefs/extents.c \
|
||||||
libbcachefs/fs-gc.c \
|
libbcachefs/fsck.c \
|
||||||
libbcachefs/inode.c \
|
libbcachefs/inode.c \
|
||||||
libbcachefs/io.c \
|
libbcachefs/io.c \
|
||||||
libbcachefs/journal.c \
|
libbcachefs/journal.c \
|
||||||
|
@ -288,8 +288,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter,
|
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||||
struct bio *src, struct bvec_iter src_iter);
|
struct bio *src, struct bvec_iter *src_iter);
|
||||||
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
extern void bio_copy_data(struct bio *dst, struct bio *src);
|
||||||
extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
|
extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
|
||||||
|
|
||||||
|
@ -458,6 +458,7 @@ enum {
|
|||||||
BCH_FS_BDEV_MOUNTED,
|
BCH_FS_BDEV_MOUNTED,
|
||||||
BCH_FS_ERROR,
|
BCH_FS_ERROR,
|
||||||
BCH_FS_FSCK_FIXED_ERRORS,
|
BCH_FS_FSCK_FIXED_ERRORS,
|
||||||
|
BCH_FS_FSCK_DONE,
|
||||||
BCH_FS_FIXED_GENS,
|
BCH_FS_FIXED_GENS,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -724,6 +725,11 @@ struct bch_fs {
|
|||||||
struct work_struct read_retry_work;
|
struct work_struct read_retry_work;
|
||||||
spinlock_t read_retry_lock;
|
spinlock_t read_retry_lock;
|
||||||
|
|
||||||
|
/* ERRORS */
|
||||||
|
struct list_head fsck_errors;
|
||||||
|
struct mutex fsck_error_lock;
|
||||||
|
bool fsck_alloc_err;
|
||||||
|
|
||||||
/* FILESYSTEM */
|
/* FILESYSTEM */
|
||||||
wait_queue_head_t writeback_wait;
|
wait_queue_head_t writeback_wait;
|
||||||
atomic_t writeback_pages;
|
atomic_t writeback_pages;
|
||||||
|
@ -89,18 +89,20 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
|
|||||||
ops->key_debugcheck(c, b, k);
|
ops->key_debugcheck(c, b, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
char *bch2_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||||
char *buf, size_t size, struct bkey_s_c k)
|
char *buf, size_t size, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||||
|
|
||||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||||
ops->val_to_text)
|
ops->val_to_text)
|
||||||
ops->val_to_text(c, buf, size, k);
|
ops->val_to_text(c, buf, size, k);
|
||||||
|
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
char *bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
||||||
char *buf, size_t size, struct bkey_s_c k)
|
char *buf, size_t size, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
const struct bkey_ops *ops = bch2_bkey_ops[type];
|
||||||
char *out = buf, *end = buf + size;
|
char *out = buf, *end = buf + size;
|
||||||
@ -109,9 +111,11 @@ void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type,
|
|||||||
|
|
||||||
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
if (k.k->type >= KEY_TYPE_GENERIC_NR &&
|
||||||
ops->val_to_text) {
|
ops->val_to_text) {
|
||||||
out += scnprintf(out, end - out, " -> ");
|
out += scnprintf(out, end - out, ": ");
|
||||||
ops->val_to_text(c, out, end - out, k);
|
ops->val_to_text(c, out, end - out, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_bkey_swab(enum bkey_type type,
|
void bch2_bkey_swab(enum bkey_type type,
|
||||||
|
@ -67,10 +67,10 @@ const char *bch2_btree_bkey_invalid(struct bch_fs *, struct btree *,
|
|||||||
struct bkey_s_c);
|
struct bkey_s_c);
|
||||||
|
|
||||||
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
|
||||||
void bch2_val_to_text(struct bch_fs *, enum bkey_type,
|
char *bch2_val_to_text(struct bch_fs *, enum bkey_type,
|
||||||
char *, size_t, struct bkey_s_c);
|
char *, size_t, struct bkey_s_c);
|
||||||
void bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
|
char *bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type,
|
||||||
char *, size_t, struct bkey_s_c);
|
char *, size_t, struct bkey_s_c);
|
||||||
|
|
||||||
void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
|
void bch2_bkey_swab(enum bkey_type, const struct bkey_format *,
|
||||||
struct bkey_packed *);
|
struct bkey_packed *);
|
||||||
|
@ -91,6 +91,7 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp)
|
|||||||
six_lock_init(&b->lock);
|
six_lock_init(&b->lock);
|
||||||
INIT_LIST_HEAD(&b->list);
|
INIT_LIST_HEAD(&b->list);
|
||||||
INIT_LIST_HEAD(&b->write_blocked);
|
INIT_LIST_HEAD(&b->write_blocked);
|
||||||
|
INIT_LIST_HEAD(&b->reachable);
|
||||||
|
|
||||||
mca_data_alloc(c, b, gfp);
|
mca_data_alloc(c, b, gfp);
|
||||||
return b->data ? b : NULL;
|
return b->data ? b : NULL;
|
||||||
|
@ -605,10 +605,12 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
|
|||||||
bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]);
|
bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]);
|
||||||
|
|
||||||
/* Repack everything with @new_format and sort down to one bset */
|
/* Repack everything with @new_format and sort down to one bset */
|
||||||
for (i = 0; i < nr_old_nodes; i++)
|
for (i = 0; i < nr_old_nodes; i++) {
|
||||||
new_nodes[i] =
|
new_nodes[i] =
|
||||||
__bch2_btree_node_alloc_replacement(c, old_nodes[i],
|
__bch2_btree_node_alloc_replacement(c, old_nodes[i],
|
||||||
new_format, res);
|
new_format, res);
|
||||||
|
list_add(&new_nodes[i]->reachable, &as->reachable_list);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Conceptually we concatenate the nodes together and slice them
|
* Conceptually we concatenate the nodes together and slice them
|
||||||
@ -645,6 +647,7 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
|
|||||||
|
|
||||||
set_btree_bset_end(n1, n1->set);
|
set_btree_bset_end(n1, n1->set);
|
||||||
|
|
||||||
|
list_del_init(&n2->reachable);
|
||||||
six_unlock_write(&n2->lock);
|
six_unlock_write(&n2->lock);
|
||||||
bch2_btree_node_free_never_inserted(c, n2);
|
bch2_btree_node_free_never_inserted(c, n2);
|
||||||
six_unlock_intent(&n2->lock);
|
six_unlock_intent(&n2->lock);
|
||||||
|
@ -872,32 +872,57 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce)
|
|||||||
vstruct_end(i) - (void *) i->_data);
|
vstruct_end(i) - (void *) i->_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define btree_node_error(b, c, ptr, fmt, ...) \
|
#define btree_node_error(c, b, ptr, msg, ...) \
|
||||||
bch2_fs_inconsistent(c, \
|
do { \
|
||||||
"btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\
|
if (write == READ && \
|
||||||
(b)->btree_id, (b)->level, btree_node_root(c, b) \
|
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \
|
||||||
? btree_node_root(c, b)->level : -1, \
|
mustfix_fsck_err(c, \
|
||||||
PTR_BUCKET_NR(ca, ptr), (b)->written, \
|
"btree node read error at btree %u level %u/%u\n"\
|
||||||
le16_to_cpu((i)->u64s), ##__VA_ARGS__)
|
"sector %llu node offset %u bset u64s %u: " msg,\
|
||||||
|
(b)->btree_id, (b)->level, \
|
||||||
|
(c)->btree_roots[(b)->btree_id].level, \
|
||||||
|
(u64) ptr->offset, (b)->written, \
|
||||||
|
le16_to_cpu((i)->u64s), ##__VA_ARGS__); \
|
||||||
|
} else { \
|
||||||
|
bch_err(c, "%s at btree %u level %u/%u\n" \
|
||||||
|
"sector %llu node offset %u bset u64s %u: " msg,\
|
||||||
|
write == WRITE \
|
||||||
|
? "corrupt metadata in btree node write" \
|
||||||
|
: "btree node error", \
|
||||||
|
(b)->btree_id, (b)->level, \
|
||||||
|
(c)->btree_roots[(b)->btree_id].level, \
|
||||||
|
(u64) ptr->offset, (b)->written, \
|
||||||
|
le16_to_cpu((i)->u64s), ##__VA_ARGS__); \
|
||||||
|
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||||
|
goto fsck_err; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
static int validate_bset(struct bch_fs *c, struct btree *b,
|
||||||
struct bch_dev *ca,
|
const struct bch_extent_ptr *ptr,
|
||||||
const struct bch_extent_ptr *ptr,
|
struct bset *i, unsigned sectors,
|
||||||
struct bset *i, unsigned sectors,
|
unsigned *whiteout_u64s,
|
||||||
unsigned *whiteout_u64s)
|
int write)
|
||||||
{
|
{
|
||||||
struct bkey_packed *k, *prev = NULL;
|
struct bkey_packed *k, *prev = NULL;
|
||||||
struct bpos prev_pos = POS_MIN;
|
struct bpos prev_pos = POS_MIN;
|
||||||
bool seen_non_whiteout = false;
|
bool seen_non_whiteout = false;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION)
|
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) {
|
||||||
return "unsupported bset version";
|
btree_node_error(c, b, ptr, "unsupported bset version");
|
||||||
|
i->u64s = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (b->written + sectors > c->sb.btree_node_size)
|
if (b->written + sectors > c->sb.btree_node_size) {
|
||||||
return "bset past end of btree node";
|
btree_node_error(c, b, ptr, "bset past end of btree node");
|
||||||
|
i->u64s = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (i != &b->data->keys && !i->u64s)
|
if (b->written && !i->u64s)
|
||||||
btree_node_error(b, c, ptr, "empty set");
|
btree_node_error(c, b, ptr, "empty set");
|
||||||
|
|
||||||
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
if (!BSET_SEPARATE_WHITEOUTS(i)) {
|
||||||
seen_non_whiteout = true;
|
seen_non_whiteout = true;
|
||||||
@ -911,7 +936,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
const char *invalid;
|
const char *invalid;
|
||||||
|
|
||||||
if (!k->u64s) {
|
if (!k->u64s) {
|
||||||
btree_node_error(b, c, ptr,
|
btree_node_error(c, b, ptr,
|
||||||
"KEY_U64s 0: %zu bytes of metadata lost",
|
"KEY_U64s 0: %zu bytes of metadata lost",
|
||||||
vstruct_end(i) - (void *) k);
|
vstruct_end(i) - (void *) k);
|
||||||
|
|
||||||
@ -920,7 +945,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (bkey_next(k) > vstruct_last(i)) {
|
if (bkey_next(k) > vstruct_last(i)) {
|
||||||
btree_node_error(b, c, ptr,
|
btree_node_error(c, b, ptr,
|
||||||
"key extends past end of bset");
|
"key extends past end of bset");
|
||||||
|
|
||||||
i->u64s = cpu_to_le16((u64 *) k - i->_data);
|
i->u64s = cpu_to_le16((u64 *) k - i->_data);
|
||||||
@ -928,7 +953,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (k->format > KEY_FORMAT_CURRENT) {
|
if (k->format > KEY_FORMAT_CURRENT) {
|
||||||
btree_node_error(b, c, ptr,
|
btree_node_error(c, b, ptr,
|
||||||
"invalid bkey format %u", k->format);
|
"invalid bkey format %u", k->format);
|
||||||
|
|
||||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||||
@ -947,8 +972,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
char buf[160];
|
char buf[160];
|
||||||
|
|
||||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||||
buf, sizeof(buf), u);
|
buf, sizeof(buf), u);
|
||||||
btree_node_error(b, c, ptr,
|
btree_node_error(c, b, ptr,
|
||||||
"invalid bkey %s: %s", buf, invalid);
|
"invalid bkey %s: %s", buf, invalid);
|
||||||
|
|
||||||
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
|
||||||
@ -969,7 +994,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
*whiteout_u64s = k->_data - i->_data;
|
*whiteout_u64s = k->_data - i->_data;
|
||||||
seen_non_whiteout = true;
|
seen_non_whiteout = true;
|
||||||
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
|
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
|
||||||
btree_node_error(b, c, ptr,
|
btree_node_error(c, b, ptr,
|
||||||
"keys out of order: %llu:%llu > %llu:%llu",
|
"keys out of order: %llu:%llu > %llu:%llu",
|
||||||
prev_pos.inode,
|
prev_pos.inode,
|
||||||
prev_pos.offset,
|
prev_pos.offset,
|
||||||
@ -984,7 +1009,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
|
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
|
||||||
return NULL;
|
fsck_err:
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool extent_contains_ptr(struct bkey_s_c_extent e,
|
static bool extent_contains_ptr(struct bkey_s_c_extent e,
|
||||||
@ -1012,7 +1038,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
|||||||
const char *err;
|
const char *err;
|
||||||
struct bch_csum csum;
|
struct bch_csum csum;
|
||||||
struct nonce nonce;
|
struct nonce nonce;
|
||||||
int ret;
|
int ret, write = READ;
|
||||||
|
|
||||||
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
|
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
|
||||||
__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
|
__bch2_btree_node_iter_init(iter, btree_node_is_extents(b));
|
||||||
@ -1115,9 +1141,10 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b,
|
|||||||
sectors = vstruct_sectors(bne, c->block_bits);
|
sectors = vstruct_sectors(bne, c->block_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = validate_bset(c, b, ca, ptr, i, sectors, &whiteout_u64s);
|
ret = validate_bset(c, b, ptr, i, sectors,
|
||||||
if (err)
|
&whiteout_u64s, READ);
|
||||||
goto err;
|
if (ret)
|
||||||
|
goto fsck_err;
|
||||||
|
|
||||||
b->written += sectors;
|
b->written += sectors;
|
||||||
|
|
||||||
@ -1172,8 +1199,10 @@ out:
|
|||||||
mempool_free(iter, &c->fill_iter);
|
mempool_free(iter, &c->fill_iter);
|
||||||
return;
|
return;
|
||||||
err:
|
err:
|
||||||
|
btree_node_error(c, b, ptr, "%s", err);
|
||||||
|
fsck_err:
|
||||||
|
bch2_inconsistent_error(c);
|
||||||
set_btree_node_read_error(b);
|
set_btree_node_read_error(b);
|
||||||
btree_node_error(b, c, ptr, "%s", err);
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1309,6 +1338,23 @@ static void btree_node_write_endio(struct bio *bio)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||||
|
struct bset *i, unsigned sectors)
|
||||||
|
{
|
||||||
|
const struct bch_extent_ptr *ptr;
|
||||||
|
unsigned whiteout_u64s = 0;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr)
|
||||||
|
break;
|
||||||
|
|
||||||
|
ret = validate_bset(c, b, ptr, i, sectors, &whiteout_u64s, WRITE);
|
||||||
|
if (ret)
|
||||||
|
bch2_fatal_error(c);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||||
struct closure *parent,
|
struct closure *parent,
|
||||||
enum six_lock_type lock_type_held)
|
enum six_lock_type lock_type_held)
|
||||||
@ -1343,18 +1389,24 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
if (!(old & (1 << BTREE_NODE_dirty)))
|
if (!(old & (1 << BTREE_NODE_dirty)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (b->written &&
|
||||||
|
!btree_node_may_write(b))
|
||||||
|
return;
|
||||||
|
|
||||||
if (old & (1 << BTREE_NODE_write_in_flight)) {
|
if (old & (1 << BTREE_NODE_write_in_flight)) {
|
||||||
btree_node_wait_on_io(b);
|
btree_node_wait_on_io(b);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
new &= ~(1 << BTREE_NODE_dirty);
|
new &= ~(1 << BTREE_NODE_dirty);
|
||||||
|
new &= ~(1 << BTREE_NODE_need_write);
|
||||||
new |= (1 << BTREE_NODE_write_in_flight);
|
new |= (1 << BTREE_NODE_write_in_flight);
|
||||||
new |= (1 << BTREE_NODE_just_written);
|
new |= (1 << BTREE_NODE_just_written);
|
||||||
new ^= (1 << BTREE_NODE_write_idx);
|
new ^= (1 << BTREE_NODE_write_idx);
|
||||||
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
||||||
|
|
||||||
BUG_ON(!list_empty(&b->write_blocked));
|
BUG_ON(!list_empty(&b->write_blocked));
|
||||||
|
BUG_ON(!list_empty_careful(&b->reachable) != !b->written);
|
||||||
|
|
||||||
BUG_ON(b->written >= c->sb.btree_node_size);
|
BUG_ON(b->written >= c->sb.btree_node_size);
|
||||||
BUG_ON(bset_written(b, btree_bset_last(b)));
|
BUG_ON(bset_written(b, btree_bset_last(b)));
|
||||||
@ -1430,13 +1482,17 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
|
|
||||||
clear_needs_whiteout(i);
|
clear_needs_whiteout(i);
|
||||||
|
|
||||||
if (b->written && !i->u64s) {
|
/* do we have data to write? */
|
||||||
/* Nothing to write: */
|
if (b->written && !i->u64s)
|
||||||
btree_bounce_free(c, order, used_mempool, data);
|
goto nowrite;
|
||||||
btree_node_write_done(c, b);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
bytes_to_write = vstruct_end(i) - data;
|
||||||
|
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
|
||||||
|
|
||||||
|
memset(data + bytes_to_write, 0,
|
||||||
|
(sectors_to_write << 9) - bytes_to_write);
|
||||||
|
|
||||||
|
BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size);
|
||||||
BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
|
BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN);
|
||||||
BUG_ON(i->seq != b->data->keys.seq);
|
BUG_ON(i->seq != b->data->keys.seq);
|
||||||
|
|
||||||
@ -1445,6 +1501,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
|
|
||||||
nonce = btree_nonce(b, i, b->written << 9);
|
nonce = btree_nonce(b, i, b->written << 9);
|
||||||
|
|
||||||
|
/* if we're going to be encrypting, check metadata validity first: */
|
||||||
|
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||||
|
validate_bset_for_write(c, b, i, sectors_to_write))
|
||||||
|
goto err;
|
||||||
|
|
||||||
if (bn) {
|
if (bn) {
|
||||||
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
|
||||||
&bn->flags,
|
&bn->flags,
|
||||||
@ -1464,15 +1525,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes_to_write = vstruct_end(i) - data;
|
/* if we're not encrypting, check metadata after checksumming: */
|
||||||
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
|
if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) &&
|
||||||
|
validate_bset_for_write(c, b, i, sectors_to_write))
|
||||||
memset(data + bytes_to_write, 0,
|
goto err;
|
||||||
(sectors_to_write << 9) - bytes_to_write);
|
|
||||||
|
|
||||||
BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size);
|
|
||||||
|
|
||||||
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We handle btree write errors by immediately halting the journal -
|
* We handle btree write errors by immediately halting the journal -
|
||||||
@ -1488,14 +1544,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
* break:
|
* break:
|
||||||
*/
|
*/
|
||||||
if (bch2_journal_error(&c->journal) ||
|
if (bch2_journal_error(&c->journal) ||
|
||||||
c->opts.nochanges) {
|
c->opts.nochanges)
|
||||||
set_btree_node_noevict(b);
|
goto err;
|
||||||
b->written += sectors_to_write;
|
|
||||||
|
|
||||||
btree_bounce_free(c, order, used_mempool, data);
|
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
||||||
btree_node_write_done(c, b);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write);
|
bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write);
|
||||||
|
|
||||||
@ -1543,6 +1595,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
b->written += sectors_to_write;
|
b->written += sectors_to_write;
|
||||||
|
|
||||||
bch2_submit_wbio_replicas(wbio, c, &k.key);
|
bch2_submit_wbio_replicas(wbio, c, &k.key);
|
||||||
|
return;
|
||||||
|
err:
|
||||||
|
set_btree_node_noevict(b);
|
||||||
|
b->written += sectors_to_write;
|
||||||
|
nowrite:
|
||||||
|
btree_bounce_free(c, order, used_mempool, data);
|
||||||
|
btree_node_write_done(c, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -27,7 +27,8 @@ static inline void btree_node_wait_on_io(struct btree *b)
|
|||||||
|
|
||||||
static inline bool btree_node_may_write(struct btree *b)
|
static inline bool btree_node_may_write(struct btree *b)
|
||||||
{
|
{
|
||||||
return list_empty_careful(&b->write_blocked);
|
return list_empty_careful(&b->write_blocked) &&
|
||||||
|
list_empty_careful(&b->reachable);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum compact_mode {
|
enum compact_mode {
|
||||||
@ -80,6 +81,8 @@ void bch2_btree_node_write(struct bch_fs *, struct btree *,
|
|||||||
#define bch2_btree_node_write_dirty(_c, _b, _cl, cond) \
|
#define bch2_btree_node_write_dirty(_c, _b, _cl, cond) \
|
||||||
do { \
|
do { \
|
||||||
while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
|
while ((_b)->written && btree_node_dirty(_b) && (cond)) { \
|
||||||
|
set_btree_node_need_write(_b); \
|
||||||
|
\
|
||||||
if (!btree_node_may_write(_b)) \
|
if (!btree_node_may_write(_b)) \
|
||||||
break; \
|
break; \
|
||||||
\
|
\
|
||||||
|
@ -1109,6 +1109,26 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c,
|
|||||||
prefetch(c->btree_roots[btree_id].b);
|
prefetch(c->btree_roots[btree_id].b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bch2_btree_iter_unlink(struct btree_iter *iter)
|
||||||
|
{
|
||||||
|
struct btree_iter *linked;
|
||||||
|
|
||||||
|
__bch2_btree_iter_unlock(iter);
|
||||||
|
|
||||||
|
if (!btree_iter_linked(iter))
|
||||||
|
return;
|
||||||
|
|
||||||
|
for_each_linked_btree_iter(iter, linked) {
|
||||||
|
|
||||||
|
if (linked->next == iter) {
|
||||||
|
linked->next = iter->next;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
|
void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
|
||||||
{
|
{
|
||||||
BUG_ON(btree_iter_linked(new));
|
BUG_ON(btree_iter_linked(new));
|
||||||
@ -1128,7 +1148,7 @@ void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new)
|
|||||||
|
|
||||||
void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
|
void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src)
|
||||||
{
|
{
|
||||||
bch2_btree_iter_unlock(dst);
|
__bch2_btree_iter_unlock(dst);
|
||||||
memcpy(dst, src, offsetof(struct btree_iter, next));
|
memcpy(dst, src, offsetof(struct btree_iter, next));
|
||||||
dst->nodes_locked = dst->nodes_intent_locked = 0;
|
dst->nodes_locked = dst->nodes_intent_locked = 0;
|
||||||
}
|
}
|
||||||
|
@ -185,6 +185,7 @@ static inline void bch2_btree_iter_init_intent(struct btree_iter *iter,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *);
|
void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *);
|
||||||
|
void bch2_btree_iter_unlink(struct btree_iter *);
|
||||||
void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *);
|
void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *);
|
||||||
|
|
||||||
static inline struct bpos btree_type_successor(enum btree_id id,
|
static inline struct bpos btree_type_successor(enum btree_id id,
|
||||||
|
@ -110,6 +110,14 @@ struct btree {
|
|||||||
*/
|
*/
|
||||||
struct list_head write_blocked;
|
struct list_head write_blocked;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Also for asynchronous splits/interior node updates:
|
||||||
|
* If a btree node isn't reachable yet, we don't want to kick off
|
||||||
|
* another write - because that write also won't yet be reachable and
|
||||||
|
* marking it as completed before it's reachable would be incorrect:
|
||||||
|
*/
|
||||||
|
struct list_head reachable;
|
||||||
|
|
||||||
struct open_bucket *ob;
|
struct open_bucket *ob;
|
||||||
|
|
||||||
/* lru list */
|
/* lru list */
|
||||||
@ -136,6 +144,7 @@ enum btree_flags {
|
|||||||
BTREE_NODE_read_error,
|
BTREE_NODE_read_error,
|
||||||
BTREE_NODE_write_error,
|
BTREE_NODE_write_error,
|
||||||
BTREE_NODE_dirty,
|
BTREE_NODE_dirty,
|
||||||
|
BTREE_NODE_need_write,
|
||||||
BTREE_NODE_noevict,
|
BTREE_NODE_noevict,
|
||||||
BTREE_NODE_write_idx,
|
BTREE_NODE_write_idx,
|
||||||
BTREE_NODE_accessed,
|
BTREE_NODE_accessed,
|
||||||
@ -146,6 +155,7 @@ enum btree_flags {
|
|||||||
BTREE_FLAG(read_error);
|
BTREE_FLAG(read_error);
|
||||||
BTREE_FLAG(write_error);
|
BTREE_FLAG(write_error);
|
||||||
BTREE_FLAG(dirty);
|
BTREE_FLAG(dirty);
|
||||||
|
BTREE_FLAG(need_write);
|
||||||
BTREE_FLAG(noevict);
|
BTREE_FLAG(noevict);
|
||||||
BTREE_FLAG(write_idx);
|
BTREE_FLAG(write_idx);
|
||||||
BTREE_FLAG(accessed);
|
BTREE_FLAG(accessed);
|
||||||
|
@ -162,9 +162,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
|
|||||||
trace_btree_node_free(c, b);
|
trace_btree_node_free(c, b);
|
||||||
|
|
||||||
BUG_ON(btree_node_dirty(b));
|
BUG_ON(btree_node_dirty(b));
|
||||||
|
BUG_ON(btree_node_need_write(b));
|
||||||
BUG_ON(b == btree_node_root(c, b));
|
BUG_ON(b == btree_node_root(c, b));
|
||||||
BUG_ON(b->ob);
|
BUG_ON(b->ob);
|
||||||
BUG_ON(!list_empty(&b->write_blocked));
|
BUG_ON(!list_empty(&b->write_blocked));
|
||||||
|
BUG_ON(!list_empty(&b->reachable));
|
||||||
|
|
||||||
clear_btree_node_noevict(b);
|
clear_btree_node_noevict(b);
|
||||||
|
|
||||||
@ -589,7 +591,6 @@ struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
|
|||||||
unsigned nr_nodes = btree_reserve_required_nodes(depth) + extra_nodes;
|
unsigned nr_nodes = btree_reserve_required_nodes(depth) + extra_nodes;
|
||||||
|
|
||||||
return __bch2_btree_reserve_get(c, nr_nodes, flags, cl);
|
return __bch2_btree_reserve_get(c, nr_nodes, flags, cl);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||||
@ -598,6 +599,7 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
|||||||
struct closure cl;
|
struct closure cl;
|
||||||
struct btree_reserve *reserve;
|
struct btree_reserve *reserve;
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
|
LIST_HEAD(reachable_list);
|
||||||
|
|
||||||
closure_init_stack(&cl);
|
closure_init_stack(&cl);
|
||||||
|
|
||||||
@ -614,11 +616,14 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
|||||||
}
|
}
|
||||||
|
|
||||||
b = __btree_root_alloc(c, 0, id, reserve);
|
b = __btree_root_alloc(c, 0, id, reserve);
|
||||||
|
list_add(&b->reachable, &reachable_list);
|
||||||
|
|
||||||
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
|
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
|
||||||
|
|
||||||
bch2_btree_set_root_initial(c, b, reserve);
|
bch2_btree_set_root_initial(c, b, reserve);
|
||||||
bch2_btree_open_bucket_put(c, b);
|
bch2_btree_open_bucket_put(c, b);
|
||||||
|
|
||||||
|
list_del_init(&b->reachable);
|
||||||
six_unlock_intent(&b->lock);
|
six_unlock_intent(&b->lock);
|
||||||
|
|
||||||
bch2_btree_reserve_put(c, reserve);
|
bch2_btree_reserve_put(c, reserve);
|
||||||
@ -659,6 +664,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_iter *iter,
|
|||||||
|
|
||||||
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
|
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
|
||||||
set_btree_node_dirty(b);
|
set_btree_node_dirty(b);
|
||||||
|
set_btree_node_need_write(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Inserting into a given leaf node (last stage of insert): */
|
/* Inserting into a given leaf node (last stage of insert): */
|
||||||
@ -798,12 +804,6 @@ void bch2_btree_journal_key(struct btree_insert *trans,
|
|||||||
u64 seq = trans->journal_res.seq;
|
u64 seq = trans->journal_res.seq;
|
||||||
bool needs_whiteout = insert->k.needs_whiteout;
|
bool needs_whiteout = insert->k.needs_whiteout;
|
||||||
|
|
||||||
/*
|
|
||||||
* have a bug where we're seeing an extent with an invalid crc
|
|
||||||
* entry in the journal, trying to track it down:
|
|
||||||
*/
|
|
||||||
BUG_ON(bch2_bkey_invalid(c, b->btree_id, bkey_i_to_s_c(insert)));
|
|
||||||
|
|
||||||
/* ick */
|
/* ick */
|
||||||
insert->k.needs_whiteout = false;
|
insert->k.needs_whiteout = false;
|
||||||
bch2_journal_add_keys(j, &trans->journal_res,
|
bch2_journal_add_keys(j, &trans->journal_res,
|
||||||
@ -878,6 +878,8 @@ bch2_btree_interior_update_alloc(struct bch_fs *c)
|
|||||||
closure_init(&as->cl, &c->cl);
|
closure_init(&as->cl, &c->cl);
|
||||||
as->c = c;
|
as->c = c;
|
||||||
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
as->mode = BTREE_INTERIOR_NO_UPDATE;
|
||||||
|
INIT_LIST_HEAD(&as->write_blocked_list);
|
||||||
|
INIT_LIST_HEAD(&as->reachable_list);
|
||||||
|
|
||||||
bch2_keylist_init(&as->parent_keys, as->inline_keys,
|
bch2_keylist_init(&as->parent_keys, as->inline_keys,
|
||||||
ARRAY_SIZE(as->inline_keys));
|
ARRAY_SIZE(as->inline_keys));
|
||||||
@ -908,6 +910,18 @@ static void btree_interior_update_nodes_reachable(struct closure *cl)
|
|||||||
|
|
||||||
mutex_lock(&c->btree_interior_update_lock);
|
mutex_lock(&c->btree_interior_update_lock);
|
||||||
|
|
||||||
|
while (!list_empty(&as->reachable_list)) {
|
||||||
|
struct btree *b = list_first_entry(&as->reachable_list,
|
||||||
|
struct btree, reachable);
|
||||||
|
list_del_init(&b->reachable);
|
||||||
|
mutex_unlock(&c->btree_interior_update_lock);
|
||||||
|
|
||||||
|
six_lock_read(&b->lock);
|
||||||
|
bch2_btree_node_write_dirty(c, b, NULL, btree_node_need_write(b));
|
||||||
|
six_unlock_read(&b->lock);
|
||||||
|
mutex_lock(&c->btree_interior_update_lock);
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < as->nr_pending; i++)
|
for (i = 0; i < as->nr_pending; i++)
|
||||||
bch2_btree_node_free_ondisk(c, &as->pending[i]);
|
bch2_btree_node_free_ondisk(c, &as->pending[i]);
|
||||||
as->nr_pending = 0;
|
as->nr_pending = 0;
|
||||||
@ -929,6 +943,7 @@ static void btree_interior_update_nodes_written(struct closure *cl)
|
|||||||
|
|
||||||
if (bch2_journal_error(&c->journal)) {
|
if (bch2_journal_error(&c->journal)) {
|
||||||
/* XXX what? */
|
/* XXX what? */
|
||||||
|
/* we don't want to free the nodes on disk, that's what */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX: missing error handling, damnit */
|
/* XXX: missing error handling, damnit */
|
||||||
@ -962,7 +977,8 @@ retry:
|
|||||||
list_del(&as->write_blocked_list);
|
list_del(&as->write_blocked_list);
|
||||||
mutex_unlock(&c->btree_interior_update_lock);
|
mutex_unlock(&c->btree_interior_update_lock);
|
||||||
|
|
||||||
bch2_btree_node_write_dirty(c, b, NULL, true);
|
bch2_btree_node_write_dirty(c, b, NULL,
|
||||||
|
btree_node_need_write(b));
|
||||||
six_unlock_read(&b->lock);
|
six_unlock_read(&b->lock);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -1135,6 +1151,7 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
|||||||
}
|
}
|
||||||
|
|
||||||
clear_btree_node_dirty(b);
|
clear_btree_node_dirty(b);
|
||||||
|
clear_btree_node_need_write(b);
|
||||||
w = btree_current_write(b);
|
w = btree_current_write(b);
|
||||||
|
|
||||||
llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list)
|
llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list)
|
||||||
@ -1152,6 +1169,8 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c,
|
|||||||
&as->journal, interior_update_flush);
|
&as->journal, interior_update_flush);
|
||||||
bch2_journal_pin_drop(&c->journal, &w->journal);
|
bch2_journal_pin_drop(&c->journal, &w->journal);
|
||||||
|
|
||||||
|
if (!list_empty(&b->reachable))
|
||||||
|
list_del_init(&b->reachable);
|
||||||
|
|
||||||
mutex_unlock(&c->btree_interior_update_lock);
|
mutex_unlock(&c->btree_interior_update_lock);
|
||||||
}
|
}
|
||||||
@ -1265,7 +1284,8 @@ bch2_btree_insert_keys_interior(struct btree *b,
|
|||||||
* node)
|
* node)
|
||||||
*/
|
*/
|
||||||
static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n1,
|
static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n1,
|
||||||
struct btree_reserve *reserve)
|
struct btree_reserve *reserve,
|
||||||
|
struct btree_interior_update *as)
|
||||||
{
|
{
|
||||||
size_t nr_packed = 0, nr_unpacked = 0;
|
size_t nr_packed = 0, nr_unpacked = 0;
|
||||||
struct btree *n2;
|
struct btree *n2;
|
||||||
@ -1273,6 +1293,8 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
|
|||||||
struct bkey_packed *k, *prev = NULL;
|
struct bkey_packed *k, *prev = NULL;
|
||||||
|
|
||||||
n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
|
n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve);
|
||||||
|
list_add(&n2->reachable, &as->reachable_list);
|
||||||
|
|
||||||
n2->data->max_key = n1->data->max_key;
|
n2->data->max_key = n1->data->max_key;
|
||||||
n2->data->format = n1->format;
|
n2->data->format = n1->format;
|
||||||
n2->key.k.p = n1->key.k.p;
|
n2->key.k.p = n1->key.k.p;
|
||||||
@ -1421,13 +1443,15 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
|||||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||||
|
|
||||||
n1 = bch2_btree_node_alloc_replacement(c, b, reserve);
|
n1 = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||||
|
list_add(&n1->reachable, &as->reachable_list);
|
||||||
|
|
||||||
if (b->level)
|
if (b->level)
|
||||||
btree_split_insert_keys(iter, n1, insert_keys, reserve);
|
btree_split_insert_keys(iter, n1, insert_keys, reserve);
|
||||||
|
|
||||||
if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) {
|
if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) {
|
||||||
trace_btree_node_split(c, b, b->nr.live_u64s);
|
trace_btree_node_split(c, b, b->nr.live_u64s);
|
||||||
|
|
||||||
n2 = __btree_split_node(iter, n1, reserve);
|
n2 = __btree_split_node(iter, n1, reserve, as);
|
||||||
|
|
||||||
bch2_btree_build_aux_trees(n2);
|
bch2_btree_build_aux_trees(n2);
|
||||||
bch2_btree_build_aux_trees(n1);
|
bch2_btree_build_aux_trees(n1);
|
||||||
@ -1449,6 +1473,8 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
|
|||||||
n3 = __btree_root_alloc(c, b->level + 1,
|
n3 = __btree_root_alloc(c, b->level + 1,
|
||||||
iter->btree_id,
|
iter->btree_id,
|
||||||
reserve);
|
reserve);
|
||||||
|
list_add(&n3->reachable, &as->reachable_list);
|
||||||
|
|
||||||
n3->sib_u64s[0] = U16_MAX;
|
n3->sib_u64s[0] = U16_MAX;
|
||||||
n3->sib_u64s[1] = U16_MAX;
|
n3->sib_u64s[1] = U16_MAX;
|
||||||
|
|
||||||
@ -1748,6 +1774,8 @@ retry:
|
|||||||
bch2_btree_interior_update_will_free_node(c, as, m);
|
bch2_btree_interior_update_will_free_node(c, as, m);
|
||||||
|
|
||||||
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
|
n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve);
|
||||||
|
list_add(&n->reachable, &as->reachable_list);
|
||||||
|
|
||||||
n->data->min_key = prev->data->min_key;
|
n->data->min_key = prev->data->min_key;
|
||||||
n->data->max_key = next->data->max_key;
|
n->data->max_key = next->data->max_key;
|
||||||
n->data->format = new_f;
|
n->data->format = new_f;
|
||||||
@ -1914,8 +1942,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
trans_for_each_entry(trans, i) {
|
trans_for_each_entry(trans, i) {
|
||||||
EBUG_ON(i->iter->level);
|
BUG_ON(i->iter->level);
|
||||||
EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
|
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
sort(trans->entries, trans->nr, sizeof(trans->entries[0]),
|
sort(trans->entries, trans->nr, sizeof(trans->entries[0]),
|
||||||
@ -2076,6 +2104,19 @@ err:
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
|
||||||
|
{
|
||||||
|
struct bkey_i k;
|
||||||
|
|
||||||
|
bkey_init(&k.k);
|
||||||
|
k.k.p = iter->pos;
|
||||||
|
|
||||||
|
return bch2_btree_insert_at(iter->c, NULL, NULL, NULL,
|
||||||
|
BTREE_INSERT_NOFAIL|
|
||||||
|
BTREE_INSERT_USE_RESERVE|flags,
|
||||||
|
BTREE_INSERT_ENTRY(iter, &k));
|
||||||
|
}
|
||||||
|
|
||||||
int bch2_btree_insert_list_at(struct btree_iter *iter,
|
int bch2_btree_insert_list_at(struct btree_iter *iter,
|
||||||
struct keylist *keys,
|
struct keylist *keys,
|
||||||
struct disk_reservation *disk_res,
|
struct disk_reservation *disk_res,
|
||||||
@ -2104,45 +2145,6 @@ int bch2_btree_insert_list_at(struct btree_iter *iter,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* bch_btree_insert_check_key - insert dummy key into btree
|
|
||||||
*
|
|
||||||
* We insert a random key on a cache miss, then compare exchange on it
|
|
||||||
* once the cache promotion or backing device read completes. This
|
|
||||||
* ensures that if this key is written to after the read, the read will
|
|
||||||
* lose and not overwrite the key with stale data.
|
|
||||||
*
|
|
||||||
* Return values:
|
|
||||||
* -EAGAIN: @iter->cl was put on a waitlist waiting for btree node allocation
|
|
||||||
* -EINTR: btree node was changed while upgrading to write lock
|
|
||||||
*/
|
|
||||||
int bch2_btree_insert_check_key(struct btree_iter *iter,
|
|
||||||
struct bkey_i *check_key)
|
|
||||||
{
|
|
||||||
struct bpos saved_pos = iter->pos;
|
|
||||||
struct bkey_i_cookie *cookie;
|
|
||||||
BKEY_PADDED(key) tmp;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&check_key->k)));
|
|
||||||
|
|
||||||
check_key->k.type = KEY_TYPE_COOKIE;
|
|
||||||
set_bkey_val_bytes(&check_key->k, sizeof(struct bch_cookie));
|
|
||||||
|
|
||||||
cookie = bkey_i_to_cookie(check_key);
|
|
||||||
get_random_bytes(&cookie->v, sizeof(cookie->v));
|
|
||||||
|
|
||||||
bkey_copy(&tmp.key, check_key);
|
|
||||||
|
|
||||||
ret = bch2_btree_insert_at(iter->c, NULL, NULL, NULL,
|
|
||||||
BTREE_INSERT_ATOMIC,
|
|
||||||
BTREE_INSERT_ENTRY(iter, &tmp.key));
|
|
||||||
|
|
||||||
bch2_btree_iter_rewind(iter, saved_pos);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bch_btree_insert - insert keys into the extent btree
|
* bch_btree_insert - insert keys into the extent btree
|
||||||
* @c: pointer to struct bch_fs
|
* @c: pointer to struct bch_fs
|
||||||
@ -2310,6 +2312,7 @@ int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b,
|
|||||||
bch2_btree_interior_update_will_free_node(c, as, b);
|
bch2_btree_interior_update_will_free_node(c, as, b);
|
||||||
|
|
||||||
n = bch2_btree_node_alloc_replacement(c, b, reserve);
|
n = bch2_btree_node_alloc_replacement(c, b, reserve);
|
||||||
|
list_add(&n->reachable, &as->reachable_list);
|
||||||
|
|
||||||
bch2_btree_build_aux_trees(n);
|
bch2_btree_build_aux_trees(n);
|
||||||
six_unlock_write(&n->lock);
|
six_unlock_write(&n->lock);
|
||||||
|
@ -64,7 +64,7 @@ struct pending_btree_node_free {
|
|||||||
*/
|
*/
|
||||||
struct btree_interior_update {
|
struct btree_interior_update {
|
||||||
struct closure cl;
|
struct closure cl;
|
||||||
struct bch_fs *c;
|
struct bch_fs *c;
|
||||||
|
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
|
|
||||||
@ -86,6 +86,7 @@ struct btree_interior_update {
|
|||||||
*/
|
*/
|
||||||
struct btree *b;
|
struct btree *b;
|
||||||
struct list_head write_blocked_list;
|
struct list_head write_blocked_list;
|
||||||
|
struct list_head reachable_list;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
|
* BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
|
||||||
@ -317,7 +318,6 @@ struct btree_insert {
|
|||||||
|
|
||||||
int __bch2_btree_insert_at(struct btree_insert *);
|
int __bch2_btree_insert_at(struct btree_insert *);
|
||||||
|
|
||||||
|
|
||||||
#define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
|
#define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N
|
||||||
#define COUNT_ARGS(...) _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
#define COUNT_ARGS(...) _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||||
|
|
||||||
@ -380,6 +380,8 @@ int __bch2_btree_insert_at(struct btree_insert *);
|
|||||||
*/
|
*/
|
||||||
#define BTREE_INSERT_JOURNAL_REPLAY (1 << 3)
|
#define BTREE_INSERT_JOURNAL_REPLAY (1 << 3)
|
||||||
|
|
||||||
|
int bch2_btree_delete_at(struct btree_iter *, unsigned);
|
||||||
|
|
||||||
int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
|
int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
|
||||||
struct disk_reservation *,
|
struct disk_reservation *,
|
||||||
struct extent_insert_hook *, u64 *, unsigned);
|
struct extent_insert_hook *, u64 *, unsigned);
|
||||||
@ -403,7 +405,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
|
|||||||
return u64s <= trans->journal_res.u64s;
|
return u64s <= trans->journal_res.u64s;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bch2_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
|
|
||||||
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
|
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
|
||||||
struct disk_reservation *,
|
struct disk_reservation *,
|
||||||
struct extent_insert_hook *, u64 *, int flags);
|
struct extent_insert_hook *, u64 *, int flags);
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "extents.h"
|
#include "extents.h"
|
||||||
#include "fs-gc.h"
|
#include "fsck.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "super.h"
|
#include "super.h"
|
||||||
|
@ -20,6 +20,11 @@ unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned dirent_val_u64s(unsigned len)
|
||||||
|
{
|
||||||
|
return DIV_ROUND_UP(sizeof(struct bch_dirent) + len, sizeof(u64));
|
||||||
|
}
|
||||||
|
|
||||||
static u64 bch2_dirent_hash(const struct bch_hash_info *info,
|
static u64 bch2_dirent_hash(const struct bch_hash_info *info,
|
||||||
const struct qstr *name)
|
const struct qstr *name)
|
||||||
{
|
{
|
||||||
@ -64,7 +69,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
|
|||||||
return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
|
return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bch_hash_desc dirent_hash_desc = {
|
const struct bch_hash_desc bch2_dirent_hash_desc = {
|
||||||
.btree_id = BTREE_ID_DIRENTS,
|
.btree_id = BTREE_ID_DIRENTS,
|
||||||
.key_type = BCH_DIRENT,
|
.key_type = BCH_DIRENT,
|
||||||
.whiteout_type = BCH_DIRENT_WHITEOUT,
|
.whiteout_type = BCH_DIRENT_WHITEOUT,
|
||||||
@ -77,12 +82,30 @@ static const struct bch_hash_desc dirent_hash_desc = {
|
|||||||
static const char *bch2_dirent_invalid(const struct bch_fs *c,
|
static const char *bch2_dirent_invalid(const struct bch_fs *c,
|
||||||
struct bkey_s_c k)
|
struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
|
struct bkey_s_c_dirent d;
|
||||||
|
unsigned len;
|
||||||
|
|
||||||
switch (k.k->type) {
|
switch (k.k->type) {
|
||||||
case BCH_DIRENT:
|
case BCH_DIRENT:
|
||||||
return bkey_val_bytes(k.k) < sizeof(struct bch_dirent)
|
if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
|
||||||
? "value too small"
|
return "value too small";
|
||||||
: NULL;
|
|
||||||
|
|
||||||
|
d = bkey_s_c_to_dirent(k);
|
||||||
|
len = bch2_dirent_name_bytes(d);
|
||||||
|
|
||||||
|
if (!len)
|
||||||
|
return "empty name";
|
||||||
|
|
||||||
|
if (bkey_val_u64s(k.k) > dirent_val_u64s(len))
|
||||||
|
return "value too big";
|
||||||
|
|
||||||
|
if (len > NAME_MAX)
|
||||||
|
return "dirent name too big";
|
||||||
|
|
||||||
|
if (memchr(d.v->d_name, '/', len))
|
||||||
|
return "dirent name has invalid characters";
|
||||||
|
|
||||||
|
return NULL;
|
||||||
case BCH_DIRENT_WHITEOUT:
|
case BCH_DIRENT_WHITEOUT:
|
||||||
return bkey_val_bytes(k.k) != 0
|
return bkey_val_bytes(k.k) != 0
|
||||||
? "value size should be zero"
|
? "value size should be zero"
|
||||||
@ -97,21 +120,15 @@ static void bch2_dirent_to_text(struct bch_fs *c, char *buf,
|
|||||||
size_t size, struct bkey_s_c k)
|
size_t size, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
struct bkey_s_c_dirent d;
|
struct bkey_s_c_dirent d;
|
||||||
|
size_t n = 0;
|
||||||
|
|
||||||
switch (k.k->type) {
|
switch (k.k->type) {
|
||||||
case BCH_DIRENT:
|
case BCH_DIRENT:
|
||||||
d = bkey_s_c_to_dirent(k);
|
d = bkey_s_c_to_dirent(k);
|
||||||
|
|
||||||
if (size) {
|
n += bch_scnmemcpy(buf + n, size - n, d.v->d_name,
|
||||||
unsigned n = min_t(unsigned, size,
|
bch2_dirent_name_bytes(d));
|
||||||
bch2_dirent_name_bytes(d));
|
n += scnprintf(buf + n, size - n, " -> %llu", d.v->d_inum);
|
||||||
memcpy(buf, d.v->d_name, n);
|
|
||||||
buf[size - 1] = '\0';
|
|
||||||
buf += n;
|
|
||||||
size -= n;
|
|
||||||
}
|
|
||||||
|
|
||||||
scnprintf(buf, size, " -> %llu", d.v->d_inum);
|
|
||||||
break;
|
break;
|
||||||
case BCH_DIRENT_WHITEOUT:
|
case BCH_DIRENT_WHITEOUT:
|
||||||
scnprintf(buf, size, "whiteout");
|
scnprintf(buf, size, "whiteout");
|
||||||
@ -128,9 +145,7 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
|
|||||||
const struct qstr *name, u64 dst)
|
const struct qstr *name, u64 dst)
|
||||||
{
|
{
|
||||||
struct bkey_i_dirent *dirent;
|
struct bkey_i_dirent *dirent;
|
||||||
unsigned u64s = BKEY_U64s +
|
unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len);
|
||||||
DIV_ROUND_UP(sizeof(struct bch_dirent) + name->len,
|
|
||||||
sizeof(u64));
|
|
||||||
|
|
||||||
dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS);
|
dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS);
|
||||||
if (!dirent)
|
if (!dirent)
|
||||||
@ -163,7 +178,7 @@ int bch2_dirent_create(struct bch_fs *c, u64 dir_inum,
|
|||||||
if (!dirent)
|
if (!dirent)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
ret = bch2_hash_set(dirent_hash_desc, hash_info, c, dir_inum,
|
ret = bch2_hash_set(bch2_dirent_hash_desc, hash_info, c, dir_inum,
|
||||||
journal_seq, &dirent->k_i, flags);
|
journal_seq, &dirent->k_i, flags);
|
||||||
kfree(dirent);
|
kfree(dirent);
|
||||||
|
|
||||||
@ -223,13 +238,13 @@ retry:
|
|||||||
* from the original hashed position (like we do when creating dirents,
|
* from the original hashed position (like we do when creating dirents,
|
||||||
* in bch_hash_set) - we never move existing dirents to different slot:
|
* in bch_hash_set) - we never move existing dirents to different slot:
|
||||||
*/
|
*/
|
||||||
old_src = bch2_hash_lookup_at(dirent_hash_desc,
|
old_src = bch2_hash_lookup_at(bch2_dirent_hash_desc,
|
||||||
&src_ei->str_hash,
|
&src_ei->str_hash,
|
||||||
&src_iter, src_name);
|
&src_iter, src_name);
|
||||||
if ((ret = btree_iter_err(old_src)))
|
if ((ret = btree_iter_err(old_src)))
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = bch2_hash_needs_whiteout(dirent_hash_desc,
|
ret = bch2_hash_needs_whiteout(bch2_dirent_hash_desc,
|
||||||
&src_ei->str_hash,
|
&src_ei->str_hash,
|
||||||
&whiteout_iter, &src_iter);
|
&whiteout_iter, &src_iter);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
@ -242,8 +257,8 @@ retry:
|
|||||||
* to do that check for us for correctness:
|
* to do that check for us for correctness:
|
||||||
*/
|
*/
|
||||||
old_dst = mode == BCH_RENAME
|
old_dst = mode == BCH_RENAME
|
||||||
? bch2_hash_hole_at(dirent_hash_desc, &dst_iter)
|
? bch2_hash_hole_at(bch2_dirent_hash_desc, &dst_iter)
|
||||||
: bch2_hash_lookup_at(dirent_hash_desc,
|
: bch2_hash_lookup_at(bch2_dirent_hash_desc,
|
||||||
&dst_ei->str_hash,
|
&dst_ei->str_hash,
|
||||||
&dst_iter, dst_name);
|
&dst_iter, dst_name);
|
||||||
if ((ret = btree_iter_err(old_dst)))
|
if ((ret = btree_iter_err(old_dst)))
|
||||||
@ -330,7 +345,7 @@ int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum,
|
|||||||
const struct qstr *name,
|
const struct qstr *name,
|
||||||
u64 *journal_seq)
|
u64 *journal_seq)
|
||||||
{
|
{
|
||||||
return bch2_hash_delete(dirent_hash_desc, hash_info,
|
return bch2_hash_delete(bch2_dirent_hash_desc, hash_info,
|
||||||
c, dir_inum, journal_seq, name);
|
c, dir_inum, journal_seq, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -342,7 +357,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
|
|||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
u64 inum;
|
u64 inum;
|
||||||
|
|
||||||
k = bch2_hash_lookup(dirent_hash_desc, hash_info, c,
|
k = bch2_hash_lookup(bch2_dirent_hash_desc, hash_info, c,
|
||||||
dir_inum, &iter, name);
|
dir_inum, &iter, name);
|
||||||
if (IS_ERR(k.k)) {
|
if (IS_ERR(k.k)) {
|
||||||
bch2_btree_iter_unlock(&iter);
|
bch2_btree_iter_unlock(&iter);
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#ifndef _BCACHE_DIRENT_H
|
#ifndef _BCACHE_DIRENT_H
|
||||||
#define _BCACHE_DIRENT_H
|
#define _BCACHE_DIRENT_H
|
||||||
|
|
||||||
|
#include "str_hash.h"
|
||||||
|
|
||||||
|
extern const struct bch_hash_desc bch2_dirent_hash_desc;
|
||||||
extern const struct bkey_ops bch2_bkey_dirent_ops;
|
extern const struct bkey_ops bch2_bkey_dirent_ops;
|
||||||
|
|
||||||
struct qstr;
|
struct qstr;
|
||||||
|
@ -49,3 +49,102 @@ void bch2_nonfatal_io_error(struct bch_dev *ca)
|
|||||||
{
|
{
|
||||||
queue_work(system_long_wq, &ca->io_error_work);
|
queue_work(system_long_wq, &ca->io_error_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
#define ask_yn() false
|
||||||
|
#else
|
||||||
|
#include "tools-util.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
|
||||||
|
const char *fmt, ...)
|
||||||
|
{
|
||||||
|
struct fsck_err_state *s;
|
||||||
|
va_list args;
|
||||||
|
bool fix = false, print = true, suppressing = false;
|
||||||
|
char _buf[sizeof(s->buf)], *buf = _buf;
|
||||||
|
|
||||||
|
mutex_lock(&c->fsck_error_lock);
|
||||||
|
|
||||||
|
if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
|
||||||
|
goto print;
|
||||||
|
|
||||||
|
list_for_each_entry(s, &c->fsck_errors, list)
|
||||||
|
if (s->fmt == fmt)
|
||||||
|
goto found;
|
||||||
|
|
||||||
|
s = kzalloc(sizeof(*s), GFP_KERNEL);
|
||||||
|
if (!s) {
|
||||||
|
if (!c->fsck_alloc_err)
|
||||||
|
bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
|
||||||
|
c->fsck_alloc_err = true;
|
||||||
|
buf = _buf;
|
||||||
|
goto print;
|
||||||
|
}
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&s->list);
|
||||||
|
s->fmt = fmt;
|
||||||
|
found:
|
||||||
|
list_move(&s->list, &c->fsck_errors);
|
||||||
|
s->nr++;
|
||||||
|
suppressing = s->nr == 10;
|
||||||
|
print = s->nr <= 10;
|
||||||
|
buf = s->buf;
|
||||||
|
print:
|
||||||
|
va_start(args, fmt);
|
||||||
|
vscnprintf(buf, sizeof(_buf), fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
|
||||||
|
if (flags & FSCK_CAN_FIX) {
|
||||||
|
if (c->opts.fix_errors == FSCK_ERR_ASK) {
|
||||||
|
printk(KERN_ERR "%s: fix?", buf);
|
||||||
|
fix = ask_yn();
|
||||||
|
} else if (c->opts.fix_errors == FSCK_ERR_YES ||
|
||||||
|
(c->opts.nochanges &&
|
||||||
|
!(flags & FSCK_CAN_IGNORE))) {
|
||||||
|
if (print)
|
||||||
|
bch_err(c, "%s, fixing", buf);
|
||||||
|
fix = true;
|
||||||
|
} else {
|
||||||
|
if (print)
|
||||||
|
bch_err(c, "%s, not fixing", buf);
|
||||||
|
fix = false;
|
||||||
|
}
|
||||||
|
} else if (flags & FSCK_NEED_FSCK) {
|
||||||
|
if (print)
|
||||||
|
bch_err(c, "%s (run fsck to correct)", buf);
|
||||||
|
} else {
|
||||||
|
if (print)
|
||||||
|
bch_err(c, "%s (repair unimplemented)", buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (suppressing)
|
||||||
|
bch_err(c, "Ratelimiting new instances of previous error");
|
||||||
|
|
||||||
|
mutex_unlock(&c->fsck_error_lock);
|
||||||
|
|
||||||
|
if (fix)
|
||||||
|
set_bit(BCH_FS_FSCK_FIXED_ERRORS, &c->flags);
|
||||||
|
|
||||||
|
return fix ? FSCK_ERR_FIX
|
||||||
|
: flags & FSCK_CAN_IGNORE ? FSCK_ERR_IGNORE
|
||||||
|
: FSCK_ERR_EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bch2_flush_fsck_errs(struct bch_fs *c)
|
||||||
|
{
|
||||||
|
struct fsck_err_state *s, *n;
|
||||||
|
|
||||||
|
mutex_lock(&c->fsck_error_lock);
|
||||||
|
set_bit(BCH_FS_FSCK_DONE, &c->flags);
|
||||||
|
|
||||||
|
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
|
||||||
|
if (s->nr > 10)
|
||||||
|
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
|
||||||
|
|
||||||
|
list_del(&s->list);
|
||||||
|
kfree(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&c->fsck_error_lock);
|
||||||
|
}
|
||||||
|
@ -95,62 +95,38 @@ enum {
|
|||||||
BCH_FSCK_UNKNOWN_VERSION = 4,
|
BCH_FSCK_UNKNOWN_VERSION = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These macros return true if error should be fixed: */
|
|
||||||
|
|
||||||
/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
|
|
||||||
|
|
||||||
enum fsck_err_opts {
|
enum fsck_err_opts {
|
||||||
FSCK_ERR_NO,
|
FSCK_ERR_NO,
|
||||||
FSCK_ERR_YES,
|
FSCK_ERR_YES,
|
||||||
FSCK_ERR_ASK,
|
FSCK_ERR_ASK,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __KERNEL__
|
enum fsck_err_ret {
|
||||||
#define __fsck_err_should_fix(c, msg, ...) \
|
FSCK_ERR_IGNORE = 0,
|
||||||
({ \
|
FSCK_ERR_FIX = 1,
|
||||||
bool _fix = (c)->opts.fix_errors; \
|
FSCK_ERR_EXIT = 2,
|
||||||
bch_err(c, msg ", %sfixing", ##__VA_ARGS__, _fix ? "" : "not ");\
|
};
|
||||||
_fix; \
|
|
||||||
})
|
|
||||||
#else
|
|
||||||
#include "tools-util.h"
|
|
||||||
|
|
||||||
#define __fsck_err_should_fix(c, msg, ...) \
|
struct fsck_err_state {
|
||||||
({ \
|
struct list_head list;
|
||||||
bool _fix = false; \
|
const char *fmt;
|
||||||
switch ((c)->opts.fix_errors) { \
|
u64 nr;
|
||||||
case FSCK_ERR_ASK: \
|
char buf[512];
|
||||||
printf(msg ": fix?", ##__VA_ARGS__); \
|
};
|
||||||
_fix = ask_yn(); \
|
|
||||||
break; \
|
|
||||||
case FSCK_ERR_YES: \
|
|
||||||
bch_err(c, msg ", fixing", ##__VA_ARGS__); \
|
|
||||||
_fix = true; \
|
|
||||||
break; \
|
|
||||||
case FSCK_ERR_NO: \
|
|
||||||
bch_err(c, msg, ##__VA_ARGS__); \
|
|
||||||
_fix = false; \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
_fix; \
|
|
||||||
})
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \
|
#define FSCK_CAN_FIX (1 << 0)
|
||||||
|
#define FSCK_CAN_IGNORE (1 << 1)
|
||||||
|
#define FSCK_NEED_FSCK (1 << 2)
|
||||||
|
|
||||||
|
enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
|
||||||
|
unsigned, const char *, ...);
|
||||||
|
void bch2_flush_fsck_errs(struct bch_fs *);
|
||||||
|
|
||||||
|
#define __fsck_err(c, _flags, msg, ...) \
|
||||||
({ \
|
({ \
|
||||||
bool _fix; \
|
int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\
|
||||||
\
|
\
|
||||||
if (_can_fix) { \
|
if (_fix == FSCK_ERR_EXIT) { \
|
||||||
_fix = __fsck_err_should_fix(c, msg, ##__VA_ARGS__); \
|
|
||||||
} else { \
|
|
||||||
bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \
|
|
||||||
_fix = false; \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
if (_fix) \
|
|
||||||
set_bit(BCH_FS_FSCK_FIXED_ERRORS, &(c)->flags); \
|
|
||||||
\
|
|
||||||
if (!_fix && !_can_ignore) { \
|
|
||||||
bch_err(c, "Unable to continue, halting"); \
|
bch_err(c, "Unable to continue, halting"); \
|
||||||
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||||
goto fsck_err; \
|
goto fsck_err; \
|
||||||
@ -159,24 +135,27 @@ enum fsck_err_opts {
|
|||||||
_fix; \
|
_fix; \
|
||||||
})
|
})
|
||||||
|
|
||||||
#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \
|
/* These macros return true if error should be fixed: */
|
||||||
((cond) ? __fsck_err(c, _can_fix, _can_ignore, \
|
|
||||||
_nofix_msg, ##__VA_ARGS__) : false)
|
/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
|
||||||
|
|
||||||
|
#define __fsck_err_on(cond, c, _flags, ...) \
|
||||||
|
((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false)
|
||||||
|
|
||||||
#define unfixable_fsck_err_on(cond, c, ...) \
|
#define unfixable_fsck_err_on(cond, c, ...) \
|
||||||
__fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__)
|
__fsck_err_on(cond, c, FSCK_CAN_IGNORE, ##__VA_ARGS__)
|
||||||
|
|
||||||
#define need_fsck_err_on(cond, c, ...) \
|
#define need_fsck_err_on(cond, c, ...) \
|
||||||
__fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__)
|
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
|
||||||
|
|
||||||
#define mustfix_fsck_err(c, ...) \
|
#define mustfix_fsck_err(c, ...) \
|
||||||
__fsck_err(c, true, false, "not fixing", ##__VA_ARGS__)
|
__fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__)
|
||||||
|
|
||||||
#define mustfix_fsck_err_on(cond, c, ...) \
|
#define mustfix_fsck_err_on(cond, c, ...) \
|
||||||
__fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__)
|
__fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__)
|
||||||
|
|
||||||
#define fsck_err_on(cond, c, ...) \
|
#define fsck_err_on(cond, c, ...) \
|
||||||
__fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__)
|
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
|
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
|
||||||
|
@ -5,8 +5,8 @@
|
|||||||
#include "clock.h"
|
#include "clock.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "fs.h"
|
#include "fs.h"
|
||||||
#include "fs-gc.h"
|
|
||||||
#include "fs-io.h"
|
#include "fs-io.h"
|
||||||
|
#include "fsck.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
#include "dirent.h"
|
#include "dirent.h"
|
||||||
#include "extents.h"
|
#include "extents.h"
|
||||||
#include "fs.h"
|
#include "fs.h"
|
||||||
#include "fs-gc.h"
|
|
||||||
#include "fs-io.h"
|
#include "fs-io.h"
|
||||||
|
#include "fsck.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
|
@ -4,10 +4,11 @@
|
|||||||
#include "dirent.h"
|
#include "dirent.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "fs.h"
|
#include "fs.h"
|
||||||
#include "fs-gc.h"
|
#include "fsck.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
#include "super.h"
|
#include "super.h"
|
||||||
|
#include "xattr.h"
|
||||||
|
|
||||||
#include <linux/dcache.h> /* struct qstr */
|
#include <linux/dcache.h> /* struct qstr */
|
||||||
#include <linux/generic-radix-tree.h>
|
#include <linux/generic-radix-tree.h>
|
||||||
@ -37,12 +38,16 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
bch2_btree_iter_unlock(iter);
|
bch2_btree_iter_unlock(iter);
|
||||||
|
|
||||||
ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
|
ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode);
|
||||||
if (ret)
|
if (ret) {
|
||||||
|
bch_err(c, "remove_dirent: err %i looking up directory inode", ret);
|
||||||
goto err;
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
dir_hash_info = bch2_hash_info_init(c, &dir_inode);
|
dir_hash_info = bch2_hash_info_init(c, &dir_inode);
|
||||||
|
|
||||||
ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
|
ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
|
||||||
|
if (ret)
|
||||||
|
bch_err(c, "remove_dirent: err %i deleting dirent", ret);
|
||||||
err:
|
err:
|
||||||
kfree(buf);
|
kfree(buf);
|
||||||
return ret;
|
return ret;
|
||||||
@ -108,6 +113,118 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct hash_check {
|
||||||
|
struct bch_hash_info info;
|
||||||
|
struct btree_iter chain;
|
||||||
|
struct btree_iter iter;
|
||||||
|
u64 next;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void hash_check_init(const struct bch_hash_desc desc,
|
||||||
|
struct hash_check *h, struct bch_fs *c)
|
||||||
|
{
|
||||||
|
bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN);
|
||||||
|
bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
|
||||||
|
const struct bch_inode_unpacked *bi)
|
||||||
|
{
|
||||||
|
h->info = bch2_hash_info_init(c, bi);
|
||||||
|
h->next = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int hash_redo_key(const struct bch_hash_desc desc,
|
||||||
|
struct hash_check *h, struct bch_fs *c,
|
||||||
|
struct btree_iter *k_iter, struct bkey_s_c k,
|
||||||
|
u64 hashed)
|
||||||
|
{
|
||||||
|
struct bkey_i *tmp;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||||
|
if (!tmp)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
bkey_reassemble(tmp, k);
|
||||||
|
|
||||||
|
ret = bch2_btree_delete_at(k_iter, 0);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
bch2_btree_iter_unlock(k_iter);
|
||||||
|
|
||||||
|
bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL,
|
||||||
|
tmp, BCH_HASH_SET_MUST_CREATE);
|
||||||
|
err:
|
||||||
|
kfree(tmp);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int hash_check_key(const struct bch_hash_desc desc,
|
||||||
|
struct hash_check *h, struct bch_fs *c,
|
||||||
|
struct btree_iter *k_iter, struct bkey_s_c k)
|
||||||
|
{
|
||||||
|
char buf[200];
|
||||||
|
u64 hashed;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (k.k->type != desc.whiteout_type &&
|
||||||
|
k.k->type != desc.key_type)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (k.k->p.offset != h->next) {
|
||||||
|
if (!btree_iter_linked(&h->chain)) {
|
||||||
|
bch2_btree_iter_link(k_iter, &h->chain);
|
||||||
|
bch2_btree_iter_link(k_iter, &h->iter);
|
||||||
|
}
|
||||||
|
bch2_btree_iter_copy(&h->chain, k_iter);
|
||||||
|
}
|
||||||
|
h->next = k.k->p.offset + 1;
|
||||||
|
|
||||||
|
if (k.k->type != desc.key_type)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
hashed = desc.hash_bkey(&h->info, k);
|
||||||
|
|
||||||
|
if (fsck_err_on(hashed < h->chain.pos.offset ||
|
||||||
|
hashed > k.k->p.offset, c,
|
||||||
|
"hash table key at wrong offset: %llu, "
|
||||||
|
"hashed to %llu chain starts at %llu\n%s",
|
||||||
|
k.k->p.offset, hashed, h->chain.pos.offset,
|
||||||
|
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
|
ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
|
||||||
|
if (ret) {
|
||||||
|
bch_err(c, "hash_redo_key err %i", ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bkey_cmp(h->chain.pos, k_iter->pos))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
bch2_btree_iter_copy(&h->iter, &h->chain);
|
||||||
|
while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) {
|
||||||
|
struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter);
|
||||||
|
|
||||||
|
if (fsck_err_on(k2.k->type == desc.key_type &&
|
||||||
|
!desc.cmp_bkey(k, k2), c,
|
||||||
|
"duplicate hash table keys:\n%s",
|
||||||
|
bch2_bkey_val_to_text(c, desc.btree_id,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
|
ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
bch2_btree_iter_advance_pos(&h->iter);
|
||||||
|
}
|
||||||
|
fsck_err:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
|
* Walk extents: verify that extents have a corresponding S_ISREG inode, and
|
||||||
* that i_size and i_sectors are consistent
|
* that i_size and i_sectors are consistent
|
||||||
@ -130,14 +247,18 @@ static int check_extents(struct bch_fs *c)
|
|||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
unfixable_fsck_err_on(!w.have_inode, c,
|
if (fsck_err_on(!w.have_inode, c,
|
||||||
"extent type %u for missing inode %llu",
|
"extent type %u for missing inode %llu",
|
||||||
k.k->type, k.k->p.inode);
|
k.k->type, k.k->p.inode) ||
|
||||||
|
fsck_err_on(w.have_inode &&
|
||||||
unfixable_fsck_err_on(w.have_inode &&
|
|
||||||
!S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
|
!S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
|
||||||
"extent type %u for non regular file, inode %llu mode %o",
|
"extent type %u for non regular file, inode %llu mode %o",
|
||||||
k.k->type, k.k->p.inode, w.inode.i_mode);
|
k.k->type, k.k->p.inode, w.inode.i_mode)) {
|
||||||
|
ret = bch2_btree_delete_at(&iter, 0);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
unfixable_fsck_err_on(w.first_this_inode &&
|
unfixable_fsck_err_on(w.first_this_inode &&
|
||||||
w.have_inode &&
|
w.have_inode &&
|
||||||
@ -154,6 +275,7 @@ static int check_extents(struct bch_fs *c)
|
|||||||
"extent type %u offset %llu past end of inode %llu, i_size %llu",
|
"extent type %u offset %llu past end of inode %llu, i_size %llu",
|
||||||
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
|
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
|
||||||
}
|
}
|
||||||
|
err:
|
||||||
fsck_err:
|
fsck_err:
|
||||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||||
}
|
}
|
||||||
@ -166,10 +288,15 @@ noinline_for_stack
|
|||||||
static int check_dirents(struct bch_fs *c)
|
static int check_dirents(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct inode_walker w = inode_walker_init();
|
struct inode_walker w = inode_walker_init();
|
||||||
|
struct hash_check h;
|
||||||
struct btree_iter iter;
|
struct btree_iter iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
|
unsigned name_len;
|
||||||
|
char buf[200];
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
hash_check_init(bch2_dirent_hash_desc, &h, c);
|
||||||
|
|
||||||
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
|
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
|
||||||
POS(BCACHE_ROOT_INO, 0), k) {
|
POS(BCACHE_ROOT_INO, 0), k) {
|
||||||
struct bkey_s_c_dirent d;
|
struct bkey_s_c_dirent d;
|
||||||
@ -181,13 +308,32 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
unfixable_fsck_err_on(!w.have_inode, c,
|
if (fsck_err_on(!w.have_inode, c,
|
||||||
"dirent in nonexisting directory %llu",
|
"dirent in nonexisting directory:\n%s",
|
||||||
k.k->p.inode);
|
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||||
|
buf, sizeof(buf), k)) ||
|
||||||
|
fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
|
||||||
|
"dirent in non directory inode type %u:\n%s",
|
||||||
|
mode_to_type(w.inode.i_mode),
|
||||||
|
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
|
ret = bch2_btree_delete_at(&iter, 0);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
|
if (w.first_this_inode && w.have_inode)
|
||||||
"dirent in non directory inode %llu, type %u",
|
hash_check_set_inode(&h, c, &w.inode);
|
||||||
k.k->p.inode, mode_to_type(w.inode.i_mode));
|
|
||||||
|
ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k);
|
||||||
|
if (ret > 0) {
|
||||||
|
ret = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
goto fsck_err;
|
||||||
|
|
||||||
if (k.k->type != BCH_DIRENT)
|
if (k.k->type != BCH_DIRENT)
|
||||||
continue;
|
continue;
|
||||||
@ -195,8 +341,25 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
d = bkey_s_c_to_dirent(k);
|
d = bkey_s_c_to_dirent(k);
|
||||||
d_inum = le64_to_cpu(d.v->d_inum);
|
d_inum = le64_to_cpu(d.v->d_inum);
|
||||||
|
|
||||||
|
name_len = bch2_dirent_name_bytes(d);
|
||||||
|
|
||||||
|
if (fsck_err_on(!name_len, c, "empty dirent") ||
|
||||||
|
fsck_err_on(name_len == 1 &&
|
||||||
|
!memcmp(d.v->d_name, ".", 1), c,
|
||||||
|
". dirent") ||
|
||||||
|
fsck_err_on(name_len == 2 &&
|
||||||
|
!memcmp(d.v->d_name, "..", 2), c,
|
||||||
|
".. dirent")) {
|
||||||
|
ret = remove_dirent(c, &iter, d);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (fsck_err_on(d_inum == d.k->p.inode, c,
|
if (fsck_err_on(d_inum == d.k->p.inode, c,
|
||||||
"dirent points to own directory")) {
|
"dirent points to own directory:\n%s",
|
||||||
|
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
ret = remove_dirent(c, &iter, d);
|
ret = remove_dirent(c, &iter, d);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -211,8 +374,9 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
if (fsck_err_on(!have_target, c,
|
if (fsck_err_on(!have_target, c,
|
||||||
"dirent points to missing inode %llu, type %u filename %s",
|
"dirent points to missing inode:\n%s",
|
||||||
d_inum, d.v->d_type, d.v->d_name)) {
|
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
ret = remove_dirent(c, &iter, d);
|
ret = remove_dirent(c, &iter, d);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -222,10 +386,10 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
if (fsck_err_on(have_target &&
|
if (fsck_err_on(have_target &&
|
||||||
d.v->d_type !=
|
d.v->d_type !=
|
||||||
mode_to_type(le16_to_cpu(target.i_mode)), c,
|
mode_to_type(le16_to_cpu(target.i_mode)), c,
|
||||||
"incorrect d_type: got %u should be %u, filename %s",
|
"incorrect d_type: should be %u:\n%s",
|
||||||
d.v->d_type,
|
|
||||||
mode_to_type(le16_to_cpu(target.i_mode)),
|
mode_to_type(le16_to_cpu(target.i_mode)),
|
||||||
d.v->d_name)) {
|
bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS,
|
||||||
|
buf, sizeof(buf), k))) {
|
||||||
struct bkey_i_dirent *n;
|
struct bkey_i_dirent *n;
|
||||||
|
|
||||||
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
|
n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
|
||||||
@ -248,6 +412,8 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
}
|
}
|
||||||
err:
|
err:
|
||||||
fsck_err:
|
fsck_err:
|
||||||
|
bch2_btree_iter_unlock(&h.chain);
|
||||||
|
bch2_btree_iter_unlock(&h.iter);
|
||||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,21 +424,39 @@ noinline_for_stack
|
|||||||
static int check_xattrs(struct bch_fs *c)
|
static int check_xattrs(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct inode_walker w = inode_walker_init();
|
struct inode_walker w = inode_walker_init();
|
||||||
|
struct hash_check h;
|
||||||
struct btree_iter iter;
|
struct btree_iter iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
|
hash_check_init(bch2_xattr_hash_desc, &h, c);
|
||||||
|
|
||||||
for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
|
for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
|
||||||
POS(BCACHE_ROOT_INO, 0), k) {
|
POS(BCACHE_ROOT_INO, 0), k) {
|
||||||
ret = walk_inode(c, &w, k.k->p.inode);
|
ret = walk_inode(c, &w, k.k->p.inode);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
unfixable_fsck_err_on(!w.have_inode, c,
|
if (fsck_err_on(!w.have_inode, c,
|
||||||
"xattr for missing inode %llu",
|
"xattr for missing inode %llu",
|
||||||
k.k->p.inode);
|
k.k->p.inode)) {
|
||||||
|
ret = bch2_btree_delete_at(&iter, 0);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (w.first_this_inode && w.have_inode)
|
||||||
|
hash_check_set_inode(&h, c, &w.inode);
|
||||||
|
|
||||||
|
ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k);
|
||||||
|
if (ret)
|
||||||
|
goto fsck_err;
|
||||||
}
|
}
|
||||||
|
err:
|
||||||
fsck_err:
|
fsck_err:
|
||||||
|
bch2_btree_iter_unlock(&h.chain);
|
||||||
|
bch2_btree_iter_unlock(&h.iter);
|
||||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -445,6 +629,8 @@ static int check_directory_structure(struct bch_fs *c,
|
|||||||
|
|
||||||
/* DFS: */
|
/* DFS: */
|
||||||
restart_dfs:
|
restart_dfs:
|
||||||
|
had_unreachable = false;
|
||||||
|
|
||||||
ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
|
ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -478,7 +664,8 @@ next:
|
|||||||
d_inum = le64_to_cpu(dirent.v->d_inum);
|
d_inum = le64_to_cpu(dirent.v->d_inum);
|
||||||
|
|
||||||
if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
|
if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
|
||||||
"directory with multiple hardlinks")) {
|
"directory %llu has multiple hardlinks",
|
||||||
|
d_inum)) {
|
||||||
ret = remove_dirent(c, &iter, dirent);
|
ret = remove_dirent(c, &iter, dirent);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
@ -503,8 +690,6 @@ up:
|
|||||||
path.nr--;
|
path.nr--;
|
||||||
}
|
}
|
||||||
|
|
||||||
had_unreachable = false;
|
|
||||||
|
|
||||||
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
|
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
|
||||||
if (k.k->type != BCH_INODE_FS ||
|
if (k.k->type != BCH_INODE_FS ||
|
||||||
!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
|
!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
|
||||||
@ -640,7 +825,7 @@ static int bch2_gc_do_inode(struct bch_fs *c,
|
|||||||
|
|
||||||
ret = bch2_inode_unpack(inode, &u);
|
ret = bch2_inode_unpack(inode, &u);
|
||||||
if (bch2_fs_inconsistent_on(ret, c,
|
if (bch2_fs_inconsistent_on(ret, c,
|
||||||
"error unpacking inode %llu in fs-gc",
|
"error unpacking inode %llu in fsck",
|
||||||
inode.k->p.inode))
|
inode.k->p.inode))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@ -894,36 +1079,59 @@ int bch2_fsck(struct bch_fs *c, bool full_fsck)
|
|||||||
struct bch_inode_unpacked root_inode, lostfound_inode;
|
struct bch_inode_unpacked root_inode, lostfound_inode;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
ret = check_root(c, &root_inode);
|
if (full_fsck) {
|
||||||
if (ret)
|
bch_verbose(c, "checking extents");
|
||||||
return ret;
|
ret = check_extents(c);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
bch_verbose(c, "checking dirents");
|
||||||
if (ret)
|
ret = check_dirents(c);
|
||||||
return ret;
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (!full_fsck)
|
bch_verbose(c, "checking xattrs");
|
||||||
goto check_nlinks;
|
ret = check_xattrs(c);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = check_extents(c);
|
bch_verbose(c, "checking root directory");
|
||||||
if (ret)
|
ret = check_root(c, &root_inode);
|
||||||
return ret;
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = check_dirents(c);
|
bch_verbose(c, "checking lost+found");
|
||||||
if (ret)
|
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
||||||
return ret;
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = check_xattrs(c);
|
bch_verbose(c, "checking directory structure");
|
||||||
if (ret)
|
ret = check_directory_structure(c, &lostfound_inode);
|
||||||
return ret;
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
ret = check_directory_structure(c, &lostfound_inode);
|
bch_verbose(c, "checking inode nlinks");
|
||||||
if (ret)
|
ret = check_inode_nlinks(c, &lostfound_inode);
|
||||||
return ret;
|
if (ret)
|
||||||
check_nlinks:
|
return ret;
|
||||||
ret = check_inode_nlinks(c, &lostfound_inode);
|
} else {
|
||||||
if (ret)
|
bch_verbose(c, "checking root directory");
|
||||||
return ret;
|
ret = check_root(c, &root_inode);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
bch_verbose(c, "checking lost+found");
|
||||||
|
ret = check_lostfound(c, &root_inode, &lostfound_inode);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
bch_verbose(c, "checking inode nlinks");
|
||||||
|
ret = check_inode_nlinks(c, &lostfound_inode);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
bch2_flush_fsck_errs(c);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
@ -25,14 +25,12 @@ static const u8 bits_table[8] = {
|
|||||||
13 * 8 - 8,
|
13 * 8 - 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int inode_encode_field(u8 *out, u8 *end, const u64 in[2])
|
static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo)
|
||||||
{
|
{
|
||||||
unsigned bytes, bits, shift;
|
__be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), };
|
||||||
|
unsigned shift, bytes, bits = likely(!hi)
|
||||||
if (likely(!in[1]))
|
? fls64(lo)
|
||||||
bits = fls64(in[0]);
|
: fls64(hi) + 64;
|
||||||
else
|
|
||||||
bits = fls64(in[1]) + 64;
|
|
||||||
|
|
||||||
for (shift = 1; shift <= 8; shift++)
|
for (shift = 1; shift <= 8; shift++)
|
||||||
if (bits < bits_table[shift - 1])
|
if (bits < bits_table[shift - 1])
|
||||||
@ -44,17 +42,7 @@ got_shift:
|
|||||||
|
|
||||||
BUG_ON(out + bytes > end);
|
BUG_ON(out + bytes > end);
|
||||||
|
|
||||||
if (likely(bytes <= 8)) {
|
memcpy(out, (u8 *) in + 16 - bytes, bytes);
|
||||||
u64 b = cpu_to_be64(in[0]);
|
|
||||||
|
|
||||||
memcpy(out, (void *) &b + 8 - bytes, bytes);
|
|
||||||
} else {
|
|
||||||
u64 b = cpu_to_be64(in[1]);
|
|
||||||
|
|
||||||
memcpy(out, (void *) &b + 16 - bytes, bytes);
|
|
||||||
put_unaligned_be64(in[0], out + bytes - 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
*out |= (1 << 8) >> shift;
|
*out |= (1 << 8) >> shift;
|
||||||
|
|
||||||
return bytes;
|
return bytes;
|
||||||
@ -63,7 +51,9 @@ got_shift:
|
|||||||
static int inode_decode_field(const u8 *in, const u8 *end,
|
static int inode_decode_field(const u8 *in, const u8 *end,
|
||||||
u64 out[2], unsigned *out_bits)
|
u64 out[2], unsigned *out_bits)
|
||||||
{
|
{
|
||||||
unsigned bytes, bits, shift;
|
__be64 be[2] = { 0, 0 };
|
||||||
|
unsigned bytes, shift;
|
||||||
|
u8 *p;
|
||||||
|
|
||||||
if (in >= end)
|
if (in >= end)
|
||||||
return -1;
|
return -1;
|
||||||
@ -77,29 +67,18 @@ static int inode_decode_field(const u8 *in, const u8 *end,
|
|||||||
*/
|
*/
|
||||||
shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
|
shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
|
||||||
bytes = byte_table[shift - 1];
|
bytes = byte_table[shift - 1];
|
||||||
bits = bytes * 8 - shift;
|
|
||||||
|
|
||||||
if (in + bytes > end)
|
if (in + bytes > end)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/*
|
p = (u8 *) be + 16 - bytes;
|
||||||
* we're assuming it's safe to deref up to 7 bytes < in; this will work
|
memcpy(p, in, bytes);
|
||||||
* because keys always start quite a bit more than 7 bytes after the
|
*p ^= (1 << 8) >> shift;
|
||||||
* start of the btree node header:
|
|
||||||
*/
|
out[0] = be64_to_cpu(be[0]);
|
||||||
if (likely(bytes <= 8)) {
|
out[1] = be64_to_cpu(be[1]);
|
||||||
out[0] = get_unaligned_be64(in + bytes - 8);
|
*out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]);
|
||||||
out[0] <<= 64 - bits;
|
|
||||||
out[0] >>= 64 - bits;
|
|
||||||
out[1] = 0;
|
|
||||||
} else {
|
|
||||||
out[0] = get_unaligned_be64(in + bytes - 8);
|
|
||||||
out[1] = get_unaligned_be64(in + bytes - 16);
|
|
||||||
out[1] <<= 128 - bits;
|
|
||||||
out[1] >>= 128 - bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
*out_bits = out[1] ? 64 + fls64(out[1]) : fls64(out[0]);
|
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,7 +88,6 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
|
|||||||
u8 *out = packed->inode.v.fields;
|
u8 *out = packed->inode.v.fields;
|
||||||
u8 *end = (void *) &packed[1];
|
u8 *end = (void *) &packed[1];
|
||||||
u8 *last_nonzero_field = out;
|
u8 *last_nonzero_field = out;
|
||||||
u64 field[2];
|
|
||||||
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
|
unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
|
||||||
|
|
||||||
bkey_inode_init(&packed->inode.k_i);
|
bkey_inode_init(&packed->inode.k_i);
|
||||||
@ -119,12 +97,10 @@ void bch2_inode_pack(struct bkey_inode_buf *packed,
|
|||||||
packed->inode.v.i_mode = cpu_to_le16(inode->i_mode);
|
packed->inode.v.i_mode = cpu_to_le16(inode->i_mode);
|
||||||
|
|
||||||
#define BCH_INODE_FIELD(_name, _bits) \
|
#define BCH_INODE_FIELD(_name, _bits) \
|
||||||
field[0] = inode->_name; \
|
out += inode_encode_field(out, end, 0, inode->_name); \
|
||||||
field[1] = 0; \
|
|
||||||
out += inode_encode_field(out, end, field); \
|
|
||||||
nr_fields++; \
|
nr_fields++; \
|
||||||
\
|
\
|
||||||
if (field[0] | field[1]) { \
|
if (inode->_name) { \
|
||||||
last_nonzero_field = out; \
|
last_nonzero_field = out; \
|
||||||
last_nonzero_fieldnr = nr_fields; \
|
last_nonzero_fieldnr = nr_fields; \
|
||||||
}
|
}
|
||||||
@ -187,7 +163,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode,
|
|||||||
if (field_bits > sizeof(unpacked->_name) * 8) \
|
if (field_bits > sizeof(unpacked->_name) * 8) \
|
||||||
return -1; \
|
return -1; \
|
||||||
\
|
\
|
||||||
unpacked->_name = field[0]; \
|
unpacked->_name = field[1]; \
|
||||||
in += ret;
|
in += ret;
|
||||||
|
|
||||||
BCH_INODE_FIELDS()
|
BCH_INODE_FIELDS()
|
||||||
@ -449,3 +425,32 @@ int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid,
|
|||||||
bch2_btree_iter_unlock(&iter);
|
bch2_btree_iter_unlock(&iter);
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||||
|
void bch2_inode_pack_test(void)
|
||||||
|
{
|
||||||
|
struct bch_inode_unpacked *u, test_inodes[] = {
|
||||||
|
{
|
||||||
|
.i_atime = U64_MAX,
|
||||||
|
.i_ctime = U64_MAX,
|
||||||
|
.i_mtime = U64_MAX,
|
||||||
|
.i_otime = U64_MAX,
|
||||||
|
.i_size = U64_MAX,
|
||||||
|
.i_sectors = U64_MAX,
|
||||||
|
.i_uid = U32_MAX,
|
||||||
|
.i_gid = U32_MAX,
|
||||||
|
.i_nlink = U32_MAX,
|
||||||
|
.i_generation = U32_MAX,
|
||||||
|
.i_dev = U32_MAX,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u = test_inodes;
|
||||||
|
u < test_inodes + ARRAY_SIZE(test_inodes);
|
||||||
|
u++) {
|
||||||
|
struct bkey_inode_buf p;
|
||||||
|
|
||||||
|
bch2_inode_pack(&p, u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@ -54,4 +54,10 @@ static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec ts)
|
|||||||
return div_s64(ns, c->sb.time_precision);
|
return div_s64(ns, c->sb.time_precision);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||||
|
void bch2_inode_pack_test(void);
|
||||||
|
#else
|
||||||
|
static inline void bch2_inode_pack_test(void) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -910,8 +910,8 @@ static int bio_checksum_uncompress(struct bch_fs *c,
|
|||||||
bch2_encrypt_bio(c, rbio->crc.csum_type,
|
bch2_encrypt_bio(c, rbio->crc.csum_type,
|
||||||
nonce, src);
|
nonce, src);
|
||||||
|
|
||||||
bio_copy_data_iter(dst, dst_iter,
|
bio_copy_data_iter(dst, &dst_iter,
|
||||||
src, src->bi_iter);
|
src, &src->bi_iter);
|
||||||
} else {
|
} else {
|
||||||
bch2_encrypt_bio(c, rbio->crc.csum_type, nonce, src);
|
bch2_encrypt_bio(c, rbio->crc.csum_type, nonce, src);
|
||||||
}
|
}
|
||||||
|
@ -527,62 +527,34 @@ fsck_err:
|
|||||||
#define JOURNAL_ENTRY_NONE 6
|
#define JOURNAL_ENTRY_NONE 6
|
||||||
#define JOURNAL_ENTRY_BAD 7
|
#define JOURNAL_ENTRY_BAD 7
|
||||||
|
|
||||||
static int journal_entry_validate(struct bch_fs *c,
|
#define journal_entry_err(c, msg, ...) \
|
||||||
struct jset *j, u64 sector,
|
({ \
|
||||||
unsigned bucket_sectors_left,
|
if (write == READ) { \
|
||||||
unsigned sectors_read)
|
mustfix_fsck_err(c, msg, ##__VA_ARGS__); \
|
||||||
|
} else { \
|
||||||
|
bch_err(c, "detected corrupt metadata before write:\n" \
|
||||||
|
msg, ##__VA_ARGS__); \
|
||||||
|
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
|
||||||
|
goto fsck_err; \
|
||||||
|
} \
|
||||||
|
true; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define journal_entry_err_on(cond, c, msg, ...) \
|
||||||
|
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
|
||||||
|
|
||||||
|
static int __journal_entry_validate(struct bch_fs *c, struct jset *j,
|
||||||
|
int write)
|
||||||
{
|
{
|
||||||
struct jset_entry *entry;
|
struct jset_entry *entry;
|
||||||
size_t bytes = vstruct_bytes(j);
|
|
||||||
struct bch_csum csum;
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (le64_to_cpu(j->magic) != jset_magic(c))
|
|
||||||
return JOURNAL_ENTRY_NONE;
|
|
||||||
|
|
||||||
if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
|
|
||||||
bch_err(c, "unknown journal entry version %u",
|
|
||||||
le32_to_cpu(j->version));
|
|
||||||
return BCH_FSCK_UNKNOWN_VERSION;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9, c,
|
|
||||||
"journal entry too big (%zu bytes), sector %lluu",
|
|
||||||
bytes, sector)) {
|
|
||||||
/* XXX: note we might have missing journal entries */
|
|
||||||
return JOURNAL_ENTRY_BAD;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bytes > sectors_read << 9)
|
|
||||||
return JOURNAL_ENTRY_REREAD;
|
|
||||||
|
|
||||||
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
|
|
||||||
"journal entry with unknown csum type %llu sector %lluu",
|
|
||||||
JSET_CSUM_TYPE(j), sector))
|
|
||||||
return JOURNAL_ENTRY_BAD;
|
|
||||||
|
|
||||||
csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
|
|
||||||
if (mustfix_fsck_err_on(bch2_crc_cmp(csum, j->csum), c,
|
|
||||||
"journal checksum bad, sector %llu", sector)) {
|
|
||||||
/* XXX: retry IO, when we start retrying checksum errors */
|
|
||||||
/* XXX: note we might have missing journal entries */
|
|
||||||
return JOURNAL_ENTRY_BAD;
|
|
||||||
}
|
|
||||||
|
|
||||||
bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
|
|
||||||
j->encrypted_start,
|
|
||||||
vstruct_end(j) - (void *) j->encrypted_start);
|
|
||||||
|
|
||||||
if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
|
|
||||||
"invalid journal entry: last_seq > seq"))
|
|
||||||
j->last_seq = j->seq;
|
|
||||||
|
|
||||||
vstruct_for_each(j, entry) {
|
vstruct_for_each(j, entry) {
|
||||||
struct bkey_i *k;
|
struct bkey_i *k;
|
||||||
|
|
||||||
if (mustfix_fsck_err_on(vstruct_next(entry) >
|
if (journal_entry_err_on(vstruct_next(entry) >
|
||||||
vstruct_last(j), c,
|
vstruct_last(j), c,
|
||||||
"journal entry extents past end of jset")) {
|
"journal entry extends past end of jset")) {
|
||||||
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
|
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -602,7 +574,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
|||||||
case JOURNAL_ENTRY_BTREE_ROOT:
|
case JOURNAL_ENTRY_BTREE_ROOT:
|
||||||
k = entry->start;
|
k = entry->start;
|
||||||
|
|
||||||
if (mustfix_fsck_err_on(!entry->u64s ||
|
if (journal_entry_err_on(!entry->u64s ||
|
||||||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
|
le16_to_cpu(entry->u64s) != k->k.u64s, c,
|
||||||
"invalid btree root journal entry: wrong number of keys")) {
|
"invalid btree root journal entry: wrong number of keys")) {
|
||||||
journal_entry_null_range(entry,
|
journal_entry_null_range(entry,
|
||||||
@ -620,7 +592,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED:
|
case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED:
|
||||||
if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
|
if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c,
|
||||||
"invalid journal seq blacklist entry: bad size")) {
|
"invalid journal seq blacklist entry: bad size")) {
|
||||||
journal_entry_null_range(entry,
|
journal_entry_null_range(entry,
|
||||||
vstruct_next(entry));
|
vstruct_next(entry));
|
||||||
@ -628,7 +600,7 @@ static int journal_entry_validate(struct bch_fs *c,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
mustfix_fsck_err(c, "invalid journal entry type %llu",
|
journal_entry_err(c, "invalid journal entry type %llu",
|
||||||
JOURNAL_ENTRY_TYPE(entry));
|
JOURNAL_ENTRY_TYPE(entry));
|
||||||
journal_entry_null_range(entry, vstruct_next(entry));
|
journal_entry_null_range(entry, vstruct_next(entry));
|
||||||
break;
|
break;
|
||||||
@ -639,6 +611,61 @@ fsck_err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Validate the header of a journal entry (jset), then hand its contents to
 * __journal_entry_validate().
 *
 * @j:			the journal entry to check
 * @sector:		sector the entry was found at; used in messages only
 * @bucket_sectors_left: sectors remaining in the bucket from @sector
 * @sectors_read:	how many sectors of the entry are in memory so far
 * @write:		READ or WRITE; consumed by the journal_entry_err*()
 *			macros, which also use the local 'ret' and the
 *			'fsck_err' label below
 *
 * Returns:
 *   JOURNAL_ENTRY_NONE        - magic does not match this filesystem
 *   BCH_FSCK_UNKNOWN_VERSION  - version is not BCACHE_JSET_VERSION
 *   JOURNAL_ENTRY_BAD         - entry larger than the space left in the
 *                               bucket, unknown checksum type, or checksum
 *                               mismatch
 *   JOURNAL_ENTRY_REREAD      - entry is larger than what has been read
 *   otherwise whatever __journal_entry_validate() returns, or the value an
 *   error macro stored in 'ret' before jumping to fsck_err.
 */
static int journal_entry_validate(struct bch_fs *c,
				  struct jset *j, u64 sector,
				  unsigned bucket_sectors_left,
				  unsigned sectors_read,
				  int write)
{
	size_t bytes = vstruct_bytes(j);
	struct bch_csum csum;
	int ret = 0;

	if (le64_to_cpu(j->magic) != jset_magic(c))
		return JOURNAL_ENTRY_NONE;

	if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
		bch_err(c, "unknown journal entry version %u",
			le32_to_cpu(j->version));
		return BCH_FSCK_UNKNOWN_VERSION;
	}

	/* Format fixed: was "%lluu", which printed a stray 'u' after the sector */
	if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
			"journal entry too big (%zu bytes), sector %llu",
			bytes, sector)) {
		/* XXX: note we might have missing journal entries */
		return JOURNAL_ENTRY_BAD;
	}

	/* Caller has not read the whole entry yet; ask for a bigger read */
	if (bytes > sectors_read << 9)
		return JOURNAL_ENTRY_REREAD;

	/* Format fixed here as well ("%lluu" -> "%llu") */
	if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
			"journal entry with unknown csum type %llu sector %llu",
			JSET_CSUM_TYPE(j), sector))
		return JOURNAL_ENTRY_BAD;

	csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
	if (journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c,
			"journal checksum bad, sector %llu", sector)) {
		/* XXX: retry IO, when we start retrying checksum errors */
		/* XXX: note we might have missing journal entries */
		return JOURNAL_ENTRY_BAD;
	}

	/*
	 * Runs only after the checksum matched.
	 * NOTE(review): presumably this decrypts the entry in place - confirm.
	 */
	bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
		     j->encrypted_start,
		     vstruct_end(j) - (void *) j->encrypted_start);

	/* Repair in place: clamp last_seq so it never exceeds seq */
	if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
			"invalid journal entry: last_seq > seq"))
		j->last_seq = j->seq;

	return __journal_entry_validate(c, j, write);
fsck_err:
	return ret;
}
|
||||||
|
|
||||||
struct journal_read_buf {
|
struct journal_read_buf {
|
||||||
void *data;
|
void *data;
|
||||||
size_t size;
|
size_t size;
|
||||||
@ -705,7 +732,8 @@ reread: sectors_read = min_t(unsigned,
|
|||||||
}
|
}
|
||||||
|
|
||||||
ret = journal_entry_validate(c, j, offset,
|
ret = journal_entry_validate(c, j, offset,
|
||||||
end - offset, sectors_read);
|
end - offset, sectors_read,
|
||||||
|
READ);
|
||||||
switch (ret) {
|
switch (ret) {
|
||||||
case BCH_FSCK_OK:
|
case BCH_FSCK_OK:
|
||||||
break;
|
break;
|
||||||
@ -2274,6 +2302,10 @@ static void journal_write(struct closure *cl)
|
|||||||
SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
|
SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
|
||||||
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
|
SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c));
|
||||||
|
|
||||||
|
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||||
|
__journal_entry_validate(c, jset, WRITE))
|
||||||
|
goto err;
|
||||||
|
|
||||||
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||||
jset->encrypted_start,
|
jset->encrypted_start,
|
||||||
vstruct_end(jset) - (void *) jset->encrypted_start);
|
vstruct_end(jset) - (void *) jset->encrypted_start);
|
||||||
@ -2281,6 +2313,10 @@ static void journal_write(struct closure *cl)
|
|||||||
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
|
jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
|
||||||
journal_nonce(jset), jset);
|
journal_nonce(jset), jset);
|
||||||
|
|
||||||
|
if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) &&
|
||||||
|
__journal_entry_validate(c, jset, WRITE))
|
||||||
|
goto err;
|
||||||
|
|
||||||
sectors = vstruct_sectors(jset, c->block_bits);
|
sectors = vstruct_sectors(jset, c->block_bits);
|
||||||
BUG_ON(sectors > j->prev_buf_sectors);
|
BUG_ON(sectors > j->prev_buf_sectors);
|
||||||
|
|
||||||
@ -2349,6 +2385,9 @@ no_io:
|
|||||||
ptr->offset += sectors;
|
ptr->offset += sectors;
|
||||||
|
|
||||||
closure_return_with_destructor(cl, journal_write_done);
|
closure_return_with_destructor(cl, journal_write_done);
|
||||||
|
err:
|
||||||
|
bch2_fatal_error(c);
|
||||||
|
closure_return_with_destructor(cl, journal_write_done);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void journal_write_work(struct work_struct *work)
|
static void journal_write_work(struct work_struct *work)
|
||||||
|
@ -2,7 +2,9 @@
|
|||||||
#define _BCACHE_STR_HASH_H
|
#define _BCACHE_STR_HASH_H
|
||||||
|
|
||||||
#include "btree_iter.h"
|
#include "btree_iter.h"
|
||||||
|
#include "btree_update.h"
|
||||||
#include "checksum.h"
|
#include "checksum.h"
|
||||||
|
#include "error.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "siphash.h"
|
#include "siphash.h"
|
||||||
#include "super.h"
|
#include "super.h"
|
||||||
@ -341,6 +343,36 @@ err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int bch2_hash_delete_at(const struct bch_hash_desc desc,
|
||||||
|
const struct bch_hash_info *info,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
u64 *journal_seq)
|
||||||
|
{
|
||||||
|
struct btree_iter whiteout_iter;
|
||||||
|
struct bkey_i delete;
|
||||||
|
int ret = -ENOENT;
|
||||||
|
|
||||||
|
bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id,
|
||||||
|
iter->pos);
|
||||||
|
bch2_btree_iter_link(iter, &whiteout_iter);
|
||||||
|
|
||||||
|
ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter);
|
||||||
|
if (ret < 0)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
bkey_init(&delete.k);
|
||||||
|
delete.k.p = iter->pos;
|
||||||
|
delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
|
||||||
|
|
||||||
|
ret = bch2_btree_insert_at(iter->c, NULL, NULL, journal_seq,
|
||||||
|
BTREE_INSERT_NOFAIL|
|
||||||
|
BTREE_INSERT_ATOMIC,
|
||||||
|
BTREE_INSERT_ENTRY(iter, &delete));
|
||||||
|
err:
|
||||||
|
bch2_btree_iter_unlink(&whiteout_iter);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static inline int bch2_hash_delete(const struct bch_hash_desc desc,
|
static inline int bch2_hash_delete(const struct bch_hash_desc desc,
|
||||||
const struct bch_hash_info *info,
|
const struct bch_hash_info *info,
|
||||||
struct bch_fs *c, u64 inode,
|
struct bch_fs *c, u64 inode,
|
||||||
@ -348,7 +380,6 @@ static inline int bch2_hash_delete(const struct bch_hash_desc desc,
|
|||||||
{
|
{
|
||||||
struct btree_iter iter, whiteout_iter;
|
struct btree_iter iter, whiteout_iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
struct bkey_i delete;
|
|
||||||
int ret = -ENOENT;
|
int ret = -ENOENT;
|
||||||
|
|
||||||
bch2_btree_iter_init_intent(&iter, c, desc.btree_id,
|
bch2_btree_iter_init_intent(&iter, c, desc.btree_id,
|
||||||
@ -361,18 +392,7 @@ retry:
|
|||||||
if ((ret = btree_iter_err(k)))
|
if ((ret = btree_iter_err(k)))
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, &iter);
|
ret = bch2_hash_delete_at(desc, info, &iter, journal_seq);
|
||||||
if (ret < 0)
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
bkey_init(&delete.k);
|
|
||||||
delete.k.p = k.k->p;
|
|
||||||
delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
|
|
||||||
|
|
||||||
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
|
|
||||||
BTREE_INSERT_NOFAIL|
|
|
||||||
BTREE_INSERT_ATOMIC,
|
|
||||||
BTREE_INSERT_ENTRY(&iter, &delete));
|
|
||||||
err:
|
err:
|
||||||
if (ret == -EINTR)
|
if (ret == -EINTR)
|
||||||
goto retry;
|
goto retry;
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "fs.h"
|
#include "fs.h"
|
||||||
#include "fs-gc.h"
|
#include "fsck.h"
|
||||||
#include "inode.h"
|
#include "inode.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
@ -513,6 +513,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
|||||||
INIT_WORK(&c->read_retry_work, bch2_read_retry_work);
|
INIT_WORK(&c->read_retry_work, bch2_read_retry_work);
|
||||||
mutex_init(&c->zlib_workspace_lock);
|
mutex_init(&c->zlib_workspace_lock);
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&c->fsck_errors);
|
||||||
|
mutex_init(&c->fsck_error_lock);
|
||||||
|
|
||||||
seqcount_init(&c->gc_pos_lock);
|
seqcount_init(&c->gc_pos_lock);
|
||||||
|
|
||||||
c->prio_clock[READ].hand = 1;
|
c->prio_clock[READ].hand = 1;
|
||||||
@ -875,12 +878,12 @@ err:
|
|||||||
switch (ret) {
|
switch (ret) {
|
||||||
case BCH_FSCK_ERRORS_NOT_FIXED:
|
case BCH_FSCK_ERRORS_NOT_FIXED:
|
||||||
bch_err(c, "filesystem contains errors: please report this to the developers");
|
bch_err(c, "filesystem contains errors: please report this to the developers");
|
||||||
pr_cont("mount with -o fix_errors to repair");
|
pr_cont("mount with -o fix_errors to repair\n");
|
||||||
err = "fsck error";
|
err = "fsck error";
|
||||||
break;
|
break;
|
||||||
case BCH_FSCK_REPAIR_UNIMPLEMENTED:
|
case BCH_FSCK_REPAIR_UNIMPLEMENTED:
|
||||||
bch_err(c, "filesystem contains errors: please report this to the developers");
|
bch_err(c, "filesystem contains errors: please report this to the developers");
|
||||||
pr_cont("repair unimplemented: inform the developers so that it can be added");
|
pr_cont("repair unimplemented: inform the developers so that it can be added\n");
|
||||||
err = "fsck error";
|
err = "fsck error";
|
||||||
break;
|
break;
|
||||||
case BCH_FSCK_REPAIR_IMPOSSIBLE:
|
case BCH_FSCK_REPAIR_IMPOSSIBLE:
|
||||||
@ -979,8 +982,8 @@ static void bch2_dev_free(struct bch_dev *ca)
|
|||||||
kvpfree(ca->disk_buckets, bucket_bytes(ca));
|
kvpfree(ca->disk_buckets, bucket_bytes(ca));
|
||||||
kfree(ca->prio_buckets);
|
kfree(ca->prio_buckets);
|
||||||
kfree(ca->bio_prio);
|
kfree(ca->bio_prio);
|
||||||
vfree(ca->buckets);
|
kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket));
|
||||||
vfree(ca->oldest_gens);
|
kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
|
||||||
free_heap(&ca->heap);
|
free_heap(&ca->heap);
|
||||||
free_fifo(&ca->free_inc);
|
free_fifo(&ca->free_inc);
|
||||||
|
|
||||||
@ -1140,10 +1143,12 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
|
|||||||
!init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
!init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
|
||||||
!init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) ||
|
!init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) ||
|
||||||
!init_heap(&ca->heap, heap_size, GFP_KERNEL) ||
|
!init_heap(&ca->heap, heap_size, GFP_KERNEL) ||
|
||||||
!(ca->oldest_gens = vzalloc(sizeof(u8) *
|
!(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets *
|
||||||
ca->mi.nbuckets)) ||
|
sizeof(u8),
|
||||||
!(ca->buckets = vzalloc(sizeof(struct bucket) *
|
GFP_KERNEL|__GFP_ZERO)) ||
|
||||||
ca->mi.nbuckets)) ||
|
!(ca->buckets = kvpmalloc(ca->mi.nbuckets *
|
||||||
|
sizeof(struct bucket),
|
||||||
|
GFP_KERNEL|__GFP_ZERO)) ||
|
||||||
!(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) *
|
!(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) *
|
||||||
2, GFP_KERNEL)) ||
|
2, GFP_KERNEL)) ||
|
||||||
!(ca->disk_buckets = kvpmalloc(bucket_bytes(ca), GFP_KERNEL)) ||
|
!(ca->disk_buckets = kvpmalloc(bucket_bytes(ca), GFP_KERNEL)) ||
|
||||||
@ -1871,6 +1876,7 @@ static void bcachefs_exit(void)
|
|||||||
static int __init bcachefs_init(void)
|
static int __init bcachefs_init(void)
|
||||||
{
|
{
|
||||||
bch2_bkey_pack_test();
|
bch2_bkey_pack_test();
|
||||||
|
bch2_inode_pack_test();
|
||||||
|
|
||||||
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
|
if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) ||
|
||||||
bch2_chardev_init() ||
|
bch2_chardev_init() ||
|
||||||
|
@ -512,7 +512,7 @@ STORE(bch2_fs_opts_dir)
|
|||||||
{
|
{
|
||||||
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
|
struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
|
||||||
const struct bch_option *opt;
|
const struct bch_option *opt;
|
||||||
enum bch_opt_id id;
|
int id;
|
||||||
u64 v;
|
u64 v;
|
||||||
|
|
||||||
id = bch2_parse_sysfs_opt(attr->name, buf, &v);
|
id = bch2_parse_sysfs_opt(attr->name, buf, &v);
|
||||||
|
@ -417,3 +417,17 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
|
|||||||
dst += bv.bv_len;
|
dst += bv.bv_len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len)
|
||||||
|
{
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
if (!size)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
n = min(size - 1, len);
|
||||||
|
memcpy(buf, src, n);
|
||||||
|
buf[n] = '\0';
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
@ -93,7 +93,8 @@ static inline void kvpfree(void *p, size_t size)
|
|||||||
static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
|
static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
|
return size < PAGE_SIZE ? kmalloc(size, gfp_mask)
|
||||||
: (void *) __get_free_pages(gfp_mask, get_order(size))
|
: (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
|
||||||
|
get_order(size))
|
||||||
?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
|
?: __vmalloc(size, gfp_mask, PAGE_KERNEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -750,4 +751,6 @@ static inline struct bio_vec next_contig_bvec(struct bio *bio,
|
|||||||
#define bio_for_each_contig_segment(bv, bio, iter) \
|
#define bio_for_each_contig_segment(bv, bio, iter) \
|
||||||
__bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
|
__bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
|
||||||
|
|
||||||
|
size_t bch_scnmemcpy(char *, size_t, const char *, size_t);
|
||||||
|
|
||||||
#endif /* _BCACHE_UTIL_H */
|
#endif /* _BCACHE_UTIL_H */
|
||||||
|
@ -11,6 +11,16 @@
|
|||||||
#include <linux/posix_acl_xattr.h>
|
#include <linux/posix_acl_xattr.h>
|
||||||
#include <linux/xattr.h>
|
#include <linux/xattr.h>
|
||||||
|
|
||||||
|
static unsigned xattr_val_u64s(unsigned name_len, unsigned val_len)
|
||||||
|
{
|
||||||
|
return DIV_ROUND_UP(sizeof(struct bch_xattr) +
|
||||||
|
name_len + val_len, sizeof(u64));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
|
||||||
|
|
||||||
|
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
|
||||||
|
|
||||||
struct xattr_search_key {
|
struct xattr_search_key {
|
||||||
u8 type;
|
u8 type;
|
||||||
struct qstr name;
|
struct qstr name;
|
||||||
@ -31,8 +41,6 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info,
|
|||||||
return bch2_str_hash_end(&ctx, info);
|
return bch2_str_hash_end(&ctx, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
|
|
||||||
|
|
||||||
static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
|
static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
|
||||||
{
|
{
|
||||||
return bch2_xattr_hash(info, key);
|
return bch2_xattr_hash(info, key);
|
||||||
@ -66,7 +74,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
|
|||||||
memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
|
memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bch_hash_desc xattr_hash_desc = {
|
const struct bch_hash_desc bch2_xattr_hash_desc = {
|
||||||
.btree_id = BTREE_ID_XATTRS,
|
.btree_id = BTREE_ID_XATTRS,
|
||||||
.key_type = BCH_XATTR,
|
.key_type = BCH_XATTR,
|
||||||
.whiteout_type = BCH_XATTR_WHITEOUT,
|
.whiteout_type = BCH_XATTR_WHITEOUT,
|
||||||
@ -79,12 +87,33 @@ static const struct bch_hash_desc xattr_hash_desc = {
|
|||||||
static const char *bch2_xattr_invalid(const struct bch_fs *c,
|
static const char *bch2_xattr_invalid(const struct bch_fs *c,
|
||||||
struct bkey_s_c k)
|
struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
|
const struct xattr_handler *handler;
|
||||||
|
struct bkey_s_c_xattr xattr;
|
||||||
|
unsigned u64s;
|
||||||
|
|
||||||
switch (k.k->type) {
|
switch (k.k->type) {
|
||||||
case BCH_XATTR:
|
case BCH_XATTR:
|
||||||
return bkey_val_bytes(k.k) < sizeof(struct bch_xattr)
|
if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
|
||||||
? "value too small"
|
return "value too small";
|
||||||
: NULL;
|
|
||||||
|
|
||||||
|
xattr = bkey_s_c_to_xattr(k);
|
||||||
|
u64s = xattr_val_u64s(xattr.v->x_name_len,
|
||||||
|
le16_to_cpu(xattr.v->x_val_len));
|
||||||
|
|
||||||
|
if (bkey_val_u64s(k.k) < u64s)
|
||||||
|
return "value too small";
|
||||||
|
|
||||||
|
if (bkey_val_u64s(k.k) > u64s)
|
||||||
|
return "value too big";
|
||||||
|
|
||||||
|
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
|
||||||
|
if (!handler)
|
||||||
|
return "invalid type";
|
||||||
|
|
||||||
|
if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
|
||||||
|
return "xattr name has invalid characters";
|
||||||
|
|
||||||
|
return NULL;
|
||||||
case BCH_XATTR_WHITEOUT:
|
case BCH_XATTR_WHITEOUT:
|
||||||
return bkey_val_bytes(k.k) != 0
|
return bkey_val_bytes(k.k) != 0
|
||||||
? "value size should be zero"
|
? "value size should be zero"
|
||||||
@ -98,34 +127,29 @@ static const char *bch2_xattr_invalid(const struct bch_fs *c,
|
|||||||
static void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
static void bch2_xattr_to_text(struct bch_fs *c, char *buf,
|
||||||
size_t size, struct bkey_s_c k)
|
size_t size, struct bkey_s_c k)
|
||||||
{
|
{
|
||||||
|
const struct xattr_handler *handler;
|
||||||
struct bkey_s_c_xattr xattr;
|
struct bkey_s_c_xattr xattr;
|
||||||
int n;
|
size_t n = 0;
|
||||||
|
|
||||||
switch (k.k->type) {
|
switch (k.k->type) {
|
||||||
case BCH_XATTR:
|
case BCH_XATTR:
|
||||||
xattr = bkey_s_c_to_xattr(k);
|
xattr = bkey_s_c_to_xattr(k);
|
||||||
|
|
||||||
if (size) {
|
handler = bch2_xattr_type_to_handler(xattr.v->x_type);
|
||||||
n = min_t(unsigned, size, xattr.v->x_name_len);
|
if (handler && handler->prefix)
|
||||||
memcpy(buf, xattr.v->x_name, n);
|
n += scnprintf(buf + n, size - n, "%s", handler->prefix);
|
||||||
buf[size - 1] = '\0';
|
else if (handler)
|
||||||
buf += n;
|
n += scnprintf(buf + n, size - n, "(type %u)",
|
||||||
size -= n;
|
xattr.v->x_type);
|
||||||
}
|
else
|
||||||
|
n += scnprintf(buf + n, size - n, "(unknown type %u)",
|
||||||
n = scnprintf(buf, size, " -> ");
|
xattr.v->x_type);
|
||||||
buf += n;
|
|
||||||
size -= n;
|
|
||||||
|
|
||||||
if (size) {
|
|
||||||
n = min_t(unsigned, size,
|
|
||||||
le16_to_cpu(xattr.v->x_val_len));
|
|
||||||
memcpy(buf, xattr_val(xattr.v), n);
|
|
||||||
buf[size - 1] = '\0';
|
|
||||||
buf += n;
|
|
||||||
size -= n;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
n += bch_scnmemcpy(buf + n, size - n, xattr.v->x_name,
|
||||||
|
xattr.v->x_name_len);
|
||||||
|
n += scnprintf(buf + n, size - n, ":");
|
||||||
|
n += bch_scnmemcpy(buf + n, size - n, xattr_val(xattr.v),
|
||||||
|
le16_to_cpu(xattr.v->x_val_len));
|
||||||
break;
|
break;
|
||||||
case BCH_XATTR_WHITEOUT:
|
case BCH_XATTR_WHITEOUT:
|
||||||
scnprintf(buf, size, "whiteout");
|
scnprintf(buf, size, "whiteout");
|
||||||
@ -147,7 +171,7 @@ int bch2_xattr_get(struct bch_fs *c, struct inode *inode,
|
|||||||
struct bkey_s_c_xattr xattr;
|
struct bkey_s_c_xattr xattr;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
k = bch2_hash_lookup(xattr_hash_desc, &ei->str_hash, c,
|
k = bch2_hash_lookup(bch2_xattr_hash_desc, &ei->str_hash, c,
|
||||||
ei->vfs_inode.i_ino, &iter,
|
ei->vfs_inode.i_ino, &iter,
|
||||||
&X_SEARCH(type, name, strlen(name)));
|
&X_SEARCH(type, name, strlen(name)));
|
||||||
if (IS_ERR(k.k))
|
if (IS_ERR(k.k))
|
||||||
@ -175,15 +199,13 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!value) {
|
if (!value) {
|
||||||
ret = bch2_hash_delete(xattr_hash_desc, hash_info,
|
ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info,
|
||||||
c, inum,
|
c, inum,
|
||||||
journal_seq, &search);
|
journal_seq, &search);
|
||||||
} else {
|
} else {
|
||||||
struct bkey_i_xattr *xattr;
|
struct bkey_i_xattr *xattr;
|
||||||
unsigned u64s = BKEY_U64s +
|
unsigned u64s = BKEY_U64s +
|
||||||
DIV_ROUND_UP(sizeof(struct bch_xattr) +
|
xattr_val_u64s(search.name.len, size);
|
||||||
search.name.len + size,
|
|
||||||
sizeof(u64));
|
|
||||||
|
|
||||||
if (u64s > U8_MAX)
|
if (u64s > U8_MAX)
|
||||||
return -ERANGE;
|
return -ERANGE;
|
||||||
@ -200,7 +222,7 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum,
|
|||||||
memcpy(xattr->v.x_name, search.name.name, search.name.len);
|
memcpy(xattr->v.x_name, search.name.name, search.name.len);
|
||||||
memcpy(xattr_val(&xattr->v), value, size);
|
memcpy(xattr_val(&xattr->v), value, size);
|
||||||
|
|
||||||
ret = bch2_hash_set(xattr_hash_desc, hash_info, c,
|
ret = bch2_hash_set(bch2_xattr_hash_desc, hash_info, c,
|
||||||
inum, journal_seq,
|
inum, journal_seq,
|
||||||
&xattr->k_i,
|
&xattr->k_i,
|
||||||
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
|
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
|
||||||
@ -225,8 +247,6 @@ int bch2_xattr_set(struct bch_fs *c, struct inode *inode,
|
|||||||
&ei->journal_seq);
|
&ei->journal_seq);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned);
|
|
||||||
|
|
||||||
static size_t bch2_xattr_emit(struct dentry *dentry,
|
static size_t bch2_xattr_emit(struct dentry *dentry,
|
||||||
const struct bch_xattr *xattr,
|
const struct bch_xattr *xattr,
|
||||||
char *buffer, size_t buffer_size)
|
char *buffer, size_t buffer_size)
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#ifndef _BCACHE_XATTR_H
|
#ifndef _BCACHE_XATTR_H
|
||||||
#define _BCACHE_XATTR_H
|
#define _BCACHE_XATTR_H
|
||||||
|
|
||||||
|
#include "str_hash.h"
|
||||||
|
|
||||||
|
extern const struct bch_hash_desc bch2_xattr_hash_desc;
|
||||||
extern const struct bkey_ops bch2_bkey_xattr_ops;
|
extern const struct bkey_ops bch2_bkey_xattr_ops;
|
||||||
|
|
||||||
struct dentry;
|
struct dentry;
|
||||||
|
46
linux/bio.c
46
linux/bio.c
@ -21,32 +21,16 @@
|
|||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
|
|
||||||
void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter,
|
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||||
struct bio *src, struct bvec_iter src_iter)
|
struct bio *src, struct bvec_iter *src_iter)
|
||||||
{
|
{
|
||||||
struct bio_vec src_bv, dst_bv;
|
struct bio_vec src_bv, dst_bv;
|
||||||
void *src_p, *dst_p;
|
void *src_p, *dst_p;
|
||||||
unsigned bytes;
|
unsigned bytes;
|
||||||
|
|
||||||
while (1) {
|
while (src_iter->bi_size && dst_iter->bi_size) {
|
||||||
if (!src_iter.bi_size) {
|
src_bv = bio_iter_iovec(src, *src_iter);
|
||||||
src = src->bi_next;
|
dst_bv = bio_iter_iovec(dst, *dst_iter);
|
||||||
if (!src)
|
|
||||||
break;
|
|
||||||
|
|
||||||
src_iter = src->bi_iter;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!dst_iter.bi_size) {
|
|
||||||
dst = dst->bi_next;
|
|
||||||
if (!dst)
|
|
||||||
break;
|
|
||||||
|
|
||||||
dst_iter = dst->bi_iter;
|
|
||||||
}
|
|
||||||
|
|
||||||
src_bv = bio_iter_iovec(src, src_iter);
|
|
||||||
dst_bv = bio_iter_iovec(dst, dst_iter);
|
|
||||||
|
|
||||||
bytes = min(src_bv.bv_len, dst_bv.bv_len);
|
bytes = min(src_bv.bv_len, dst_bv.bv_len);
|
||||||
|
|
||||||
@ -60,15 +44,27 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter,
|
|||||||
kunmap_atomic(dst_p);
|
kunmap_atomic(dst_p);
|
||||||
kunmap_atomic(src_p);
|
kunmap_atomic(src_p);
|
||||||
|
|
||||||
bio_advance_iter(src, &src_iter, bytes);
|
flush_dcache_page(dst_bv.bv_page);
|
||||||
bio_advance_iter(dst, &dst_iter, bytes);
|
|
||||||
|
bio_advance_iter(src, src_iter, bytes);
|
||||||
|
bio_advance_iter(dst, dst_iter, bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bio_copy_data - copy contents of data buffers from one bio to another
|
||||||
|
* @src: source bio
|
||||||
|
* @dst: destination bio
|
||||||
|
*
|
||||||
|
* Stops when it reaches the end of either @src or @dst - that is, copies
|
||||||
|
* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
|
||||||
|
*/
|
||||||
void bio_copy_data(struct bio *dst, struct bio *src)
|
void bio_copy_data(struct bio *dst, struct bio *src)
|
||||||
{
|
{
|
||||||
bio_copy_data_iter(dst, dst->bi_iter,
|
struct bvec_iter src_iter = src->bi_iter;
|
||||||
src, src->bi_iter);
|
struct bvec_iter dst_iter = dst->bi_iter;
|
||||||
|
|
||||||
|
bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
||||||
|
Loading…
Reference in New Issue
Block a user