Update bcachefs sources to 09d4c2acbf4c bcachefs: reconstruct_inode()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-04-03 16:47:22 -04:00
parent 9f4ed5ce05
commit 5639fb38ca
39 changed files with 876 additions and 217 deletions

View File

@ -1 +1 @@
794723fc10c4a1ff28d4b11c436277ba783f47e6 09d4c2acbf4c864fef0f520bbcba256c9a19102e

View File

@ -38,7 +38,7 @@ as-option = $(call try-run,\
# Usage: aflags-y += $(call as-instr,instr,option1,option2) # Usage: aflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\ as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3)) printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
# __cc-option # __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) # Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)

View File

@ -676,12 +676,12 @@ static int migrate_fs(const char *fs_path,
struct dev_opts dev = dev_opts_default(); struct dev_opts dev = dev_opts_default();
dev.path = dev_t_to_path(stat.st_dev); dev.path = dev_t_to_path(stat.st_dev);
dev.handle = bdev_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL); dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev.handle); int ret = PTR_ERR_OR_ZERO(dev.file);
if (ret < 0) if (ret < 0)
die("Error opening device to format %s: %s", dev.path, strerror(-ret)); die("Error opening device to format %s: %s", dev.path, strerror(-ret));
dev.bdev = dev.handle->bdev; dev.bdev = file_bdev(dev.file);
opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd)); opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));

View File

@ -52,7 +52,7 @@ static inline struct format_opts format_opts_default()
} }
struct dev_opts { struct dev_opts {
struct bdev_handle *handle; struct file *file;
struct block_device *bdev; struct block_device *bdev;
char *path; char *path;
u64 size; /* bytes*/ u64 size; /* bytes*/

View File

@ -189,13 +189,13 @@ int open_for_format(struct dev_opts *dev, bool force)
const char *fs_type = NULL, *fs_label = NULL; const char *fs_type = NULL, *fs_label = NULL;
size_t fs_type_len, fs_label_len; size_t fs_type_len, fs_label_len;
dev->handle = bdev_open_by_path(dev->path, dev->file = bdev_file_open_by_path(dev->path,
BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED, BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED,
dev, NULL); dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev->handle); int ret = PTR_ERR_OR_ZERO(dev->file);
if (ret < 0) if (ret < 0)
die("Error opening device to format %s: %s", dev->path, strerror(-ret)); die("Error opening device to format %s: %s", dev->path, strerror(-ret));
dev->bdev = dev->handle->bdev; dev->bdev = file_bdev(dev->file);
if (!(pr = blkid_new_probe())) if (!(pr = blkid_new_probe()))
die("blkid error 1"); die("blkid error 1");

View File

@ -16,6 +16,28 @@ typedef void (bio_end_io_t) (struct bio *);
#define BDEVNAME_SIZE 32 #define BDEVNAME_SIZE 32
/*
 * Block device open-mode flags. blk_mode_t is declared __bitwise, so every
 * flag below is produced with a __force cast from a plain bit value.
 */
typedef unsigned int __bitwise blk_mode_t;
/* open for reading */
#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers) */
#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (special hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
/* NOTE(review): undocumented here; presumably requests buffered (non-direct) I/O - confirm */
#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
/*
 * Cut-down struct inode: only the four fields below are declared here.
 */
struct inode {
unsigned long i_ino;
loff_t i_size;
struct super_block *i_sb;
/* NOTE(review): presumably the blk_mode_t flags the backing device was opened with - confirm against bdev_file_open_by_path() */
blk_mode_t mode;
};
struct request_queue { struct request_queue {
struct backing_dev_info *backing_dev_info; struct backing_dev_info *backing_dev_info;
}; };
@ -34,6 +56,7 @@ struct block_device {
dev_t bd_dev; dev_t bd_dev;
char name[BDEVNAME_SIZE]; char name[BDEVNAME_SIZE];
struct inode *bd_inode; struct inode *bd_inode;
struct inode __bd_inode;
struct request_queue queue; struct request_queue queue;
void *bd_holder; void *bd_holder;
struct gendisk * bd_disk; struct gendisk * bd_disk;

View File

@ -23,27 +23,6 @@ struct user_namespace;
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) #define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) #define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
typedef unsigned int __bitwise blk_mode_t;
/* open for reading */
#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers */
#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (specialy hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
struct inode {
unsigned long i_ino;
loff_t i_size;
struct super_block *i_sb;
};
struct file { struct file {
struct inode *f_inode; struct inode *f_inode;
}; };
@ -89,15 +68,14 @@ struct blk_holder_ops {
void (*mark_dead)(struct block_device *bdev); void (*mark_dead)(struct block_device *bdev);
}; };
struct bdev_handle { static inline struct block_device *file_bdev(struct file *file)
struct block_device *bdev; {
void *holder; return container_of(file->f_inode, struct block_device, __bd_inode);
blk_mode_t mode; }
};
void bdev_release(struct bdev_handle *); void fput(struct file *);
struct bdev_handle *bdev_open_by_path(const char *, blk_mode_t, void *, struct file *bdev_file_open_by_path(const char *, blk_mode_t, void *,
const struct blk_holder_ops *); const struct blk_holder_ops *);
int lookup_bdev(const char *path, dev_t *); int lookup_bdev(const char *path, dev_t *);
struct super_block { struct super_block {

View File

@ -92,4 +92,7 @@
/********** VFS **********/ /********** VFS **********/
#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA)) #define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
/********** lib/stackdepot.c **********/
#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
#endif #endif

View File

@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (ret) if (ret)
goto out; goto out;
if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { if (a->v.dirty_sectors) {
a->v.gen++; if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); trans, "attempting to discard bucket with dirty data\n%s",
goto write; (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
}
if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
bch2_trans_inconsistent(trans,
"clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
"%s",
a->v.journal_seq,
c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO; ret = -EIO;
}
goto out; goto out;
} }
if (a->v.data_type != BCH_DATA_need_discard) { if (a->v.data_type != BCH_DATA_need_discard) {
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) { if (data_type_is_empty(a->v.data_type) &&
bch2_trans_inconsistent(trans, BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
"bucket incorrectly set in need_discard btree\n" a->v.gen++;
"%s", SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)); goto write;
ret = -EIO;
} }
if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
trans, "bucket incorrectly set in need_discard btree\n"
"%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
goto out;
}
if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
a->v.journal_seq,
c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
goto out; goto out;
} }
@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
if (ret) if (ret)
goto err; goto err;
BUG_ON(a->v.dirty_sectors);
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type); a->v.data_type = alloc_data_type(a->v, a->v.data_type);
@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
goto out; goto out;
BUG_ON(a->v.data_type != BCH_DATA_cached); BUG_ON(a->v.data_type != BCH_DATA_cached);
BUG_ON(a->v.dirty_sectors);
if (!a->v.cached_sectors) if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused"); bch_err(c, "invalidating empty bucket, confused");

View File

@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
static inline unsigned open_buckets_reserved(enum bch_watermark watermark) static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{ {
switch (watermark) { switch (watermark) {
case BCH_WATERMARK_reclaim: case BCH_WATERMARK_interior_updates:
return 0; return 0;
case BCH_WATERMARK_reclaim:
return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree: case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4; return OPEN_BUCKETS_COUNT / 4;

View File

@ -22,7 +22,8 @@ struct bucket_alloc_state {
x(copygc) \ x(copygc) \
x(btree) \ x(btree) \
x(btree_copygc) \ x(btree_copygc) \
x(reclaim) x(reclaim) \
x(interior_updates)
enum bch_watermark { enum bch_watermark {
#define x(name) BCH_WATERMARK_##name, #define x(name) BCH_WATERMARK_##name,

View File

@ -8,6 +8,7 @@
#include "btree_update.h" #include "btree_update.h"
#include "btree_update_interior.h" #include "btree_update_interior.h"
#include "btree_write_buffer.h" #include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h" #include "error.h"
#include <linux/mm.h> #include <linux/mm.h>
@ -418,6 +419,84 @@ struct extents_to_bp_state {
struct bkey_buf last_flushed; struct bkey_buf last_flushed;
}; };
/*
 * Make a mutable copy of @extent, drop its pointer(s) to device @dev from the
 * copy, and queue the copy for insertion into @btree within @trans.
 *
 * Returns 0 on success, or the error from creating the mutable copy or from
 * the insert.
 */
static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *updated = bch2_bkey_make_mut_noupdate(trans, extent);
	int err = PTR_ERR_OR_ZERO(updated);

	if (!err) {
		bch2_bkey_drop_device(bkey_i_to_s(updated), dev);
		err = bch2_btree_insert_trans(trans, btree, updated, 0);
	}

	return err;
}
static int check_extent_checksum(struct btree_trans *trans,
enum btree_id btree, struct bkey_s_c extent,
enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
struct printbuf buf = PRINTBUF;
void *data_buf = NULL;
struct bio *bio = NULL;
size_t bytes;
int ret = 0;
if (bkey_is_btree_ptr(extent.k))
return false;
bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
if (p.ptr.dev == dev)
goto found;
BUG();
found:
if (!p.crc.csum_type)
return false;
bytes = p.crc.compressed_size << 9;
struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
if (!bch2_dev_get_ioref(ca, READ))
return false;
data_buf = kvmalloc(bytes, GFP_KERNEL);
if (!data_buf) {
ret = -ENOMEM;
goto err;
}
bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
bio->bi_iter.bi_sector = p.ptr.offset;
bch2_bio_map(bio, data_buf, bytes);
ret = submit_bio_wait(bio);
if (ret)
goto err;
prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
bch2_bkey_val_to_text(&buf, c, extent);
prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
bch2_bkey_val_to_text(&buf, c, extent2);
struct nonce nonce = extent_nonce(extent.k->version, p.crc);
struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
c, dup_backpointer_to_bad_csum_extent,
"%s", buf.buf))
ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
if (bio)
bio_put(bio);
kvfree(data_buf);
percpu_ref_put(&ca->io_ref);
printbuf_exit(&buf);
return ret;
}
static int check_bp_exists(struct btree_trans *trans, static int check_bp_exists(struct btree_trans *trans,
struct extents_to_bp_state *s, struct extents_to_bp_state *s,
struct bpos bucket, struct bpos bucket,
@ -425,7 +504,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct bkey_s_c orig_k) struct bkey_s_c orig_k)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct btree_iter bp_iter = { NULL }; struct btree_iter bp_iter = {};
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k; struct bkey_s_c bp_k;
struct bkey_buf tmp; struct bkey_buf tmp;
@ -433,13 +513,19 @@ static int check_bp_exists(struct btree_trans *trans,
bch2_bkey_buf_init(&tmp); bch2_bkey_buf_init(&tmp);
if (!bch2_dev_bucket_exists(c, bucket)) {
prt_str(&buf, "extent for nonexistent device:bucket ");
bch2_bpos_to_text(&buf, bucket);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, orig_k);
bch_err(c, "%s", buf.buf);
return -BCH_ERR_fsck_repair_unimplemented;
}
if (bpos_lt(bucket, s->bucket_start) || if (bpos_lt(bucket, s->bucket_start) ||
bpos_gt(bucket, s->bucket_end)) bpos_gt(bucket, s->bucket_end))
return 0; return 0;
if (!bch2_dev_bucket_exists(c, bucket))
goto missing;
bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp(c, bucket, bp.bucket_offset), bucket_pos_to_bp(c, bucket, bp.bucket_offset),
0); 0);
@ -465,21 +551,94 @@ static int check_bp_exists(struct btree_trans *trans,
ret = -BCH_ERR_transaction_restart_write_buffer_flush; ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out; goto out;
} }
goto missing;
goto check_existing_bp;
} }
out: out:
err: err:
fsck_err: fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter); bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c); bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf); printbuf_exit(&buf);
return ret; return ret;
check_existing_bp:
/* Do we have a backpointer for a different extent? */
if (bp_k.k->type != KEY_TYPE_backpointer)
goto missing;
struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
struct bkey_s_c other_extent =
bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
ret = bkey_err(other_extent);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
ret = 0;
if (ret)
goto err;
if (!other_extent.k)
goto missing;
if (bch2_extents_match(orig_k, other_extent)) {
printbuf_reset(&buf);
prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
if (other_extent.k->size <= orig_k.k->size) {
ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
if (ret)
goto err;
goto out;
} else {
ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
if (ret)
goto err;
goto missing;
}
}
ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
if (ret < 0)
goto err;
if (ret) {
ret = 0;
goto missing;
}
ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
if (ret < 0)
goto err;
if (ret) {
ret = 0;
goto out;
}
printbuf_reset(&buf);
prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err;
missing: missing:
printbuf_reset(&buf);
prt_printf(&buf, "missing backpointer for btree=%s l=%u ", prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_id_str(bp.btree_id), bp.level); bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k); bch2_bkey_val_to_text(&buf, c, orig_k);
prt_printf(&buf, "\nbp pos "); prt_printf(&buf, "\n got: ");
bch2_bpos_to_text(&buf, bp_iter.pos); bch2_bkey_val_to_text(&buf, c, bp_k);
struct bkey_i_backpointer n_bp_k;
bkey_backpointer_init(&n_bp_k.k_i);
n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
n_bp_k.v = bp;
prt_printf(&buf, "\n want: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);

View File

@ -615,6 +615,7 @@ struct bch_dev {
*/ */
#define BCH_FS_FLAGS() \ #define BCH_FS_FLAGS() \
x(new_fs) \
x(started) \ x(started) \
x(may_go_rw) \ x(may_go_rw) \
x(rw) \ x(rw) \
@ -797,6 +798,7 @@ struct bch_fs {
u64 features; u64 features;
u64 compat; u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
u64 btrees_lost_data;
} sb; } sb;
@ -826,7 +828,6 @@ struct bch_fs {
struct btree_root btree_roots_known[BTREE_ID_NR]; struct btree_root btree_roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) btree_roots_extra; DARRAY(struct btree_root) btree_roots_extra;
struct mutex btree_root_lock; struct mutex btree_root_lock;
unsigned long btrees_lost_data; /* bitmask */
struct btree_cache btree_cache; struct btree_cache btree_cache;

View File

@ -818,6 +818,7 @@ struct bch_sb_field_ext {
struct bch_sb_field field; struct bch_sb_field field;
__le64 recovery_passes_required[2]; __le64 recovery_passes_required[2];
__le64 errors_silent[8]; __le64 errors_silent[8];
__le64 btrees_lost_data;
}; };
struct bch_sb_field_downgrade_entry { struct bch_sb_field_downgrade_entry {

View File

@ -1264,11 +1264,11 @@ out:
return retry_read; return retry_read;
fsck_err: fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry || if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
ret == -BCH_ERR_btree_node_read_err_must_retry) ret == -BCH_ERR_btree_node_read_err_must_retry) {
retry_read = 1; retry_read = 1;
else { } else {
set_btree_node_read_error(b); set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data); bch2_btree_lost_data(c, b->c.btree_id);
} }
goto out; goto out;
} }
@ -1330,7 +1330,7 @@ start:
if (!can_retry) { if (!can_retry) {
set_btree_node_read_error(b); set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data); bch2_btree_lost_data(c, b->c.btree_id);
break; break;
} }
} }
@ -1532,7 +1532,7 @@ fsck_err:
if (ret) { if (ret) {
set_btree_node_read_error(b); set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data); bch2_btree_lost_data(c, b->c.btree_id);
} else if (*saw_error) } else if (*saw_error)
bch2_btree_node_rewrite_async(c, b); bch2_btree_node_rewrite_async(c, b);
@ -1669,7 +1669,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bch2_fatal_error(c); bch2_fatal_error(c);
set_btree_node_read_error(b); set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data); bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b); clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf); printbuf_exit(&buf);
@ -1866,7 +1866,7 @@ static void btree_node_write_work(struct work_struct *work)
} else { } else {
ret = bch2_trans_do(c, NULL, NULL, 0, ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_reclaim| BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim| BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw, BCH_TRANS_COMMIT_no_check_rw,

View File

@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
int ret, unsigned long trace_ip) int ret, unsigned long trace_ip)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) { switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full: case -BCH_ERR_btree_insert_btree_node_full:
@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
* flag * flag
*/ */
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) { watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock; ret = -BCH_ERR_journal_reclaim_would_deadlock;
break; break;
} }

View File

@ -26,6 +26,13 @@
#include <linux/random.h> #include <linux/random.h>
/*
 * Names for enum btree_update_mode, indexed by mode value and NULL terminated.
 * Must be expanded from BTREE_UPDATE_MODES() (not BCH_WATERMARKS()) so that
 * bch2_btree_update_modes[as->mode] yields the name of the mode.
 */
const char * const bch2_btree_update_modes[] = {
#define x(t) #t,
	BTREE_UPDATE_MODES()
#undef x
	NULL
};
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *); btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@ -303,7 +310,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct open_buckets obs = { .nr = 0 }; struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
? BTREE_NODE_RESERVE ? BTREE_NODE_RESERVE
: 0; : 0;
int ret; int ret;
@ -687,7 +694,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* which may require allocations as well. * which may require allocations as well.
*/ */
ret = commit_do(trans, &as->disk_res, &journal_seq, ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_reclaim| BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc| BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw| BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim, BCH_TRANS_COMMIT_journal_reclaim,
@ -846,11 +853,11 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock); mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(!btree_node_dirty(b)); BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level); BUG_ON(!b->c.level);
as->mode = BTREE_INTERIOR_UPDATING_NODE; as->mode = BTREE_UPDATE_node;
as->b = b; as->b = b;
set_btree_node_write_blocked(b); set_btree_node_write_blocked(b);
@ -873,7 +880,7 @@ static void btree_update_reparent(struct btree_update *as,
lockdep_assert_held(&c->btree_interior_update_lock); lockdep_assert_held(&c->btree_interior_update_lock);
child->b = NULL; child->b = NULL;
child->mode = BTREE_INTERIOR_UPDATING_AS; child->mode = BTREE_UPDATE_update;
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
bch2_update_reparent_journal_pin_flush); bch2_update_reparent_journal_pin_flush);
@ -884,7 +891,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
struct bkey_i *insert = &b->key; struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c; struct bch_fs *c = as->c;
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) > BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries)); ARRAY_SIZE(as->journal_entries));
@ -898,7 +905,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock); mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
as->mode = BTREE_INTERIOR_UPDATING_ROOT; as->mode = BTREE_UPDATE_root;
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
} }
@ -1076,7 +1083,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
struct bch_fs *c = as->c; struct bch_fs *c = as->c;
u64 start_time = as->start_time; u64 start_time = as->start_time;
BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock) if (as->took_gc_lock)
up_read(&as->c->gc_lock); up_read(&as->c->gc_lock);
@ -1121,7 +1128,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim) watermark < BCH_WATERMARK_reclaim)
journal_flags |= JOURNAL_RES_GET_NONBLOCK; journal_flags |= JOURNAL_RES_GET_NONBLOCK;
ret = drop_locks_do(trans, ret = drop_locks_do(trans,
@ -1172,7 +1179,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as->c = c; as->c = c;
as->start_time = start_time; as->start_time = start_time;
as->ip_started = _RET_IP_; as->ip_started = _RET_IP_;
as->mode = BTREE_INTERIOR_NO_UPDATE; as->mode = BTREE_UPDATE_none;
as->watermark = watermark;
as->took_gc_lock = true; as->took_gc_lock = true;
as->btree_id = path->btree_id; as->btree_id = path->btree_id;
as->update_level = update_level; as->update_level = update_level;
@ -1217,7 +1225,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
*/ */
if (bch2_err_matches(ret, ENOSPC) && if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) && (flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim) { watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock; ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err; goto err;
} }
@ -2509,18 +2517,25 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev
bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level)); bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
} }
/*
 * Print a one-line summary of the interior btree update @as to @out: where it
 * was started, which btree it is for, its watermark and mode, whether its
 * nodes have been written, the closure's remaining count and its journal
 * sequence number.
 */
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
	void *started_at = (void *) as->ip_started;
	const char *btree_name = bch2_btree_id_str(as->btree_id);
	const char *watermark_name = bch2_watermarks[as->watermark];
	const char *mode_name = bch2_btree_update_modes[as->mode];

	prt_printf(out,
		   "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
		   started_at, btree_name, watermark_name, mode_name,
		   as->nodes_written,
		   closure_nr_remaining(&as->cl),
		   as->journal.seq);
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{ {
struct btree_update *as; struct btree_update *as;
mutex_lock(&c->btree_interior_update_lock); mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list) list_for_each_entry(as, &c->btree_interior_update_list, list)
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_to_text(out, as);
(void *) as->ip_started,
as->mode,
as->nodes_written,
closure_nr_remaining(&as->cl),
as->journal.seq);
mutex_unlock(&c->btree_interior_update_lock); mutex_unlock(&c->btree_interior_update_lock);
} }

View File

@ -12,6 +12,18 @@
int bch2_btree_node_check_topology(struct btree_trans *, struct btree *); int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
/*
 * X-macro list of the modes a struct btree_update can be in; expanding it with
 * a suitable x() generates one item per mode (the enum below is one such
 * expansion, producing BTREE_UPDATE_none .. BTREE_UPDATE_update).
 */
#define BTREE_UPDATE_MODES() \
x(none) \
x(node) \
x(root) \
x(update)
/* What kind of update a struct btree_update is doing (stored in its ->mode). */
enum btree_update_mode {
#define x(n) BTREE_UPDATE_##n,
BTREE_UPDATE_MODES()
#undef x
};
/* /*
* Tracks an in progress split/rewrite of a btree node and the update to the * Tracks an in progress split/rewrite of a btree node and the update to the
* parent node: * parent node:
@ -39,14 +51,8 @@ struct btree_update {
struct list_head list; struct list_head list;
struct list_head unwritten_list; struct list_head unwritten_list;
/* What kind of update are we doing? */ enum btree_update_mode mode;
enum { enum bch_watermark watermark;
BTREE_INTERIOR_NO_UPDATE,
BTREE_INTERIOR_UPDATING_NODE,
BTREE_INTERIOR_UPDATING_ROOT,
BTREE_INTERIOR_UPDATING_AS,
} mode;
unsigned nodes_written:1; unsigned nodes_written:1;
unsigned took_gc_lock:1; unsigned took_gc_lock:1;
@ -56,7 +62,7 @@ struct btree_update {
struct disk_reservation disk_res; struct disk_reservation disk_res;
/* /*
* BTREE_INTERIOR_UPDATING_NODE: * BTREE_UPDATE_node:
* The update that made the new nodes visible was a regular update to an * The update that made the new nodes visible was a regular update to an
* existing interior node - @b. We can't write out the update to @b * existing interior node - @b. We can't write out the update to @b
* until the new nodes we created are finished writing, so we block @b * until the new nodes we created are finished writing, so we block @b

View File

@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
fallthrough; fallthrough;
case BCH_WATERMARK_btree_copygc: case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim: case BCH_WATERMARK_reclaim:
case BCH_WATERMARK_interior_updates:
break; break;
} }

View File

@ -580,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt, move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks, (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) || PTR_BUCKET_POS(c, &p.ptr), 0)) ||
(!atomic_read(&ctxt->read_sectors) && list_empty(&ctxt->ios));
!atomic_read(&ctxt->write_sectors)));
if (!locked) if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks, bch2_bucket_nocow_lock(&c->nocow_locks,

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h" #include "bcachefs.h"
#include "error.h" #include "error.h"
#include "journal.h"
#include "recovery_passes.h" #include "recovery_passes.h"
#include "super.h" #include "super.h"
#include "thread_with_file.h" #include "thread_with_file.h"
@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
return false; return false;
case BCH_ON_ERROR_ro: case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c)) if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only"); bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
journal_cur_seq(&c->journal));
return true; return true;
case BCH_ON_ERROR_panic: case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error")); panic(bch2_fmt(c, "panic after error"));

View File

@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n)
struct wrapper { struct wrapper {
cmp_func_t cmp; cmp_func_t cmp;
swap_func_t swap_f; swap_func_t swap;
}; };
/* /*
@ -125,7 +125,7 @@ struct wrapper {
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{ {
if (swap_func == SWAP_WRAPPER) { if (swap_func == SWAP_WRAPPER) {
((const struct wrapper *)priv)->swap_f(a, b, (int)size); ((const struct wrapper *)priv)->swap(a, b, (int)size);
return; return;
} }
@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
int i, c, r; int i, c, r;
/* called from 'sort' without swap function, let's pick the default */ /* called from 'sort' without swap function, let's pick the default */
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_f) if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
swap_func = NULL; swap_func = NULL;
if (!swap_func) { if (!swap_func) {
@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
{ {
struct wrapper w = { struct wrapper w = {
.cmp = cmp_func, .cmp = cmp_func,
.swap_f = swap_func, .swap = swap_func,
}; };
return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);

View File

@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum) u32 *snapshot, u64 *inum)
{ {
struct bch_subvolume s; struct bch_subvolume s;
int ret; int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
*snapshot = le32_to_cpu(s.snapshot); *snapshot = le32_to_cpu(s.snapshot);
*inum = le64_to_cpu(s.inode); *inum = le64_to_cpu(s.inode);
@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, ret = bch2_btree_iter_traverse(&iter) ?:
&dir_hash_info, &iter, bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); &dir_hash_info, &iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
err: err:
bch_err_fn(c, ret); bch_err_fn(c, ret);
@ -169,7 +168,8 @@ err:
/* Get lost+found, create if it doesn't exist: */ /* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked *lostfound) struct bch_inode_unpacked *lostfound,
u64 reattaching_inum)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found"); struct qstr lostfound_str = QSTR("lost+found");
@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
return ret; return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
u32 subvol_snapshot;
ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol), struct bch_subvolume subvol;
&subvol_snapshot, &root_inum.inum); ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
bch_err_msg(c, ret, "looking up root subvol"); false, 0, &subvol);
bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
le32_to_cpu(st.master_subvol), snapshot);
if (ret) if (ret)
return ret; return ret;
if (!subvol.inode) {
struct btree_iter iter;
struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
0, subvolume);
ret = PTR_ERR_OR_ZERO(subvol);
if (ret)
return ret;
subvol->v.inode = cpu_to_le64(reattaching_inum);
bch2_trans_iter_exit(trans, &iter);
}
root_inum.inum = le64_to_cpu(subvol.inode);
struct bch_inode_unpacked root_inode; struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info; struct bch_hash_info root_hash_info;
u32 root_inode_snapshot = snapshot; u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
bch_err_msg(c, ret, "looking up root inode"); bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret) if (ret)
return ret; return ret;
@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans,
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
} }
ret = lookup_lostfound(trans, dirent_snapshot, &lostfound); ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
if (ret) if (ret)
return ret; return ret;
@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
return ret; return ret;
} }
/*
 * Recreate the subvolume key @subvolid for snapshot @snapshotid, using @inum
 * as the subvolume's root inode.
 *
 * If @inum is 0, a new directory inode is created first and becomes the root.
 * After inserting the subvolume key, the snapshot key is pointed back at the
 * subvolume, and the snapshot tree's master_subvol is filled in if it was
 * unset.
 *
 * Returns 0 on success, -BCH_ERR_fsck_repair_unimplemented if @snapshotid is
 * not a leaf snapshot, or the error returned by the failing step.
 */
static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
{
struct bch_fs *c = trans->c;
/* Only leaf snapshots are handled; interior nodes are reported and refused */
if (!bch2_snapshot_is_leaf(c, snapshotid)) {
bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
return -BCH_ERR_fsck_repair_unimplemented;
}
/*
* If inum isn't set, that means we're being called from check_dirents,
* not check_inodes - the root of this subvolume doesn't exist or we
* would have found it there:
*/
if (!inum) {
struct btree_iter inode_iter = {};
struct bch_inode_unpacked new_inode;
u64 cpu = raw_smp_processor_id();
/* Fresh directory inode (S_IFDIR|0755) owned by the subvolume being rebuilt */
bch2_inode_init_early(c, &new_inode);
bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
new_inode.bi_subvol = subvolid;
/* Create, traverse, write: the ?: chain stops at the first nonzero error */
int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
bch2_btree_iter_traverse(&inode_iter) ?:
bch2_inode_write(trans, &inode_iter, &new_inode);
/* The iterator is released on both the success and the error path */
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
/* From here on the newly created inode is the subvolume root */
inum = new_inode.bi_inum;
}
bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
/* Build the new subvolume key in transaction-allocated memory */
struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
int ret = PTR_ERR_OR_ZERO(new_subvol);
if (ret)
return ret;
bkey_subvolume_init(&new_subvol->k_i);
new_subvol->k.p.offset = subvolid;
new_subvol->v.snapshot = cpu_to_le32(snapshotid);
new_subvol->v.inode = cpu_to_le64(inum);
ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
if (ret)
return ret;
/* Point the snapshot key back at the subvolume and flag it as having one */
struct btree_iter iter;
struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, snapshotid),
0, snapshot);
ret = PTR_ERR_OR_ZERO(s);
bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
if (ret)
return ret;
/* Remember the tree id before the iterator is released */
u32 snapshot_tree = le32_to_cpu(s->v.tree);
s->v.subvol = cpu_to_le32(subvolid);
SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
bch2_trans_iter_exit(trans, &iter);
/* Finally update the snapshot tree this snapshot belongs to */
struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(st);
bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
if (ret)
return ret;
/* An existing master subvolume is left alone; only an unset one is filled in */
if (!st->v.master_subvol)
st->v.master_subvol = cpu_to_le32(subvolid);
bch2_trans_iter_exit(trans, &iter);
return 0;
}
/*
 * Write a freshly initialised inode with number @inum into @snapshot.
 *
 * The inode's mode is @mode|0755 and its size is @size; everything else comes
 * from the bch2_inode_init_early()/bch2_inode_init_late() defaults.
 *
 * Returns the result of __bch2_fsck_write_inode().
 */
static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
{
	struct bch_inode_unpacked inode;

	bch2_inode_init_early(trans->c, &inode);
	bch2_inode_init_late(&inode, bch2_current_time(trans->c), 0, 0, mode|0755, 0, NULL);

	/* Override the identity and size on top of the defaults */
	inode.bi_inum = inum;
	inode.bi_size = size;

	return __bch2_fsck_write_inode(trans, &inode, snapshot);
}
/*
 * Reconstruct a missing regular-file inode @inum in @snapshot.
 *
 * The file size is taken from the last extent found at or before
 * SPOS(@inum, U64_MAX, @snapshot): the key's offset is in 512-byte sectors,
 * hence the shift by 9 to get bytes.
 *
 * Returns 0 on success or a negative error code from the lookup or the inode
 * write.
 */
static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
{
	struct btree_iter iter = {};
	u64 size = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
	int ret = bkey_err(k);

	/*
	 * bkey_err() only catches error pointers, so a lookup that found
	 * nothing (null key) or that landed on an extent of a different inode
	 * must not be used for the size; fall back to an empty file instead.
	 * Read the key before releasing the iterator that produced it.
	 */
	if (!ret && k.k && k.k->p.inode == inum)
		size = k.k->p.offset << 9;

	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		return ret;

	return reconstruct_inode(trans, snapshot, inum, size, S_IFREG);
}
struct snapshots_seen_entry { struct snapshots_seen_entry {
u32 id; u32 id;
u32 equiv; u32 equiv;
@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret && !bch2_err_matches(ret, ENOENT))
goto err; goto err;
if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
goto do_update;
}
if (fsck_err_on(ret, if (fsck_err_on(ret,
c, inode_bi_subvol_missing, c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u", "inode %llu:%u bi_subvol points to missing subvolume %u",
@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans,
do_update = true; do_update = true;
} }
} }
do_update:
if (do_update) { if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode"); bch_err_msg(c, ret, "in fsck updating inode");
@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
i->count = count2; i->count = count2;
if (i->count != count2) { if (i->count != count2) {
bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2); w->last_pos.inode, i->snapshot, i->count, count2);
return -BCH_ERR_internal_fsck_err; return -BCH_ERR_internal_fsck_err;
} }
@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans,
goto err; goto err;
} }
ret = extent_ends_at(c, extent_ends, seen, k);
if (ret)
goto err;
extent_ends->last_pos = k.k->p; extent_ends->last_pos = k.k->p;
err: err:
return ret; return ret;
@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err; goto err;
if (k.k->type != KEY_TYPE_whiteout) { if (k.k->type != KEY_TYPE_whiteout) {
if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto err;
inode->last_pos.inode--;
ret = -BCH_ERR_transaction_restart_nested;
goto err;
}
if (fsck_err_on(!i, c, extent_in_missing_inode, if (fsck_err_on(!i, c, extent_in_missing_inode,
"extent in missing inode:\n %s", "extent in missing inode:\n %s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
i->seen_this_pos = true; i->seen_this_pos = true;
} }
if (k.k->type != KEY_TYPE_whiteout) {
ret = extent_ends_at(c, extent_ends, s, k);
if (ret)
goto err;
}
out: out:
err: err:
fsck_err: fsck_err:
@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
return count2; return count2;
if (i->count != count2) { if (i->count != count2) {
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
i->count, count2); w->last_pos.inode, i->snapshot, i->count, count2);
i->count = count2; i->count = count2;
if (i->inode.bi_nlink == i->count) if (i->inode.bi_nlink == i->count)
continue; continue;
@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 parent_snapshot; u32 parent_snapshot;
u32 new_parent_subvol = 0;
u64 parent_inum; u64 parent_inum;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret && !bch2_err_matches(ret, ENOENT))
return ret; return ret;
if (ret ||
(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
if (ret2 && !bch2_err_matches(ret, ENOENT))
return ret2;
}
if (ret &&
!new_parent_subvol &&
(c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
/*
* Couldn't find a subvol for dirent's snapshot - but we lost
* subvols, so we need to reconstruct:
*/
ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
if (ret)
return ret;
parent_snapshot = d.k->p.snapshot;
}
if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent parent_subvol points to missing subvolume\n%s", "dirent parent_subvol points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot, parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
u32 new_parent_subvol; if (!new_parent_subvol) {
ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
if (ret) return -BCH_ERR_fsck_repair_unimplemented;
goto err; }
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
ret = PTR_ERR_OR_ZERO(new_dirent); ret = PTR_ERR_OR_ZERO(new_dirent);
@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT)) if (ret && !bch2_err_matches(ret, ENOENT))
return ret; goto err;
if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol, if (ret) {
bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
ret = -BCH_ERR_fsck_repair_unimplemented;
ret = 0;
goto err;
}
if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong, c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum, target_inum,
@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
subvol_root.bi_parent_subvol = parent_subvol; subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret) if (ret)
return ret; goto err;
} }
ret = check_dirent_target(trans, iter, d, &subvol_root, ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot); target_snapshot);
if (ret) if (ret)
return ret; goto err;
out: out:
err: err:
fsck_err: fsck_err:
@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct snapshots_seen *s) struct snapshots_seen *s)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
struct bkey_s_c_dirent d;
struct inode_walker_entry *i; struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
struct bpos equiv; struct bpos equiv;
@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
dir->first_this_inode = false; dir->first_this_inode = false;
if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto err;
dir->last_pos.inode--;
ret = -BCH_ERR_transaction_restart_nested;
goto err;
}
if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
"dirent in nonexisting directory:\n%s", "dirent in nonexisting directory:\n%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_dirent) if (k.k->type != KEY_TYPE_dirent)
goto out; goto out;
d = bkey_s_c_to_dirent(k); struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) { if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d); ret = check_dirent_to_subvol(trans, iter, d);

View File

@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk; struct bkey_buf sk;
u32 restart_count = trans->restart_count; u32 restart_count = trans->restart_count;
int ret;
if (!fn) if (!fn)
return 0; return 0;
@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk); bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k); bch2_bkey_buf_reassemble(&sk, c, k);
ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?: fn->resume(trans, sk.k);
fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&sk, c);
return ret;
return trans_was_restarted(trans, restart_count);
} }
int bch2_resume_logged_ops(struct bch_fs *c) int bch2_resume_logged_ops(struct bch_fs *c)

View File

@ -7,6 +7,7 @@
#include "disk_groups.h" #include "disk_groups.h"
#include "error.h" #include "error.h"
#include "opts.h" #include "opts.h"
#include "recovery_passes.h"
#include "super-io.h" #include "super-io.h"
#include "util.h" #include "util.h"
@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \ #define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \ .min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices .choices = _choices
#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = U64_MAX, \
.choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn #define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \ #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \

View File

@ -362,7 +362,12 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \ OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \ OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \ BCH2_NO_SB_OPT, false, \
NULL, "Don't replay the journal") \ NULL, "Exit recovery immediately prior to journal replay")\
x(recovery_pass_last, u8, \
OPT_FS|OPT_MOUNT, \
OPT_STR_NOLIMIT(bch2_recovery_passes), \
BCH2_NO_SB_OPT, 0, \
NULL, "Exit recovery after specified pass") \
x(retain_recovery_info, u8, \ x(retain_recovery_info, u8, \
0, \ 0, \
OPT_BOOL(), \ OPT_BOOL(), \

View File

@ -33,6 +33,20 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n } #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Flag @btree as having lost data in the superblock's ext field.
 *
 * Does nothing if c->sb.btrees_lost_data already has the bit set; otherwise
 * logs an error, sets the bit and writes the superblock under sb_lock.
 */
void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
	u64 mask = BIT_ULL(btree);

	/* Already flagged: nothing to log and no superblock write needed */
	if (c->sb.btrees_lost_data & mask)
		return;

	bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));

	mutex_lock(&c->sb_lock);
	bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(mask);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
}
static bool btree_id_is_alloc(enum btree_id id) static bool btree_id_is_alloc(enum btree_id id)
{ {
switch (id) { switch (id) {
@ -272,7 +286,8 @@ int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans); bch2_trans_put(trans);
trans = NULL; trans = NULL;
if (!c->opts.retain_recovery_info) if (!c->opts.retain_recovery_info &&
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c); bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end); replay_now_at(j, j->replay_journal_seq_end);
@ -468,8 +483,8 @@ static int read_btree_roots(struct bch_fs *c)
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
} }
set_bit(i, &c->btrees_lost_data);
ret = 0; ret = 0;
bch2_btree_lost_data(c, i);
} }
} }
@ -590,27 +605,14 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err; goto err;
} }
if (c->opts.fsck && c->opts.norecovery) { if (c->opts.norecovery)
bch_err(c, "cannot select both norecovery and fsck"); c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
ret = -EINVAL;
goto err;
}
c->opts.retain_recovery_info |= c->opts.norecovery;
c->opts.nochanges |= c->opts.norecovery;
if (!c->opts.nochanges) { if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false; bool write_sb = false;
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
if (!ext) {
ret = -BCH_ERR_ENOSPC_sb;
mutex_unlock(&c->sb_lock);
goto err;
}
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |= ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
@ -841,6 +843,7 @@ use_clean:
} }
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false; bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
@ -854,15 +857,18 @@ use_clean:
write_sb = true; write_sb = true;
} }
if (!test_bit(BCH_FS_error, &c->flags)) { if (!test_bit(BCH_FS_error, &c->flags) &&
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
if (ext && memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
(!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || write_sb = true;
!bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { }
memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); if (c->opts.fsck &&
write_sb = true; !test_bit(BCH_FS_error, &c->flags) &&
} c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
ext->btrees_lost_data) {
ext->btrees_lost_data = 0;
write_sb = true;
} }
if (c->opts.fsck && if (c->opts.fsck &&
@ -932,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
int ret; int ret;
bch_notice(c, "initializing new filesystem"); bch_notice(c, "initializing new filesystem");
set_bit(BCH_FS_new_fs, &c->flags);
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);

View File

@ -2,6 +2,8 @@
#ifndef _BCACHEFS_RECOVERY_H #ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H
void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
int bch2_journal_replay(struct bch_fs *); int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *); int bch2_fs_recovery(struct bch_fs *);

View File

@ -17,6 +17,7 @@
#include "snapshot.h" #include "snapshot.h"
#include "subvolume.h" #include "subvolume.h"
#include "super.h" #include "super.h"
#include "super-io.h"
const char * const bch2_recovery_passes[] = { const char * const bch2_recovery_passes[] = {
#define x(_fn, ...) #_fn, #define x(_fn, ...) #_fn,
@ -27,7 +28,7 @@ const char * const bch2_recovery_passes[] = {
static int bch2_check_allocations(struct bch_fs *c) static int bch2_check_allocations(struct bch_fs *c)
{ {
return bch2_gc(c, true, c->opts.norecovery); return bch2_gc(c, true, false);
} }
static int bch2_set_may_go_rw(struct bch_fs *c) static int bch2_set_may_go_rw(struct bch_fs *c)
@ -59,18 +60,23 @@ static struct recovery_pass_fn recovery_pass_fns[] = {
#undef x #undef x
}; };
u64 bch2_recovery_passes_to_stable(u64 v) static const u8 passes_to_stable_map[] = {
{
static const u8 map[] = {
#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, #define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
BCH_RECOVERY_PASSES() BCH_RECOVERY_PASSES()
#undef x #undef x
}; };
static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
return passes_to_stable_map[pass];
}
u64 bch2_recovery_passes_to_stable(u64 v)
{
u64 ret = 0; u64 ret = 0;
for (unsigned i = 0; i < ARRAY_SIZE(map); i++) for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
if (v & BIT_ULL(i)) if (v & BIT_ULL(i))
ret |= BIT_ULL(map[i]); ret |= BIT_ULL(passes_to_stable_map[i]);
return ret; return ret;
} }
@ -113,6 +119,38 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
} }
} }
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
enum bch_recovery_pass pass)
{
enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!test_bit_le64(s, ext->recovery_passes_required)) {
__set_bit_le64(s, ext->recovery_passes_required);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
return bch2_run_explicit_recovery_pass(c, pass);
}
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
enum bch_recovery_pass pass)
{
enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (test_bit_le64(s, ext->recovery_passes_required)) {
__clear_bit_le64(s, ext->recovery_passes_required);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
u64 bch2_fsck_recovery_passes(void) u64 bch2_fsck_recovery_passes(void)
{ {
u64 ret = 0; u64 ret = 0;
@ -127,8 +165,6 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
{ {
struct recovery_pass_fn *p = recovery_pass_fns + pass; struct recovery_pass_fn *p = recovery_pass_fns + pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false;
if (c->recovery_passes_explicit & BIT_ULL(pass)) if (c->recovery_passes_explicit & BIT_ULL(pass))
return true; return true;
if ((p->when & PASS_FSCK) && c->opts.fsck) if ((p->when & PASS_FSCK) && c->opts.fsck)
@ -184,6 +220,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
int ret = 0; int ret = 0;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
if (c->opts.recovery_pass_last &&
c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
if (should_run_recovery_pass(c, c->curr_recovery_pass)) { if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
unsigned pass = c->curr_recovery_pass; unsigned pass = c->curr_recovery_pass;
@ -196,8 +236,13 @@ int bch2_run_recovery_passes(struct bch_fs *c)
c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
} }
c->curr_recovery_pass++;
c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
if (!test_bit(BCH_FS_error, &c->flags))
bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
c->curr_recovery_pass++;
} }
return ret; return ret;

View File

@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void); u64 bch2_fsck_recovery_passes(void);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_online_recovery_passes(struct bch_fs *); int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *); int bch2_run_recovery_passes(struct bch_fs *);

View File

@ -32,6 +32,7 @@
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \ x(bucket_gens_init, 17, 0) \
x(reconstruct_snapshots, 38, 0) \
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \ x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \ x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \ x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \

View File

@ -268,7 +268,9 @@
x(btree_node_bkey_bad_u64s, 260) \ x(btree_node_bkey_bad_u64s, 260) \
x(btree_node_topology_empty_interior_node, 261) \ x(btree_node_topology_empty_interior_node, 261) \
x(btree_ptr_v2_min_key_bad, 262) \ x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263) x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265)
enum bch_sb_error_id { enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n, #define x(t, n) BCH_FSCK_ERR_##t = n,

View File

@ -8,6 +8,7 @@
#include "errcode.h" #include "errcode.h"
#include "error.h" #include "error.h"
#include "fs.h" #include "fs.h"
#include "recovery_passes.h"
#include "snapshot.h" #include "snapshot.h"
#include <linux/random.h> #include <linux/random.h>
@ -131,7 +132,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock(); rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots); struct snapshot_table *t = rcu_dereference(c->snapshots);
if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) { if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out; goto out;
} }
@ -574,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
u32 subvol_id; u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id); ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
bch_err_fn(c, ret);
if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
ret = 0;
goto err;
}
if (ret) if (ret)
goto err; goto err;
@ -731,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth; u32 real_depth;
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id; u32 i, id;
int ret = 0; int ret = 0;
@ -777,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
} }
} }
should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) && bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s); !BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) { if (should_have_subvol) {
@ -879,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
return ret; return ret;
} }
/*
 * Make sure a snapshot node exists for @id.
 *
 * If bch2_snapshot_equiv() already reports a value for @id there is nothing
 * to do. Otherwise a new snapshot tree is created with @id as its root, a
 * fresh snapshot key pointing at that tree is inserted at POS(0, @id), and
 * the in-memory state is updated via bch2_mark_snapshot() and
 * bch2_snapshot_set_equiv().
 *
 * Returns 0 on success or the first error encountered.
 */
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_snapshot *new_node;
	u32 new_tree;
	int ret;

	if (bch2_snapshot_equiv(c, id))
		return 0;

	/* The tree is created first so the new node can reference it */
	ret = bch2_snapshot_tree_create(trans, id, 0, &new_tree);
	if (ret)
		return ret;

	new_node = bch2_trans_kmalloc(trans, sizeof(*new_node));
	ret = PTR_ERR_OR_ZERO(new_node);
	if (ret)
		return ret;

	bkey_snapshot_init(&new_node->k_i);
	new_node->k.p		= POS(0, id);
	new_node->v.tree	= cpu_to_le32(new_tree);
	new_node->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	ret = bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &new_node->k_i, 0);
	if (ret)
		return ret;

	ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
				 bkey_s_c_null, bkey_i_to_s(&new_node->k_i), 0);
	if (ret)
		return ret;

	return bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&new_node->k_i));
}
/*
 * Figure out which snapshot nodes belong in the same tree:
 *
 * State carried across a scan of the snapshot-aware btrees. Snapshot IDs seen
 * at the same position group (see same_snapshot()) are accumulated in
 * @cur_ids, then folded into @trees by snapshot_tree_reconstruct_next().
 */
struct snapshot_tree_reconstruct {
	/* btree currently being scanned; selects how same_snapshot() compares */
	enum btree_id		btree;
	/* position of the most recently visited key */
	struct bpos		cur_pos;
	/* snapshot IDs collected for the current position group */
	snapshot_id_list	cur_ids;
	/* one ID list per reconstructed tree; lists sharing an ID get merged */
	DARRAY(snapshot_id_list) trees;
};
static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
{
darray_for_each(r->trees, i)
darray_exit(i);
darray_exit(&r->trees);
darray_exit(&r->cur_ids);
}
/*
 * Does @pos belong to the same group as the last position recorded in @r?
 *
 * For the inodes btree the offset field is compared; for every other btree
 * the inode field is compared.
 */
static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (r->btree == BTREE_ID_inodes)
		return r->cur_pos.offset == pos.offset;

	return r->cur_pos.inode == pos.inode;
}
/* Returns true if at least one snapshot ID appears in both @l and @r. */
static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	bool found = false;

	darray_for_each(*l, id) {
		if (snapshot_list_has_id(r, *id)) {
			found = true;
			break;
		}
	}

	return found;
}
/* Print the IDs in @s to @out as unsigned decimals separated by single spaces. */
static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	bool need_separator = false;

	darray_for_each(*s, id) {
		if (need_separator)
			prt_char(out, ' ');
		else
			need_separator = true;

		prt_printf(out, "%u", *id);
	}
}
/*
 * Close out the group of snapshot IDs accumulated in r->cur_ids.
 *
 * If the group shares an ID with an already known tree it is merged into that
 * tree; otherwise it is appended to r->trees as a new tree. On success
 * r->cur_ids is left empty, ready for the next group.
 *
 * Returns 0 on success, or the error from snapshot_list_merge() or
 * darray_push(). On error r->cur_ids is left untouched so that
 * snapshot_tree_reconstruct_exit() can still free it.
 */
static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
{
	int ret;

	if (r->cur_ids.nr) {
		darray_for_each(r->trees, i)
			if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
				ret = snapshot_list_merge(c, i, &r->cur_ids);
				if (ret)
					return ret;
				goto out;
			}

		/*
		 * No overlap with any known tree: the list is pushed by value, so
		 * r->trees takes over its storage. Only reinitialize cur_ids once
		 * the push has succeeded, otherwise the storage would be leaked.
		 */
		ret = darray_push(&r->trees, r->cur_ids);
		if (ret)
			return ret;
		darray_init(&r->cur_ids);
	}
out:
	r->cur_ids.nr = 0;
	return 0;
}
/*
 * Per-key callback for the btree scan: record the snapshot ID of the key at
 * @pos.
 *
 * When @pos starts a new group (see same_snapshot()), the previous group is
 * first flushed with snapshot_tree_reconstruct_next(). A failure there is
 * propagated rather than ignored, so an allocation error aborts the scan.
 *
 * Returns 0 on success or a negative error code.
 */
static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (!same_snapshot(r, pos)) {
		int ret = snapshot_tree_reconstruct_next(c, r);
		if (ret)
			return ret;
	}

	r->cur_pos = pos;
	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
}
/*
 * Recreate missing snapshot nodes from the keys that reference them.
 *
 * Every btree for which btree_type_has_snapshots() is true is walked across
 * all snapshots, and the snapshot IDs found are grouped into candidate trees.
 * For each ID in each tree, bch2_snapshot_equiv() is consulted; a missing
 * node is reported through fsck_err_on(). When a repair is requested:
 *  - a tree with more than one node cannot be rebuilt and the function fails
 *    with -BCH_ERR_fsck_repair_unimplemented;
 *  - a single-node tree is recreated with check_snapshot_exists().
 *
 * Returns 0 on success or a negative error code.
 */
int bch2_reconstruct_snapshots(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct printbuf buf = PRINTBUF;
	struct snapshot_tree_reconstruct r = {};
	int ret = 0;

	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
		if (btree_type_has_snapshots(btree)) {
			r.btree = btree;

			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
					BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
				get_snapshot_trees(c, &r, k.k->p);
			}));
			if (ret)
				goto err;

			/* Flush the last group of this btree; do not drop its error */
			ret = snapshot_tree_reconstruct_next(c, &r);
			if (ret)
				goto err;
		}
	}

	darray_for_each(r.trees, t) {
		/* Text form of the whole tree, reused in each message below */
		printbuf_reset(&buf);
		snapshot_id_list_to_text(&buf, t);

		darray_for_each(*t, id) {
			if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
					c, snapshot_node_missing,
					"snapshot node %u from tree %s missing", *id, buf.buf)) {
				if (t->nr > 1) {
					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
					ret = -BCH_ERR_fsck_repair_unimplemented;
					goto err;
				}

				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
						check_snapshot_exists(trans, *id));
				if (ret)
					goto err;
			}
		}
	}
	/* NOTE(review): fsck_err is presumably the jump target of fsck_err_on() - confirm */
fsck_err:
err:
	bch2_trans_put(trans);
	snapshot_tree_reconstruct_exit(&r);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
/* /*
* Mark a snapshot as deleted, for future cleanup: * Mark a snapshot as deleted, for future cleanup:
*/ */
@ -1689,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
POS_MIN, 0, k, POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0))); (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret); bch_err_fn(c, ret);
/*
* It's important that we check if we need to reconstruct snapshots
* before going RW, so we mark that pass as required in the superblock -
* otherwise, we could end up deleting keys with missing snapshot nodes
* instead
*/
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
if (bch2_err_matches(ret, EIO) ||
(c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
return ret; return ret;
} }

View File

@ -209,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id) static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{ {
int ret;
BUG_ON(snapshot_list_has_id(s, id)); BUG_ON(snapshot_list_has_id(s, id));
ret = darray_push(s, id); int ret = darray_push(s, id);
if (ret) if (ret)
bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size); bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
return ret; return ret;
} }
/*
 * Append @id to @s unless it is already present.
 *
 * Returns 0 if @id was already in the list or was added; otherwise logs and
 * returns the error from darray_push().
 */
static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
	if (snapshot_list_has_id(s, id))
		return 0;

	int err = darray_push(s, id);

	if (err)
		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
	return err;
}
/*
 * Add every ID of @src to @dst, skipping IDs @dst already holds.
 *
 * Stops at, and returns, the first error from snapshot_list_add_nodup();
 * returns 0 when all IDs were processed.
 */
static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
{
	int err = 0;

	darray_for_each(*src, id) {
		err = snapshot_list_add_nodup(c, dst, *id);
		if (err)
			break;
	}

	return err;
}
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s); struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32, int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@ -229,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *); void bch2_delete_dead_snapshots_work(struct work_struct *);

View File

@ -142,8 +142,8 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb) void bch2_free_super(struct bch_sb_handle *sb)
{ {
kfree(sb->bio); kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev_handle)) if (!IS_ERR_OR_NULL(sb->s_bdev_file))
bdev_release(sb->bdev_handle); fput(sb->s_bdev_file);
kfree(sb->holder); kfree(sb->holder);
kfree(sb->sb_name); kfree(sb->sb_name);
@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext) if (ext) {
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8); sizeof(c->sb.errors_silent) * 8);
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
}
for_each_member_device(c, ca) { for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@ -712,23 +714,23 @@ retry:
if (!opt_get(*opts, nochanges)) if (!opt_get(*opts, nochanges))
sb->mode |= BLK_OPEN_WRITE; sb->mode |= BLK_OPEN_WRITE;
sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (IS_ERR(sb->bdev_handle) && if (IS_ERR(sb->s_bdev_file) &&
PTR_ERR(sb->bdev_handle) == -EACCES && PTR_ERR(sb->s_bdev_file) == -EACCES &&
opt_get(*opts, read_only)) { opt_get(*opts, read_only)) {
sb->mode &= ~BLK_OPEN_WRITE; sb->mode &= ~BLK_OPEN_WRITE;
sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops); sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (!IS_ERR(sb->bdev_handle)) if (!IS_ERR(sb->s_bdev_file))
opt_set(*opts, nochanges, true); opt_set(*opts, nochanges, true);
} }
if (IS_ERR(sb->bdev_handle)) { if (IS_ERR(sb->s_bdev_file)) {
ret = PTR_ERR(sb->bdev_handle); ret = PTR_ERR(sb->s_bdev_file);
prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret)); prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
goto err; goto err;
} }
sb->bdev = sb->bdev_handle->bdev; sb->bdev = file_bdev(sb->s_bdev_file);
ret = bch2_sb_realloc(sb, 0); ret = bch2_sb_realloc(sb, 0);
if (ret) { if (ret) {
@ -1162,6 +1164,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
kfree(errors_silent); kfree(errors_silent);
} }
prt_printf(out, "Btrees with missing data:");
prt_tab(out);
prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
prt_newline(out);
} }
static const struct bch_sb_field_ops bch_sb_field_ops_ext = { static const struct bch_sb_field_ops bch_sb_field_ops_ext = {

View File

@ -366,7 +366,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) && !test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) && test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) && test_bit(BCH_FS_clean_shutdown, &c->flags) &&
!c->opts.norecovery) { c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal)); BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty)); BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty)); BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@ -511,7 +511,8 @@ err:
int bch2_fs_read_write(struct bch_fs *c) int bch2_fs_read_write(struct bch_fs *c)
{ {
if (c->opts.norecovery) if (c->opts.recovery_pass_last &&
c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery; return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges) if (c->opts.nochanges)
@ -1018,8 +1019,16 @@ int bch2_fs_start(struct bch_fs *c)
for_each_online_member(c, ca) for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
if (!ext) {
bch_err(c, "insufficient space in superblock for sb_field_ext");
ret = -BCH_ERR_ENOSPC_sb;
goto err;
}
for_each_rw_member(c, ca) for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca); bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c); bch2_recalc_capacity(c);

View File

@ -4,7 +4,7 @@
struct bch_sb_handle { struct bch_sb_handle {
struct bch_sb *sb; struct bch_sb *sb;
struct bdev_handle *bdev_handle; struct file *s_bdev_file;
struct block_device *bdev; struct block_device *bdev;
char *sb_name; char *sb_name;
struct bio *bio; struct bio *bio;

View File

@ -793,4 +793,14 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64)); addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
} }
/*
 * Clear bit number @bit in a bitmap stored as an array of little-endian
 * 64-bit words. Non-atomic, like __set_bit_le64().
 *
 * The mask must be inverted with bitwise '~': logical '!' on a nonzero mask
 * yields 0, which would wipe the entire 64-bit word instead of a single bit.
 */
static inline void __clear_bit_le64(size_t bit, __le64 *addr)
{
	addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
}
/*
 * Report whether bit number @bit is set in a bitmap stored as an array of
 * little-endian 64-bit words.
 */
static inline bool test_bit_le64(size_t bit, __le64 *addr)
{
	__le64 mask = cpu_to_le64(BIT_ULL(bit % 64));
	__le64 word = addr[bit / 64];

	return (word & mask) != 0;
}
#endif /* _BCACHEFS_UTIL_H */ #endif /* _BCACHEFS_UTIL_H */

View File

@ -162,16 +162,18 @@ sector_t get_capacity(struct gendisk *disk)
return bytes >> 9; return bytes >> 9;
} }
void bdev_release(struct bdev_handle *handle) void fput(struct file *file)
{ {
fdatasync(handle->bdev->bd_fd); struct block_device *bdev = file_bdev(file);
close(handle->bdev->bd_fd);
free(handle->bdev); fdatasync(bdev->bd_fd);
free(handle); close(bdev->bd_fd);
free(bdev);
free(file);
} }
struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hop) void *holder, const struct blk_holder_ops *hop)
{ {
int fd, flags = 0; int fd, flags = 0;
@ -204,13 +206,12 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
bdev->bd_disk = &bdev->__bd_disk; bdev->bd_disk = &bdev->__bd_disk;
bdev->bd_disk->bdi = &bdev->bd_disk->__bdi; bdev->bd_disk->bdi = &bdev->bd_disk->__bdi;
bdev->queue.backing_dev_info = bdev->bd_disk->bdi; bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
bdev->bd_inode = &bdev->__bd_inode;
struct bdev_handle *handle = calloc(sizeof(*handle), 1); struct file *file = calloc(sizeof(*file), 1);
handle->bdev = bdev; file->f_inode = bdev->bd_inode;
handle->holder = holder;
handle->mode = mode;
return handle; return file;
} }
int lookup_bdev(const char *path, dev_t *dev) int lookup_bdev(const char *path, dev_t *dev)