Update bcachefs sources to 09d4c2acbf4c bcachefs: reconstruct_inode()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-04-03 16:47:22 -04:00
parent 9f4ed5ce05
commit 5639fb38ca
39 changed files with 876 additions and 217 deletions

View File

@ -1 +1 @@
794723fc10c4a1ff28d4b11c436277ba783f47e6
09d4c2acbf4c864fef0f520bbcba256c9a19102e

View File

@ -38,7 +38,7 @@ as-option = $(call try-run,\
# Usage: aflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)

View File

@ -676,12 +676,12 @@ static int migrate_fs(const char *fs_path,
struct dev_opts dev = dev_opts_default();
dev.path = dev_t_to_path(stat.st_dev);
dev.handle = bdev_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev.handle);
int ret = PTR_ERR_OR_ZERO(dev.file);
if (ret < 0)
die("Error opening device to format %s: %s", dev.path, strerror(-ret));
dev.bdev = dev.handle->bdev;
dev.bdev = file_bdev(dev.file);
opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));

View File

@ -52,7 +52,7 @@ static inline struct format_opts format_opts_default()
}
struct dev_opts {
struct bdev_handle *handle;
struct file *file;
struct block_device *bdev;
char *path;
u64 size; /* bytes*/

View File

@ -189,13 +189,13 @@ int open_for_format(struct dev_opts *dev, bool force)
const char *fs_type = NULL, *fs_label = NULL;
size_t fs_type_len, fs_label_len;
dev->handle = bdev_open_by_path(dev->path,
dev->file = bdev_file_open_by_path(dev->path,
BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED,
dev, NULL);
int ret = PTR_ERR_OR_ZERO(dev->handle);
int ret = PTR_ERR_OR_ZERO(dev->file);
if (ret < 0)
die("Error opening device to format %s: %s", dev->path, strerror(-ret));
dev->bdev = dev->handle->bdev;
dev->bdev = file_bdev(dev->file);
if (!(pr = blkid_new_probe()))
die("blkid error 1");

View File

@ -16,6 +16,28 @@ typedef void (bio_end_io_t) (struct bio *);
#define BDEVNAME_SIZE 32
/*
 * Bit flags describing how a block device is opened; passed as the
 * blk_mode_t argument of bdev_file_open_by_path().
 */
typedef unsigned int __bitwise blk_mode_t;
/* open for reading */
#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers) */
#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (special hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
/* NOTE(review): presumably requests buffered rather than direct I/O - confirm */
#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
/*
 * Inode structure as declared by this header. struct block_device embeds
 * one as __bd_inode, which is what file_bdev() uses to get from a struct
 * file back to its block device.
 */
struct inode {
unsigned long i_ino;
loff_t i_size;
struct super_block *i_sb;
/* NOTE(review): presumably the blk_mode_t the device was opened with - confirm */
blk_mode_t mode;
};
struct request_queue {
struct backing_dev_info *backing_dev_info;
};
@ -34,6 +56,7 @@ struct block_device {
dev_t bd_dev;
char name[BDEVNAME_SIZE];
struct inode *bd_inode;
struct inode __bd_inode;
struct request_queue queue;
void *bd_holder;
struct gendisk * bd_disk;

View File

@ -23,27 +23,6 @@ struct user_namespace;
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
typedef unsigned int __bitwise blk_mode_t;
/* open for reading */
#define BLK_OPEN_READ ((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE ((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers */
#define BLK_OPEN_EXCL ((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (specialy hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
#define BLK_OPEN_BUFFERED ((__force blk_mode_t)(1 << 5))
struct inode {
unsigned long i_ino;
loff_t i_size;
struct super_block *i_sb;
};
struct file {
struct inode *f_inode;
};
@ -89,15 +68,14 @@ struct blk_holder_ops {
void (*mark_dead)(struct block_device *bdev);
};
struct bdev_handle {
struct block_device *bdev;
void *holder;
blk_mode_t mode;
};
/*
 * Return the block device that owns @file's inode.
 *
 * The result is computed with container_of(), so @file->f_inode must be the
 * __bd_inode member embedded in a struct block_device.
 */
static inline struct block_device *file_bdev(struct file *file)
{
	struct inode *bd_inode = file->f_inode;

	return container_of(bd_inode, struct block_device, __bd_inode);
}
void bdev_release(struct bdev_handle *);
struct bdev_handle *bdev_open_by_path(const char *, blk_mode_t, void *,
const struct blk_holder_ops *);
void fput(struct file *);
struct file *bdev_file_open_by_path(const char *, blk_mode_t, void *,
const struct blk_holder_ops *);
int lookup_bdev(const char *path, dev_t *);
struct super_block {

View File

@ -92,4 +92,7 @@
/********** VFS **********/
#define VFS_PTR_POISON ((void *)(0xF5 + POISON_POINTER_DELTA))
/********** lib/stackdepot.c **********/
#define STACK_DEPOT_POISON ((void *)(0xD390 + POISON_POINTER_DELTA))
#endif

View File

@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (ret)
goto out;
if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
a->v.gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
goto write;
}
if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
bch2_trans_inconsistent(trans,
"clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
"%s",
a->v.journal_seq,
c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
if (a->v.dirty_sectors) {
if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
trans, "attempting to discard bucket with dirty data\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
}
goto out;
}
if (a->v.data_type != BCH_DATA_need_discard) {
if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
bch2_trans_inconsistent(trans,
"bucket incorrectly set in need_discard btree\n"
"%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
ret = -EIO;
if (data_type_is_empty(a->v.data_type) &&
BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
a->v.gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
goto write;
}
if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
trans, "bucket incorrectly set in need_discard btree\n"
"%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
goto out;
}
if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
a->v.journal_seq,
c->journal.flushed_seq_ondisk,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
goto out;
}
@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
if (ret)
goto err;
BUG_ON(a->v.dirty_sectors);
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
goto out;
BUG_ON(a->v.data_type != BCH_DATA_cached);
BUG_ON(a->v.dirty_sectors);
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");

View File

@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
case BCH_WATERMARK_reclaim:
case BCH_WATERMARK_interior_updates:
return 0;
case BCH_WATERMARK_reclaim:
return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4;

View File

@ -22,7 +22,8 @@ struct bucket_alloc_state {
x(copygc) \
x(btree) \
x(btree_copygc) \
x(reclaim)
x(reclaim) \
x(interior_updates)
enum bch_watermark {
#define x(name) BCH_WATERMARK_##name,

View File

@ -8,6 +8,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h"
#include <linux/mm.h>
@ -418,6 +419,84 @@ struct extents_to_bp_state {
struct bkey_buf last_flushed;
};
/*
 * Take a mutable copy of @extent, run bch2_bkey_drop_device() on it for
 * device @dev, and queue the modified key for insertion into @btree as part
 * of @trans.
 *
 * Returns 0 on success, or the error from allocating the copy or from the
 * insert.
 */
static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, extent);
	int err = PTR_ERR_OR_ZERO(update);

	if (!err) {
		bch2_bkey_drop_device(bkey_i_to_s(update), dev);
		err = bch2_btree_insert_trans(trans, btree, update, 0);
	}

	return err;
}
/*
 * Two extents (@extent in @btree, @extent2 in @o_btree) point at the same
 * space on device @dev. Read the data @extent's pointer to @dev refers to and
 * compare its checksum against the one recorded in the extent.
 *
 * Returns:
 *   < 0  on error (allocation failure, read error, fsck error, update error)
 *   0    if nothing was changed: @extent is a btree pointer, carries no
 *        checksum, the device could not be opened for reading, the checksum
 *        matched, or the repair was declined
 *   1    if the checksum did not match and the pointer to @dev was dropped
 *        from @extent via drop_dev_and_update()
 *
 * BUG()s if @extent has no pointer to @dev.
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return 0;

	/* Locate the (decoded) pointer that lives on @dev */
	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	/* Without a checksum there is nothing to verify */
	if (!p.crc.csum_type)
		return 0;

	/* compressed_size is in 512-byte sectors */
	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return 0;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The buffer is mapped into the bio page by page, so the bio needs one
	 * bio_vec per page the buffer touches - a single vec is only enough
	 * for extents that fit in one page.
	 */
	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);

	/* On mismatch drop the bad pointer; report 1 so the caller knows we repaired */
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
static int check_bp_exists(struct btree_trans *trans,
struct extents_to_bp_state *s,
struct bpos bucket,
@ -425,7 +504,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
struct btree_iter bp_iter = { NULL };
struct btree_iter bp_iter = {};
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
struct bkey_buf tmp;
@ -433,13 +513,19 @@ static int check_bp_exists(struct btree_trans *trans,
bch2_bkey_buf_init(&tmp);
if (!bch2_dev_bucket_exists(c, bucket)) {
prt_str(&buf, "extent for nonexistent device:bucket ");
bch2_bpos_to_text(&buf, bucket);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, orig_k);
bch_err(c, "%s", buf.buf);
return -BCH_ERR_fsck_repair_unimplemented;
}
if (bpos_lt(bucket, s->bucket_start) ||
bpos_gt(bucket, s->bucket_end))
return 0;
if (!bch2_dev_bucket_exists(c, bucket))
goto missing;
bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp(c, bucket, bp.bucket_offset),
0);
@ -465,21 +551,94 @@ static int check_bp_exists(struct btree_trans *trans,
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
goto missing;
goto check_existing_bp;
}
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf);
return ret;
check_existing_bp:
/* Do we have a backpointer for a different extent? */
if (bp_k.k->type != KEY_TYPE_backpointer)
goto missing;
struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
struct bkey_s_c other_extent =
bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
ret = bkey_err(other_extent);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
ret = 0;
if (ret)
goto err;
if (!other_extent.k)
goto missing;
if (bch2_extents_match(orig_k, other_extent)) {
printbuf_reset(&buf);
prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
if (other_extent.k->size <= orig_k.k->size) {
ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
if (ret)
goto err;
goto out;
} else {
ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
if (ret)
goto err;
goto missing;
}
}
ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
if (ret < 0)
goto err;
if (ret) {
ret = 0;
goto missing;
}
ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
if (ret < 0)
goto err;
if (ret) {
ret = 0;
goto out;
}
printbuf_reset(&buf);
prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_str(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err;
missing:
printbuf_reset(&buf);
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k);
prt_printf(&buf, "\nbp pos ");
bch2_bpos_to_text(&buf, bp_iter.pos);
prt_printf(&buf, "\n got: ");
bch2_bkey_val_to_text(&buf, c, bp_k);
struct bkey_i_backpointer n_bp_k;
bkey_backpointer_init(&n_bp_k.k_i);
n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
n_bp_k.v = bp;
prt_printf(&buf, "\n want: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);

View File

@ -615,6 +615,7 @@ struct bch_dev {
*/
#define BCH_FS_FLAGS() \
x(new_fs) \
x(started) \
x(may_go_rw) \
x(rw) \
@ -797,6 +798,7 @@ struct bch_fs {
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
u64 btrees_lost_data;
} sb;
@ -826,7 +828,6 @@ struct bch_fs {
struct btree_root btree_roots_known[BTREE_ID_NR];
DARRAY(struct btree_root) btree_roots_extra;
struct mutex btree_root_lock;
unsigned long btrees_lost_data; /* bitmask */
struct btree_cache btree_cache;

View File

@ -818,6 +818,7 @@ struct bch_sb_field_ext {
struct bch_sb_field field;
__le64 recovery_passes_required[2];
__le64 errors_silent[8];
__le64 btrees_lost_data;
};
struct bch_sb_field_downgrade_entry {

View File

@ -1264,11 +1264,11 @@ out:
return retry_read;
fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
ret == -BCH_ERR_btree_node_read_err_must_retry)
ret == -BCH_ERR_btree_node_read_err_must_retry) {
retry_read = 1;
else {
} else {
set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data);
bch2_btree_lost_data(c, b->c.btree_id);
}
goto out;
}
@ -1330,7 +1330,7 @@ start:
if (!can_retry) {
set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data);
bch2_btree_lost_data(c, b->c.btree_id);
break;
}
}
@ -1532,7 +1532,7 @@ fsck_err:
if (ret) {
set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data);
bch2_btree_lost_data(c, b->c.btree_id);
} else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
@ -1669,7 +1669,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
bch2_fatal_error(c);
set_btree_node_read_error(b);
set_bit(b->c.btree_id, &c->btrees_lost_data);
bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
@ -1866,7 +1866,7 @@ static void btree_node_write_work(struct work_struct *work)
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_reclaim|
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw,

View File

@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
int ret, unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}

View File

@ -26,6 +26,13 @@
#include <linux/random.h>
/*
 * Printable names for enum btree_update_mode, indexed by mode value and
 * NULL terminated. Must be generated from BTREE_UPDATE_MODES() - not from the
 * watermark list - so that bch2_btree_update_modes[as->mode] names the mode.
 */
const char * const bch2_btree_update_modes[] = {
#define x(t) #t,
	BTREE_UPDATE_MODES()
#undef x
	NULL
};
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@ -303,7 +310,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
? BTREE_NODE_RESERVE
: 0;
int ret;
@ -687,7 +694,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* which may require allocations as well.
*/
ret = commit_do(trans, &as->disk_res, &journal_seq,
BCH_WATERMARK_reclaim|
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim,
@ -846,11 +853,11 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->mode = BTREE_UPDATE_node;
as->b = b;
set_btree_node_write_blocked(b);
@ -873,7 +880,7 @@ static void btree_update_reparent(struct btree_update *as,
lockdep_assert_held(&c->btree_interior_update_lock);
child->b = NULL;
child->mode = BTREE_INTERIOR_UPDATING_AS;
child->mode = BTREE_UPDATE_update;
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
bch2_update_reparent_journal_pin_flush);
@ -884,7 +891,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c;
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
@ -898,7 +905,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
as->mode = BTREE_INTERIOR_UPDATING_ROOT;
as->mode = BTREE_UPDATE_root;
mutex_unlock(&c->btree_interior_update_lock);
}
@ -1076,7 +1083,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
@ -1121,7 +1128,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim)
watermark < BCH_WATERMARK_reclaim)
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
ret = drop_locks_do(trans,
@ -1172,7 +1179,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as->c = c;
as->start_time = start_time;
as->ip_started = _RET_IP_;
as->mode = BTREE_INTERIOR_NO_UPDATE;
as->mode = BTREE_UPDATE_none;
as->watermark = watermark;
as->took_gc_lock = true;
as->btree_id = path->btree_id;
as->update_level = update_level;
@ -1217,7 +1225,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
*/
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim) {
watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err;
}
@ -2509,18 +2517,25 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev
bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
}
/*
 * Append a one-line description of the interior btree update @as to @out:
 * where it was started, which btree it belongs to, its watermark and mode,
 * whether its nodes have been written, outstanding closure refs and the
 * journal sequence number it has pinned.
 */
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
	const char *btree_name		= bch2_btree_id_str(as->btree_id);
	const char *watermark_name	= bch2_watermarks[as->watermark];
	const char *mode_name		= bch2_btree_update_modes[as->mode];

	prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
		   (void *) as->ip_started,
		   btree_name,
		   watermark_name,
		   mode_name,
		   as->nodes_written,
		   closure_nr_remaining(&as->cl),
		   as->journal.seq);
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_update *as;
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
(void *) as->ip_started,
as->mode,
as->nodes_written,
closure_nr_remaining(&as->cl),
as->journal.seq);
bch2_btree_update_to_text(out, as);
mutex_unlock(&c->btree_interior_update_lock);
}

View File

@ -12,6 +12,18 @@
int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
/*
 * X-macro list of the modes a struct btree_update can be in. Each x(n) entry
 * becomes the enumerator BTREE_UPDATE_##n below; expanding the list with a
 * stringifying x() yields the matching names.
 */
#define BTREE_UPDATE_MODES() \
x(none) \
x(node) \
x(root) \
x(update)
/* Value stored in btree_update.mode */
enum btree_update_mode {
#define x(n) BTREE_UPDATE_##n,
BTREE_UPDATE_MODES()
#undef x
};
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
* parent node:
@ -39,14 +51,8 @@ struct btree_update {
struct list_head list;
struct list_head unwritten_list;
/* What kind of update are we doing? */
enum {
BTREE_INTERIOR_NO_UPDATE,
BTREE_INTERIOR_UPDATING_NODE,
BTREE_INTERIOR_UPDATING_ROOT,
BTREE_INTERIOR_UPDATING_AS,
} mode;
enum btree_update_mode mode;
enum bch_watermark watermark;
unsigned nodes_written:1;
unsigned took_gc_lock:1;
@ -56,7 +62,7 @@ struct btree_update {
struct disk_reservation disk_res;
/*
* BTREE_INTERIOR_UPDATING_NODE:
* BTREE_UPDATE_node:
* The update that made the new nodes visible was a regular update to an
* existing interior node - @b. We can't write out the update to @b
* until the new nodes we created are finished writing, so we block @b

View File

@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
fallthrough;
case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim:
case BCH_WATERMARK_interior_updates:
break;
}

View File

@ -580,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
(!atomic_read(&ctxt->read_sectors) &&
!atomic_read(&ctxt->write_sectors)));
list_empty(&ctxt->ios));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
#include "journal.h"
#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
return false;
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only");
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error"));

View File

@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n)
struct wrapper {
cmp_func_t cmp;
swap_func_t swap_f;
swap_func_t swap;
};
/*
@ -125,7 +125,7 @@ struct wrapper {
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
if (swap_func == SWAP_WRAPPER) {
((const struct wrapper *)priv)->swap_f(a, b, (int)size);
((const struct wrapper *)priv)->swap(a, b, (int)size);
return;
}
@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
int i, c, r;
/* called from 'sort' without swap function, let's pick the default */
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_f)
if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
swap_func = NULL;
if (!swap_func) {
@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
{
struct wrapper w = {
.cmp = cmp_func,
.swap_f = swap_func,
.swap = swap_func,
};
return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);

View File

@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum)
{
struct bch_subvolume s;
int ret;
ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
*snapshot = le32_to_cpu(s.snapshot);
*inum = le64_to_cpu(s.inode);
@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
&dir_hash_info, &iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
&dir_hash_info, &iter,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter);
err:
bch_err_fn(c, ret);
@ -169,7 +168,8 @@ err:
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked *lostfound)
struct bch_inode_unpacked *lostfound,
u64 reattaching_inum)
{
struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found");
@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
u32 subvol_snapshot;
ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
&subvol_snapshot, &root_inum.inum);
bch_err_msg(c, ret, "looking up root subvol");
struct bch_subvolume subvol;
ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
false, 0, &subvol);
bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
le32_to_cpu(st.master_subvol), snapshot);
if (ret)
return ret;
if (!subvol.inode) {
struct btree_iter iter;
struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
0, subvolume);
ret = PTR_ERR_OR_ZERO(subvol);
if (ret)
return ret;
subvol->v.inode = cpu_to_le64(reattaching_inum);
bch2_trans_iter_exit(trans, &iter);
}
root_inum.inum = le64_to_cpu(subvol.inode);
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
bch_err_msg(c, ret, "looking up root inode");
bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret)
return ret;
@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans,
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
}
ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
if (ret)
return ret;
@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
return ret;
}
/*
 * Recreate the subvolume key for @subvolid.
 *
 * The new subvolume points at snapshot @snapshotid and root inode @inum; if
 * @inum is 0 a fresh directory inode is created first and used as the root.
 * The snapshot key is then updated to refer back to the subvolume, and the
 * snapshot tree's master_subvol is set to @subvolid if it was unset.
 *
 * Returns 0 on success, -BCH_ERR_fsck_repair_unimplemented if @snapshotid is
 * not a leaf snapshot, or the error from any of the btree operations.
 */
static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
{
struct bch_fs *c = trans->c;
/* Only leaf snapshots are handled */
if (!bch2_snapshot_is_leaf(c, snapshotid)) {
bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
return -BCH_ERR_fsck_repair_unimplemented;
}
/*
* If inum isn't set, that means we're being called from check_dirents,
* not check_inodes - the root of this subvolume doesn't exist or we
* would have found it there:
*/
if (!inum) {
struct btree_iter inode_iter = {};
struct bch_inode_unpacked new_inode;
u64 cpu = raw_smp_processor_id();
/* New root: a directory (S_IFDIR|0755) that belongs to this subvolume */
bch2_inode_init_early(c, &new_inode);
bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
new_inode.bi_subvol = subvolid;
int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
bch2_btree_iter_traverse(&inode_iter) ?:
bch2_inode_write(trans, &inode_iter, &new_inode);
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
/* Use the inode number of the inode we just created as the root */
inum = new_inode.bi_inum;
}
bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
/* Build and insert the subvolume key at offset @subvolid */
struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
int ret = PTR_ERR_OR_ZERO(new_subvol);
if (ret)
return ret;
bkey_subvolume_init(&new_subvol->k_i);
new_subvol->k.p.offset = subvolid;
new_subvol->v.snapshot = cpu_to_le32(snapshotid);
new_subvol->v.inode = cpu_to_le64(inum);
ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
if (ret)
return ret;
/* Point the snapshot back at the subvolume and flag it as having one */
struct btree_iter iter;
struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshots, POS(0, snapshotid),
0, snapshot);
ret = PTR_ERR_OR_ZERO(s);
bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
if (ret)
return ret;
/* Remember the tree id before releasing the iterator */
u32 snapshot_tree = le32_to_cpu(s->v.tree);
s->v.subvol = cpu_to_le32(subvolid);
SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
bch2_trans_iter_exit(trans, &iter);
/* Give the snapshot tree a master subvolume if it does not have one */
struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
0, snapshot_tree);
ret = PTR_ERR_OR_ZERO(st);
bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
if (ret)
return ret;
if (!st->v.master_subvol)
st->v.master_subvol = cpu_to_le32(subvolid);
bch2_trans_iter_exit(trans, &iter);
return 0;
}
/*
 * Write a freshly initialised inode numbered @inum into @snapshot.
 *
 * The inode gets the current filesystem time, a mode of @mode|0755 and a size
 * of @size bytes; every other field keeps the value the inode init helpers
 * give it.
 *
 * Returns the result of __bch2_fsck_write_inode().
 */
static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked inode;

	bch2_inode_init_early(c, &inode);
	bch2_inode_init_late(&inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);

	inode.bi_inum = inum;
	inode.bi_size = size;

	return __bch2_fsck_write_inode(trans, &inode, snapshot);
}
/*
 * Recreate a regular-file inode @inum in @snapshot, sizing it from the
 * extents that still exist for it.
 *
 * The size is the end offset of the last key found at or before
 * (@inum, U64_MAX, @snapshot) in the extents btree, converted from 512-byte
 * sectors to bytes. If no key is found, or the key found belongs to a
 * different inode, the inode is created with size 0.
 *
 * Returns 0 on success or a negative error code.
 */
static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
{
	struct btree_iter iter = {};
	u64 size = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
	int ret = bkey_err(k);

	/*
	 * bkey_err() is 0 for a NULL key, so check k.k explicitly, and read
	 * the key while the iterator that returned it is still live.
	 */
	if (!ret && k.k && k.k->p.inode == inum)
		size = k.k->p.offset << 9;

	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		return ret;

	return reconstruct_inode(trans, snapshot, inum, size, S_IFREG);
}
struct snapshots_seen_entry {
u32 id;
u32 equiv;
@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
goto do_update;
}
if (fsck_err_on(ret,
c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u",
@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
}
do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
i->count = count2;
if (i->count != count2) {
bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2);
bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2);
return -BCH_ERR_internal_fsck_err;
}
@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans,
goto err;
}
ret = extent_ends_at(c, extent_ends, seen, k);
if (ret)
goto err;
extent_ends->last_pos = k.k->p;
err:
return ret;
@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
if (k.k->type != KEY_TYPE_whiteout) {
if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto err;
inode->last_pos.inode--;
ret = -BCH_ERR_transaction_restart_nested;
goto err;
}
if (fsck_err_on(!i, c, extent_in_missing_inode,
"extent in missing inode:\n %s",
(printbuf_reset(&buf),
@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
i->seen_this_pos = true;
}
if (k.k->type != KEY_TYPE_whiteout) {
ret = extent_ends_at(c, extent_ends, s, k);
if (ret)
goto err;
}
out:
err:
fsck_err:
@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
return count2;
if (i->count != count2) {
bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
i->count, count2);
bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
w->last_pos.inode, i->snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 parent_snapshot;
u32 new_parent_subvol = 0;
u64 parent_inum;
struct printbuf buf = PRINTBUF;
int ret = 0;
@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
if (ret ||
(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
if (ret2 && !bch2_err_matches(ret, ENOENT))
return ret2;
}
if (ret &&
!new_parent_subvol &&
(c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
/*
* Couldn't find a subvol for dirent's snapshot - but we lost
* subvols, so we need to reconstruct:
*/
ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
if (ret)
return ret;
parent_snapshot = d.k->p.snapshot;
}
if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent parent_subvol points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
u32 new_parent_subvol;
ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
if (ret)
goto err;
if (!new_parent_subvol) {
bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
return -BCH_ERR_fsck_repair_unimplemented;
}
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
ret = PTR_ERR_OR_ZERO(new_dirent);
@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
goto err;
if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
if (ret) {
bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
ret = -BCH_ERR_fsck_repair_unimplemented;
ret = 0;
goto err;
}
if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum,
@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
return ret;
goto err;
}
ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
if (ret)
return ret;
goto err;
out:
err:
fsck_err:
@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_dirent d;
struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF;
struct bpos equiv;
@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
dir->first_this_inode = false;
if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret)
goto err;
dir->last_pos.inode--;
ret = -BCH_ERR_transaction_restart_nested;
goto err;
}
if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
"dirent in nonexisting directory:\n%s",
(printbuf_reset(&buf),
@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_dirent)
goto out;
d = bkey_s_c_to_dirent(k);
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);

View File

@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
int ret;
if (!fn)
return 0;
@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
fn->resume(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
return ret;
return trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)

View File

@ -7,6 +7,7 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = U64_MAX, \
.choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \

View File

@ -362,7 +362,12 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Don't replay the journal") \
NULL, "Exit recovery immediately prior to journal replay")\
x(recovery_pass_last, u8, \
OPT_FS|OPT_MOUNT, \
OPT_STR_NOLIMIT(bch2_recovery_passes), \
BCH2_NO_SB_OPT, 0, \
NULL, "Exit recovery after specified pass") \
x(retain_recovery_info, u8, \
0, \
OPT_BOOL(), \

View File

@ -33,6 +33,20 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
/*
 * Flag @btree as having lost data.
 *
 * If the bit for @btree is already set in c->sb.btrees_lost_data this is a
 * no-op. Otherwise an error is logged, the bit is set in the superblock ext
 * field under sb_lock, and the superblock is written out.
 */
void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
	u64 mask = BIT_ULL(btree);

	/* Already recorded: avoid a redundant log line and superblock write */
	if (c->sb.btrees_lost_data & mask)
		return;

	bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));

	mutex_lock(&c->sb_lock);
	bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(mask);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
}
static bool btree_id_is_alloc(enum btree_id id)
{
switch (id) {
@ -272,7 +286,8 @@ int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
if (!c->opts.retain_recovery_info)
if (!c->opts.retain_recovery_info &&
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@ -468,8 +483,8 @@ static int read_btree_roots(struct bch_fs *c)
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
}
set_bit(i, &c->btrees_lost_data);
ret = 0;
bch2_btree_lost_data(c, i);
}
}
@ -590,27 +605,14 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (c->opts.fsck && c->opts.norecovery) {
bch_err(c, "cannot select both norecovery and fsck");
ret = -EINVAL;
goto err;
}
c->opts.retain_recovery_info |= c->opts.norecovery;
c->opts.nochanges |= c->opts.norecovery;
if (c->opts.norecovery)
c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
if (!ext) {
ret = -BCH_ERR_ENOSPC_sb;
mutex_unlock(&c->sb_lock);
goto err;
}
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
@ -841,6 +843,7 @@ use_clean:
}
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
@ -854,15 +857,18 @@ use_clean:
write_sb = true;
}
if (!test_bit(BCH_FS_error, &c->flags)) {
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (ext &&
(!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
!bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
write_sb = true;
}
if (!test_bit(BCH_FS_error, &c->flags) &&
!bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
write_sb = true;
}
if (c->opts.fsck &&
!test_bit(BCH_FS_error, &c->flags) &&
c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
ext->btrees_lost_data) {
ext->btrees_lost_data = 0;
write_sb = true;
}
if (c->opts.fsck &&
@ -932,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
int ret;
bch_notice(c, "initializing new filesystem");
set_bit(BCH_FS_new_fs, &c->flags);
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);

View File

@ -2,6 +2,8 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);

View File

@ -17,6 +17,7 @@
#include "snapshot.h"
#include "subvolume.h"
#include "super.h"
#include "super-io.h"
const char * const bch2_recovery_passes[] = {
#define x(_fn, ...) #_fn,
@ -27,7 +28,7 @@ const char * const bch2_recovery_passes[] = {
static int bch2_check_allocations(struct bch_fs *c)
{
return bch2_gc(c, true, c->opts.norecovery);
return bch2_gc(c, true, false);
}
static int bch2_set_may_go_rw(struct bch_fs *c)
@ -59,18 +60,23 @@ static struct recovery_pass_fn recovery_pass_fns[] = {
#undef x
};
u64 bch2_recovery_passes_to_stable(u64 v)
{
static const u8 map[] = {
static const u8 passes_to_stable_map[] = {
#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
BCH_RECOVERY_PASSES()
#undef x
};
};
static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
{
return passes_to_stable_map[pass];
}
u64 bch2_recovery_passes_to_stable(u64 v)
{
u64 ret = 0;
for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
if (v & BIT_ULL(i))
ret |= BIT_ULL(map[i]);
ret |= BIT_ULL(passes_to_stable_map[i]);
return ret;
}
@ -113,6 +119,38 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c,
}
}
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
enum bch_recovery_pass pass)
{
enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!test_bit_le64(s, ext->recovery_passes_required)) {
__set_bit_le64(s, ext->recovery_passes_required);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
return bch2_run_explicit_recovery_pass(c, pass);
}
static void bch2_clear_recovery_pass_required(struct bch_fs *c,
enum bch_recovery_pass pass)
{
enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (test_bit_le64(s, ext->recovery_passes_required)) {
__clear_bit_le64(s, ext->recovery_passes_required);
bch2_write_super(c);
}
mutex_unlock(&c->sb_lock);
}
u64 bch2_fsck_recovery_passes(void)
{
u64 ret = 0;
@ -127,8 +165,6 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa
{
struct recovery_pass_fn *p = recovery_pass_fns + pass;
if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
return false;
if (c->recovery_passes_explicit & BIT_ULL(pass))
return true;
if ((p->when & PASS_FSCK) && c->opts.fsck)
@ -184,6 +220,10 @@ int bch2_run_recovery_passes(struct bch_fs *c)
int ret = 0;
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
if (c->opts.recovery_pass_last &&
c->curr_recovery_pass > c->opts.recovery_pass_last)
break;
if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
unsigned pass = c->curr_recovery_pass;
@ -196,8 +236,13 @@ int bch2_run_recovery_passes(struct bch_fs *c)
c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
}
c->curr_recovery_pass++;
c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
if (!test_bit(BCH_FS_error, &c->flags))
bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
c->curr_recovery_pass++;
}
return ret;

View File

@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v);
u64 bch2_fsck_recovery_passes(void);
int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
int bch2_run_online_recovery_passes(struct bch_fs *);
int bch2_run_recovery_passes(struct bch_fs *);

View File

@ -32,6 +32,7 @@
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \
x(reconstruct_snapshots, 38, 0) \
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \

View File

@ -268,7 +268,9 @@
x(btree_node_bkey_bad_u64s, 260) \
x(btree_node_topology_empty_interior_node, 261) \
x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263)
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,

View File

@ -8,6 +8,7 @@
#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include <linux/random.h>
@ -131,7 +132,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
@ -574,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
bch_err_fn(c, ret);
if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
ret = 0;
goto err;
}
if (ret)
goto err;
@ -731,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
struct printbuf buf = PRINTBUF;
bool should_have_subvol;
u32 i, id;
int ret = 0;
@ -777,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
}
}
should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
@ -879,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
return ret;
}
/*
 * Ensure a snapshot node exists for @id, creating one if necessary.
 *
 * If bch2_snapshot_equiv() already knows @id, nothing is done. Otherwise a
 * new snapshot tree is created with @id as its root, and a snapshot key at
 * POS(0, @id) pointing at that tree is inserted, marked, and has its
 * equivalence class set.
 *
 * Returns 0 on success or the first error encountered.
 */
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_snapshot *new_snap;
	u32 tree_id;
	int ret;

	if (bch2_snapshot_equiv(c, id))
		return 0;

	ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
	if (ret)
		return ret;

	new_snap = bch2_trans_kmalloc(trans, sizeof(*new_snap));
	ret = PTR_ERR_OR_ZERO(new_snap);
	if (ret)
		return ret;

	bkey_snapshot_init(&new_snap->k_i);
	new_snap->k.p		= POS(0, id);
	new_snap->v.tree	= cpu_to_le32(tree_id);
	new_snap->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	ret = bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &new_snap->k_i, 0);
	if (ret)
		return ret;

	ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
				 bkey_s_c_null, bkey_i_to_s(&new_snap->k_i), 0);
	if (ret)
		return ret;

	return bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&new_snap->k_i));
}
/* Figure out which snapshot nodes belong in the same tree: */
/*
 * Working state for grouping snapshot IDs into trees while walking the
 * snapshot-aware btrees.
 */
struct snapshot_tree_reconstruct {
	enum btree_id		btree;		/* btree currently being walked */
	struct bpos		cur_pos;	/* position of the most recent key seen */
	snapshot_id_list	cur_ids;	/* snapshot IDs collected for the current inode */
	DARRAY(snapshot_id_list) trees;		/* one ID list per snapshot tree found so far */
};
static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
{
darray_for_each(r->trees, i)
darray_exit(i);
darray_exit(&r->trees);
darray_exit(&r->cur_ids);
}
/*
 * Does @pos fall in the same group as the last position recorded in @r?
 *
 * In the inodes btree the comparison is on the offset field; in every other
 * btree it is on the inode field.
 */
static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (r->btree == BTREE_ID_inodes)
		return r->cur_pos.offset == pos.offset;

	return r->cur_pos.inode == pos.inode;
}
/* Returns true if any snapshot ID in @l is also present in @r. */
static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	darray_for_each(*l, id) {
		if (snapshot_list_has_id(r, *id))
			return true;
	}

	return false;
}
/* Print the IDs in @s to @out as decimal numbers separated by single spaces. */
static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	darray_for_each(*s, id) {
		/* separator goes before every entry except the first */
		if (id != s->data)
			prt_char(out, ' ');
		prt_printf(out, "%u", *id);
	}
}
static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
{
if (r->cur_ids.nr) {
darray_for_each(r->trees, i)
if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
int ret = snapshot_list_merge(c, i, &r->cur_ids);
if (ret)
return ret;
goto out;
}
darray_push(&r->trees, r->cur_ids);
darray_init(&r->cur_ids);
}
out:
r->cur_ids.nr = 0;
return 0;
}
/*
 * Per-key callback: record the snapshot ID of the key at @pos.
 *
 * When @pos starts a new group (see same_snapshot()), the IDs gathered so far
 * are first folded into r->trees. Then @pos becomes the current position and
 * its snapshot ID is added to r->cur_ids if not already present.
 *
 * Returns 0 on success or a negative error code; errors from closing out the
 * previous group are propagated rather than ignored.
 */
static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (!same_snapshot(r, pos)) {
		int ret = snapshot_tree_reconstruct_next(c, r);
		if (ret)
			return ret;
	}

	r->cur_pos = pos;
	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
}
/*
 * Recreate snapshot nodes that are referenced by keys but missing.
 *
 * Walks every btree that has snapshots, grouping the snapshot IDs seen into
 * trees. For each ID that bch2_snapshot_equiv() does not know about, a
 * snapshot_node_missing fsck error is raised and, if repair is chosen, the
 * node is recreated via check_snapshot_exists(). Only trees consisting of a
 * single ID can be reconstructed; larger trees fail with
 * -BCH_ERR_fsck_repair_unimplemented.
 *
 * Returns 0 on success or a negative error code.
 */
int bch2_reconstruct_snapshots(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct printbuf buf = PRINTBUF;
	struct snapshot_tree_reconstruct r = {};
	int ret = 0;

	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
		if (btree_type_has_snapshots(btree)) {
			r.btree = btree;

			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
					BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
				get_snapshot_trees(c, &r, k.k->p);
			}));
			if (ret)
				goto err;

			/* Fold in the IDs from the last group of this btree */
			ret = snapshot_tree_reconstruct_next(c, &r);
			if (ret)
				goto err;
		}
	}

	darray_for_each(r.trees, t) {
		/* buf holds the whole tree's ID list for the error message */
		printbuf_reset(&buf);
		snapshot_id_list_to_text(&buf, t);

		darray_for_each(*t, id) {
			if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
					c, snapshot_node_missing,
					"snapshot node %u from tree %s missing", *id, buf.buf)) {
				if (t->nr > 1) {
					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
					ret = -BCH_ERR_fsck_repair_unimplemented;
					goto err;
				}

				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
						check_snapshot_exists(trans, *id));
				if (ret)
					goto err;
			}
		}
	}
fsck_err:
err:
	bch2_trans_put(trans);
	snapshot_tree_reconstruct_exit(&r);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@ -1689,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret);
/*
* It's important that we check if we need to reconstruct snapshots
* before going RW, so we mark that pass as required in the superblock -
* otherwise, we could end up deleting keys with missing snapshot nodes
* instead
*/
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
test_bit(BCH_FS_may_go_rw, &c->flags));
if (bch2_err_matches(ret, EIO) ||
(c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
return ret;
}

View File

@ -209,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
int ret;
BUG_ON(snapshot_list_has_id(s, id));
ret = darray_push(s, id);
int ret = darray_push(s, id);
if (ret)
bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
return ret;
}
/*
 * Append @id to @s unless it is already present.
 *
 * Returns 0 on success (including when @id was already in the list), or the
 * error from darray_push() after logging it.
 */
static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
	if (snapshot_list_has_id(s, id))
		return 0;

	int ret = darray_push(s, id);
	if (ret)
		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
	return ret;
}
/*
 * Add every ID in @src to @dst, skipping IDs @dst already contains.
 *
 * Stops at the first failure and returns its error code; returns 0 otherwise.
 */
static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
{
	int ret = 0;

	darray_for_each(*src, id) {
		ret = snapshot_list_add_nodup(c, dst, *id);
		if (ret)
			break;
	}

	return ret;
}
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@ -229,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);

View File

@ -142,8 +142,8 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev_handle))
bdev_release(sb->bdev_handle);
if (!IS_ERR_OR_NULL(sb->s_bdev_file))
fput(sb->s_bdev_file);
kfree(sb->holder);
kfree(sb->sb_name);
@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext)
if (ext) {
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
}
for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@ -712,23 +714,23 @@ retry:
if (!opt_get(*opts, nochanges))
sb->mode |= BLK_OPEN_WRITE;
sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (IS_ERR(sb->bdev_handle) &&
PTR_ERR(sb->bdev_handle) == -EACCES &&
sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (IS_ERR(sb->s_bdev_file) &&
PTR_ERR(sb->s_bdev_file) == -EACCES &&
opt_get(*opts, read_only)) {
sb->mode &= ~BLK_OPEN_WRITE;
sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (!IS_ERR(sb->bdev_handle))
sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
if (!IS_ERR(sb->s_bdev_file))
opt_set(*opts, nochanges, true);
}
if (IS_ERR(sb->bdev_handle)) {
ret = PTR_ERR(sb->bdev_handle);
if (IS_ERR(sb->s_bdev_file)) {
ret = PTR_ERR(sb->s_bdev_file);
prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
goto err;
}
sb->bdev = sb->bdev_handle->bdev;
sb->bdev = file_bdev(sb->s_bdev_file);
ret = bch2_sb_realloc(sb, 0);
if (ret) {
@ -1162,6 +1164,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
kfree(errors_silent);
}
prt_printf(out, "Btrees with missing data:");
prt_tab(out);
prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
prt_newline(out);
}
static const struct bch_sb_field_ops bch_sb_field_ops_ext = {

View File

@ -366,7 +366,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
!c->opts.norecovery) {
c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@ -511,7 +511,8 @@ err:
int bch2_fs_read_write(struct bch_fs *c)
{
if (c->opts.norecovery)
if (c->opts.recovery_pass_last &&
c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)
@ -1018,8 +1019,16 @@ int bch2_fs_start(struct bch_fs *c)
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
if (!ext) {
bch_err(c, "insufficient space in superblock for sb_field_ext");
ret = -BCH_ERR_ENOSPC_sb;
goto err;
}
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);

View File

@ -4,7 +4,7 @@
struct bch_sb_handle {
struct bch_sb *sb;
struct bdev_handle *bdev_handle;
struct file *s_bdev_file;
struct block_device *bdev;
char *sb_name;
struct bio *bio;

View File

@ -793,4 +793,14 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
}
/*
 * Clear bit number @bit in the little-endian 64-bit bitmap at @addr.
 *
 * The mask must be inverted bitwise (~): a logical NOT would turn the nonzero
 * mask into 0 and wipe the entire 64-bit word instead of a single bit.
 */
static inline void __clear_bit_le64(size_t bit, __le64 *addr)
{
	addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
}
/* Returns true if bit number @bit is set in the little-endian 64-bit bitmap at @addr. */
static inline bool test_bit_le64(size_t bit, __le64 *addr)
{
	__le64 mask = cpu_to_le64(BIT_ULL(bit % 64));

	return (addr[bit / 64] & mask) != 0;
}
#endif /* _BCACHEFS_UTIL_H */

View File

@ -162,16 +162,18 @@ sector_t get_capacity(struct gendisk *disk)
return bytes >> 9;
}
void bdev_release(struct bdev_handle *handle)
void fput(struct file *file)
{
fdatasync(handle->bdev->bd_fd);
close(handle->bdev->bd_fd);
free(handle->bdev);
free(handle);
struct block_device *bdev = file_bdev(file);
fdatasync(bdev->bd_fd);
close(bdev->bd_fd);
free(bdev);
free(file);
}
struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hop)
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hop)
{
int fd, flags = 0;
@ -204,13 +206,12 @@ struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
bdev->bd_disk = &bdev->__bd_disk;
bdev->bd_disk->bdi = &bdev->bd_disk->__bdi;
bdev->queue.backing_dev_info = bdev->bd_disk->bdi;
bdev->bd_inode = &bdev->__bd_inode;
struct bdev_handle *handle = calloc(sizeof(*handle), 1);
handle->bdev = bdev;
handle->holder = holder;
handle->mode = mode;
struct file *file = calloc(sizeof(*file), 1);
file->f_inode = bdev->bd_inode;
return handle;
return file;
}
int lookup_bdev(const char *path, dev_t *dev)