Update bcachefs sources to 72740a707b64 bcachefs: Split brain detection

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-12-22 21:39:45 -05:00
parent ca4892af17
commit f0334bbc10
13 changed files with 150 additions and 73 deletions

View File

@ -1 +1 @@
c6d45169c6e3b4e42a189c9e87d1d14070033f01 72740a707b64a4fb5f2bb559d8db27a66abc97cc

View File

@ -1301,6 +1301,7 @@ struct bch_member {
__le64 errors[BCH_MEMBER_ERROR_NR]; __le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR]; __le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time; __le64 errors_reset_time;
__le64 seq;
}; };
#define BCH_MEMBER_V1_BYTES 56 #define BCH_MEMBER_V1_BYTES 56
@ -1704,7 +1705,9 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
x(deleted_inodes, BCH_VERSION(1, 2), \ x(deleted_inodes, BCH_VERSION(1, 2), \
BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
x(rebalance_work, BCH_VERSION(1, 3), \ x(rebalance_work, BCH_VERSION(1, 3), \
BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
x(member_seq, BCH_VERSION(1, 4), \
0)
enum bcachefs_metadata_version { enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9, bcachefs_metadata_version_min = 9,
@ -1770,7 +1773,8 @@ struct bch_sb {
__le32 time_base_hi; __le32 time_base_hi;
__le32 time_precision; __le32 time_precision;
__le64 flags[8]; __le64 flags[7];
__le64 write_time;
__le64 features[2]; __le64 features[2];
__le64 compat[2]; __le64 compat[2];

View File

@ -2013,29 +2013,6 @@ do_write:
/* buffer must be a multiple of the block size */ /* buffer must be a multiple of the block size */
bytes = round_up(bytes, block_bytes(c)); bytes = round_up(bytes, block_bytes(c));
if (bytes > btree_bytes(c)) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "btree node write bounce buffer overrun: %u > %zu\n",
bytes, btree_bytes(c));
prt_printf(&buf, "header: %zu\n", b->written
? sizeof(struct btree_node)
: sizeof(struct btree_node_entry));
prt_printf(&buf, "unwritten: %zu\n", b->whiteout_u64s * sizeof(u64));
for_each_bset(b, t) {
i = bset(b, t);
if (bset_written(b, i))
continue;
prt_printf(&buf, "bset %zu: %zu\n", t - b->set, le16_to_cpu(i->u64s) * sizeof(u64));
}
panic("%s", buf.buf);
printbuf_exit(&buf);
}
data = btree_bounce_alloc(c, bytes, &used_mempool); data = btree_bounce_alloc(c, bytes, &used_mempool);
if (!b->written) { if (!b->written) {

View File

@ -1209,7 +1209,6 @@ static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_i
bool intent) bool intent)
{ {
btree_path_idx_t new = btree_path_alloc(trans, src); btree_path_idx_t new = btree_path_alloc(trans, src);
btree_path_copy(trans, trans->paths + new, trans->paths + src); btree_path_copy(trans, trans->paths + new, trans->paths + src);
__btree_path_get(trans->paths + new, intent); __btree_path_get(trans->paths + new, intent);
return new; return new;
@ -1512,42 +1511,50 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
} }
/*
 * Out-of-line reporting helper for btree_path_alloc(), which calls it when no
 * free path slot remains and nr_paths has already reached BTREE_ITER_MAX.
 *
 * Calls bch2_dump_trans_paths_updates() on the transaction and then emits a
 * "trans path overflow" error via bch_err() on the transaction's filesystem.
 */
static noinline void btree_path_overflow(struct btree_trans *trans)
{
	struct bch_fs *fs = trans->c;

	bch2_dump_trans_paths_updates(trans);
	bch_err(fs, "trans path overflow");
}
/*
 * btree_paths_realloc() - grow a transaction's path tables to 2 * nr_paths.
 *
 * NOTE(review): this span is a side-by-side diff rendering. A line may carry
 * only the pre-change text, only the post-change text, or both back to back.
 * The description below follows the post-change text.
 *
 * A single zeroed buffer is allocated with GFP_KERNEL|__GFP_NOFAIL and carved
 * up, in order, into:
 *   - the paths_allocated bitmap (BITS_TO_LONGS(nr) longs),
 *   - a struct btree_trans_paths header,
 *   - nr struct btree_path entries,
 *   - nr btree_path_idx_t sorted entries plus 8 bytes of slack,
 *   - nr struct btree_insert_entry entries.
 * The existing contents of each table are copied across, the new capacity is
 * stored through trans_paths_nr(paths), the four pointers are published with
 * rcu_assign_pointer(), and trans->nr_paths is updated. The previous bitmap
 * pointer is handed to kfree_rcu_mightsleep() unless it is the
 * _paths_allocated array embedded in the transaction.
 */
static noinline void btree_paths_realloc(struct btree_trans *trans) static noinline void btree_paths_realloc(struct btree_trans *trans)
{ {
unsigned nr = trans->nr_paths * 2; unsigned nr = trans->nr_paths * 2;
void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) + void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
nr + 8 +
sizeof(struct btree_trans_paths) + sizeof(struct btree_trans_paths) +
nr * sizeof(struct btree_path) + nr * sizeof(struct btree_path) +
nr * sizeof(btree_path_idx_t) + 8 +
nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL); nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL);
unsigned long *paths_allocated = p; unsigned long *paths_allocated = p;
p += BITS_TO_LONGS(nr) * sizeof(unsigned long);
struct btree_path *paths = p;
p += nr * sizeof(struct btree_path);
u8 *sorted = p;
p += nr + 8;
struct btree_insert_entry *updates = p;
*trans_paths_nr(paths) = nr;
memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long)); memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
memcpy(sorted, trans->sorted, trans->nr_sorted); p += BITS_TO_LONGS(nr) * sizeof(unsigned long);
/* Post-change text: skip the btree_trans_paths header that sits in front of the path array. */
p += sizeof(struct btree_trans_paths);
struct btree_path *paths = p;
*trans_paths_nr(paths) = nr;
memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path)); memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path));
memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_path)); p += nr * sizeof(struct btree_path);
btree_path_idx_t *sorted = p;
memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t));
p += nr * sizeof(btree_path_idx_t) + 8;
struct btree_insert_entry *updates = p;
/*
 * The pre-change text sized the updates copy with sizeof(struct btree_path);
 * the post-change text below sizes it with sizeof(struct btree_insert_entry).
 */
memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry));
/* Remember the outgoing bitmap pointer before trans->paths_allocated is repointed. */
unsigned long *old = trans->paths_allocated; unsigned long *old = trans->paths_allocated;
rcu_assign_pointer(trans->paths_allocated, paths_allocated); rcu_assign_pointer(trans->paths_allocated, paths_allocated);
rcu_assign_pointer(trans->sorted, sorted);
rcu_assign_pointer(trans->paths, paths); rcu_assign_pointer(trans->paths, paths);
rcu_assign_pointer(trans->sorted, sorted);
rcu_assign_pointer(trans->updates, updates); rcu_assign_pointer(trans->updates, updates);
trans->nr_paths = nr; trans->nr_paths = nr;
/*
 * The pre-change text passed trans->paths_allocated to kfree_rcu_mightsleep(),
 * which by this point already refers to the new buffer; the post-change text
 * passes the saved old pointer instead.
 */
if (old != trans->_paths_allocated) if (old != trans->_paths_allocated)
kfree_rcu_mightsleep(trans->paths_allocated); kfree_rcu_mightsleep(old);
} }
static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans, static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
@ -1555,8 +1562,14 @@ static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
{ {
btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths); btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths);
if (unlikely(idx == trans->nr_paths)) if (unlikely(idx == trans->nr_paths)) {
if (trans->nr_paths == BTREE_ITER_MAX) {
btree_path_overflow(trans);
return 0;
}
btree_paths_realloc(trans); btree_paths_realloc(trans);
}
/* /*
* Do this before marking the new path as allocated, since it won't be * Do this before marking the new path as allocated, since it won't be
@ -2640,21 +2653,18 @@ out:
/*
 * btree_path_list_remove() - drop @path's slot from trans->sorted.
 *
 * NOTE(review): this span is a side-by-side diff rendering. A line may carry
 * only the pre-change text, only the post-change text, or both back to back.
 *
 * The entry at path->sorted_idx is removed and later entries are shifted down
 * by one. Every entry from that position onward then has its sorted_idx
 * rewritten to match its new position. EBUG_ON() checks that path->sorted_idx
 * is below trans->nr_sorted on entry.
 *
 * With CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the shift is done by
 * memmove_u64s_down_small() (presumably counting in u64 words - confirm
 * against its definition). The pre-change text rounds the entry count up by 8;
 * the post-change text rounds it up by sizeof(u64) / sizeof(btree_path_idx_t).
 * Otherwise array_remove_item() is used.
 *
 * The pre-change text also reset path->sorted_idx to U8_MAX at the end; the
 * post-change text has no such assignment.
 */
static inline void btree_path_list_remove(struct btree_trans *trans, static inline void btree_path_list_remove(struct btree_trans *trans,
struct btree_path *path) struct btree_path *path)
{ {
unsigned i;
EBUG_ON(path->sorted_idx >= trans->nr_sorted); EBUG_ON(path->sorted_idx >= trans->nr_sorted);
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
/* nr_sorted is decremented first, so the count below is the number of entries after the removed slot. */
trans->nr_sorted--; trans->nr_sorted--;
memmove_u64s_down_small(trans->sorted + path->sorted_idx, memmove_u64s_down_small(trans->sorted + path->sorted_idx,
trans->sorted + path->sorted_idx + 1, trans->sorted + path->sorted_idx + 1,
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
sizeof(u64) / sizeof(btree_path_idx_t)));
#else #else
array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx); array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
#endif #endif
/* Re-number sorted_idx for each path now at or after the vacated position. */
for (i = path->sorted_idx; i < trans->nr_sorted; i++) for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
trans->paths[trans->sorted[i]].sorted_idx = i; trans->paths[trans->sorted[i]].sorted_idx = i;
path->sorted_idx = U8_MAX;
} }
static inline void btree_path_list_add(struct btree_trans *trans, static inline void btree_path_list_add(struct btree_trans *trans,
@ -2662,21 +2672,21 @@ static inline void btree_path_list_add(struct btree_trans *trans,
btree_path_idx_t path_idx) btree_path_idx_t path_idx)
{ {
struct btree_path *path = trans->paths + path_idx; struct btree_path *path = trans->paths + path_idx;
unsigned i;
path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted; path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted;
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1, memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
trans->sorted + path->sorted_idx, trans->sorted + path->sorted_idx,
DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx, 8)); DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
sizeof(u64) / sizeof(btree_path_idx_t)));
trans->nr_sorted++; trans->nr_sorted++;
trans->sorted[path->sorted_idx] = path_idx; trans->sorted[path->sorted_idx] = path_idx;
#else #else
array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx); array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx);
#endif #endif
for (i = path->sorted_idx; i < trans->nr_sorted; i++) for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
trans->paths[trans->sorted[i]].sorted_idx = i; trans->paths[trans->sorted[i]].sorted_idx = i;
btree_trans_verify_sorted_refs(trans); btree_trans_verify_sorted_refs(trans);
@ -2972,7 +2982,7 @@ got_trans:
trans->paths = trans->_paths; trans->paths = trans->_paths;
trans->updates = trans->_updates; trans->updates = trans->_updates;
*trans_paths_nr(trans->paths) = BTREE_ITER_MAX; *trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL;
trans->paths_allocated[0] = 1; trans->paths_allocated[0] = 1;

View File

@ -642,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
static inline int btree_trans_too_many_iters(struct btree_trans *trans) static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{ {
if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_MAX - 8) if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
return __bch2_btree_trans_too_many_iters(trans); return __bch2_btree_trans_too_many_iters(trans);
return 0; return 0;

View File

@ -358,7 +358,8 @@ struct btree_insert_entry {
unsigned long ip_allocated; unsigned long ip_allocated;
}; };
#define BTREE_ITER_MAX 64 #define BTREE_ITER_INITIAL 64
#define BTREE_ITER_MAX (1U << 10)
struct btree_trans_commit_hook; struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@ -382,7 +383,7 @@ struct btree_trans {
unsigned long *paths_allocated; unsigned long *paths_allocated;
struct btree_path *paths; struct btree_path *paths;
u8 *sorted; btree_path_idx_t *sorted;
struct btree_insert_entry *updates; struct btree_insert_entry *updates;
void *mem; void *mem;
@ -438,11 +439,11 @@ struct btree_trans {
struct list_head list; struct list_head list;
struct closure ref; struct closure ref;
unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_MAX)]; unsigned long _paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)];
struct btree_trans_paths trans_paths; struct btree_trans_paths trans_paths;
struct btree_path _paths[BTREE_ITER_MAX]; struct btree_path _paths[BTREE_ITER_INITIAL];
u8 _sorted[BTREE_ITER_MAX + 8]; btree_path_idx_t _sorted[BTREE_ITER_INITIAL + 4];
struct btree_insert_entry _updates[BTREE_ITER_MAX]; struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
}; };
static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter) static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)

View File

@ -169,6 +169,7 @@
x(EINVAL, device_size_too_small) \ x(EINVAL, device_size_too_small) \
x(EINVAL, device_not_a_member_of_filesystem) \ x(EINVAL, device_not_a_member_of_filesystem) \
x(EINVAL, device_has_been_removed) \ x(EINVAL, device_has_been_removed) \
x(EINVAL, device_splitbrain) \
x(EINVAL, device_already_online) \ x(EINVAL, device_already_online) \
x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, insufficient_devices_to_start) \
x(EINVAL, invalid) \ x(EINVAL, invalid) \
@ -220,6 +221,7 @@
x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \ x(EIO, btree_node_read_err) \
x(EIO, sb_not_downgraded) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \

View File

@ -100,7 +100,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
return ret2 ?: ret; return ret2 ?: ret;
} }
#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3) #define EXTENT_ITERS_MAX (BTREE_ITER_INITIAL / 3)
int bch2_extent_atomic_end(struct btree_trans *trans, int bch2_extent_atomic_end(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,

View File

@ -58,7 +58,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
*/ */
ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0);
if (unlikely(ret)) if (unlikely(ret))
goto err; goto err_noprint;
bch2_bkey_buf_reassemble(&old, c, k); bch2_bkey_buf_reassemble(&old, c, k);
@ -118,7 +118,7 @@ err:
inum.inum, inum.inum,
iter->pos.offset << 9, iter->pos.offset << 9,
"%s(): error: %s", __func__, bch2_err_str(ret)); "%s(): error: %s", __func__, bch2_err_str(ret));
err_noprint:
bch2_open_buckets_put(c, &open_buckets); bch2_open_buckets_put(c, &open_buckets);
bch2_disk_reservation_put(c, &disk_res); bch2_disk_reservation_put(c, &disk_res);
bch2_bkey_buf_exit(&new, c); bch2_bkey_buf_exit(&new, c);

View File

@ -1090,6 +1090,8 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret) if (ret)
goto err; goto err;
c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
if (enabled_qtypes(c)) { if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c); ret = bch2_fs_quota_read(c);
if (ret) if (ret)

View File

@ -235,6 +235,11 @@ static void member_to_text(struct printbuf *out,
prt_printf(out, "(never)"); prt_printf(out, "(never)");
prt_newline(out); prt_newline(out);
prt_printf(out, "Last superblock write:");
prt_tab(out);
prt_u64(out, le64_to_cpu(m.seq));
prt_newline(out);
prt_printf(out, "State:"); prt_printf(out, "State:");
prt_tab(out); prt_tab(out);
prt_printf(out, "%s", prt_printf(out, "%s",

View File

@ -530,6 +530,7 @@ static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
dst->time_base_lo = src->time_base_lo; dst->time_base_lo = src->time_base_lo;
dst->time_base_hi = src->time_base_hi; dst->time_base_hi = src->time_base_hi;
dst->time_precision = src->time_precision; dst->time_precision = src->time_precision;
dst->write_time = src->write_time;
memcpy(dst->flags, src->flags, sizeof(dst->flags)); memcpy(dst->flags, src->flags, sizeof(dst->flags));
memcpy(dst->features, src->features, sizeof(dst->features)); memcpy(dst->features, src->features, sizeof(dst->features));
@ -906,8 +907,25 @@ int bch2_write_super(struct bch_fs *c)
c->disk_sb.sb->magic = BCHFS_MAGIC; c->disk_sb.sb->magic = BCHFS_MAGIC;
c->disk_sb.sb->layout.magic = BCHFS_MAGIC; c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
prt_str(&buf, " > ");
bch2_version_to_text(&buf, bcachefs_metadata_version_current);
prt_str(&buf, ")");
bch2_fs_fatal_error(c, "%s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_sb_not_downgraded;
}
le64_add_cpu(&c->disk_sb.sb->seq, 1); le64_add_cpu(&c->disk_sb.sb->seq, 1);
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
for_each_online_member(c, ca)
__bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq;
c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());
if (test_bit(BCH_FS_error, &c->flags)) if (test_bit(BCH_FS_error, &c->flags))
SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1); SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
if (test_bit(BCH_FS_topology_error, &c->flags)) if (test_bit(BCH_FS_topology_error, &c->flags))
@ -1210,6 +1228,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
prt_printf(out, "%llu", le64_to_cpu(sb->seq)); prt_printf(out, "%llu", le64_to_cpu(sb->seq));
prt_newline(out); prt_newline(out);
prt_printf(out, "Time of last write:");
prt_tab(out);
bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
prt_newline(out);
prt_printf(out, "Superblock size:"); prt_printf(out, "Superblock size:");
prt_tab(out); prt_tab(out);
prt_printf(out, "%zu", vstruct_bytes(sb)); prt_printf(out, "%zu", vstruct_bytes(sb));

View File

@ -1066,20 +1066,65 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
return 0; return 0;
} }
/*
 * bch2_dev_in_fs() - check whether the device superblock @sb may be used with
 * the filesystem superblock @fs.
 *
 * NOTE(review): this span is a side-by-side diff rendering. A line may carry
 * only the pre-change text, only the post-change text, or both back to back.
 * The pre-change text took two struct bch_sb pointers and picked the one with
 * the higher seq for the bch2_dev_exists() check. The description below
 * follows the post-change text, which takes two struct bch_sb_handle pointers.
 *
 * Returns 0 when @fs and @sb are the same handle, or when every check passes.
 * Otherwise returns:
 *   -BCH_ERR_device_not_a_member_of_filesystem  if the UUIDs differ,
 *   -BCH_ERR_device_has_been_removed            if bch2_dev_exists() fails for
 *                                               sb's dev_idx in fs->sb,
 *   -BCH_ERR_mismatched_block_size              if block_size differs,
 *   -BCH_ERR_device_splitbrain                  if either split-brain test
 *                                               below fires.
 */
static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) static int bch2_dev_in_fs(struct bch_sb_handle *fs,
struct bch_sb_handle *sb)
{ {
struct bch_sb *newest = if (fs == sb)
le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; return 0;
if (!uuid_equal(&fs->uuid, &sb->uuid)) if (!uuid_equal(&fs->sb->uuid, &sb->sb->uuid))
return -BCH_ERR_device_not_a_member_of_filesystem; return -BCH_ERR_device_not_a_member_of_filesystem;
if (!bch2_dev_exists(newest, sb->dev_idx)) if (!bch2_dev_exists(fs->sb, sb->sb->dev_idx))
return -BCH_ERR_device_has_been_removed; return -BCH_ERR_device_has_been_removed;
if (fs->block_size != sb->block_size) if (fs->sb->block_size != sb->sb->block_size)
return -BCH_ERR_mismatched_block_size; return -BCH_ERR_mismatched_block_size;
/* The split-brain tests are skipped when either superblock's version is below bcachefs_metadata_version_member_seq. */
if (le16_to_cpu(fs->sb->version) < bcachefs_metadata_version_member_seq ||
le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_member_seq)
return 0;
/* Test 1: both superblocks carry the same seq but different write_time values. */
if (fs->sb->seq == sb->sb->seq &&
fs->sb->write_time != sb->sb->write_time) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "Split brain detected between %pg and %pg:",
sb->bdev, fs->bdev);
prt_newline(&buf);
prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq));
prt_newline(&buf);
/* NOTE(review): the doubled ';' after each bch2_prt_datetime() call below is an extra empty statement. */
prt_printf(&buf, "%pg ", fs->bdev);
bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));;
prt_newline(&buf);
prt_printf(&buf, "%pg ", sb->bdev);
bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));;
prt_newline(&buf);
prt_printf(&buf, "Not using older sb");
pr_err("%s", buf.buf);
printbuf_exit(&buf);
return -BCH_ERR_device_splitbrain;
}
/*
 * Test 2: compare the seq that fs->sb's member entry records for this device
 * against the seq in the device's own superblock. A recorded value of 0 is
 * not treated as a mismatch.
 */
struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx);
u64 seq_from_fs = le64_to_cpu(m.seq);
u64 seq_from_member = le64_to_cpu(sb->sb->seq);
if (seq_from_fs && seq_from_fs < seq_from_member) {
pr_err("Split brain detected between %pg and %pg:\n"
"%pg believes seq of %pg to be %llu, but %pg has %llu\n"
"Not using %pg",
sb->bdev, fs->bdev,
fs->bdev, sb->bdev, seq_from_fs,
sb->bdev, seq_from_member,
sb->bdev);
return -BCH_ERR_device_splitbrain;
}
return 0; return 0;
} }
@ -1773,7 +1818,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
dev_idx = sb.sb->dev_idx; dev_idx = sb.sb->dev_idx;
ret = bch2_dev_in_fs(c->disk_sb.sb, sb.sb); ret = bch2_dev_in_fs(&c->disk_sb, &sb);
bch_err_msg(c, ret, "bringing %s online", path); bch_err_msg(c, ret, "bringing %s online", path);
if (ret) if (ret)
goto err; goto err;
@ -1914,6 +1959,12 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name)
/* Filesystem open: */ /* Filesystem open: */
/*
 * Order two superblocks: compare by seq first, and fall back to write_time
 * only when the seq values compare equal. Both fields are converted from
 * little-endian before comparison; the result is whatever cmp_int() yields.
 */
static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r)
{
	int seq_order = cmp_int(le64_to_cpu(l->seq), le64_to_cpu(r->seq));

	if (seq_order)
		return seq_order;

	return cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time));
}
struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts) struct bch_opts opts)
{ {
@ -1946,19 +1997,21 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
} }
darray_for_each(sbs, sb) darray_for_each(sbs, sb)
if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq)) if (!best || sb_cmp(sb->sb, best->sb) > 0)
best = sb; best = sb;
darray_for_each_reverse(sbs, sb) { darray_for_each_reverse(sbs, sb) {
if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) { ret = bch2_dev_in_fs(best, sb);
pr_info("%pg has been removed, skipping", sb->bdev);
if (ret == -BCH_ERR_device_has_been_removed ||
ret == -BCH_ERR_device_splitbrain) {
bch2_free_super(sb); bch2_free_super(sb);
darray_remove_item(&sbs, sb); darray_remove_item(&sbs, sb);
best -= best > sb; best -= best > sb;
ret = 0;
continue; continue;
} }
ret = bch2_dev_in_fs(best->sb, sb->sb);
if (ret) if (ret)
goto err_print; goto err_print;
} }