Update bcachefs sources to 8e5380376586 bcachefs: Improve opts.degraded

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-04-06 16:16:32 -04:00
parent 592f04256f
commit 59b35d4194
89 changed files with 1635 additions and 1170 deletions

View File

@ -1 +1 @@
7fdc3fa3cb5fb561f5945b4de418d48d1a726a8d
8e5380376586b54782ffc7a4d9cf04eaf5976f85

View File

@ -53,7 +53,7 @@ bitflags! {
pub struct BtreeIter<'t> {
raw: c::btree_iter,
trans: PhantomData<&'t BtreeTrans<'t>>,
trans: &'t BtreeTrans<'t>,
}
impl<'t> BtreeIter<'t> {
@ -76,14 +76,14 @@ impl<'t> BtreeIter<'t> {
BtreeIter {
raw: iter.assume_init(),
trans: PhantomData,
trans: trans,
}
}
}
pub fn peek_max(&mut self, end: c::bpos) -> Result<Option<BkeySC<'_>>, bch_errcode> {
unsafe {
let k = c::bch2_btree_iter_peek_max(&mut self.raw, end);
let k = c::bch2_btree_iter_peek_max(self.trans.raw, &mut self.raw, end);
errptr_to_result_c(k.k).map(|_| {
if !k.k.is_null() {
Some(BkeySC {
@ -104,7 +104,7 @@ impl<'t> BtreeIter<'t> {
pub fn peek_and_restart(&mut self) -> Result<Option<BkeySC>, bch_errcode> {
unsafe {
let k = c::bch2_btree_iter_peek_and_restart_outlined(&mut self.raw);
let k = c::bch2_btree_iter_peek_and_restart_outlined(self.trans.raw, &mut self.raw);
errptr_to_result_c(k.k).map(|_| {
if !k.k.is_null() {
@ -122,20 +122,20 @@ impl<'t> BtreeIter<'t> {
pub fn advance(&mut self) {
unsafe {
c::bch2_btree_iter_advance(&mut self.raw);
c::bch2_btree_iter_advance(self.trans.raw, &mut self.raw);
}
}
}
impl Drop for BtreeIter<'_> {
fn drop(&mut self) {
unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
}
}
pub struct BtreeNodeIter<'t> {
raw: c::btree_iter,
trans: PhantomData<&'t BtreeTrans<'t>>,
trans: &'t BtreeTrans<'t>,
}
impl<'t> BtreeNodeIter<'t> {
@ -161,35 +161,35 @@ impl<'t> BtreeNodeIter<'t> {
BtreeNodeIter {
raw: iter.assume_init(),
trans: PhantomData,
trans: trans,
}
}
}
pub fn peek(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
unsafe {
let b = c::bch2_btree_iter_peek_node(&mut self.raw);
let b = c::bch2_btree_iter_peek_node(self.trans.raw, &mut self.raw);
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
}
}
pub fn peek_and_restart(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
unsafe {
let b = c::bch2_btree_iter_peek_node_and_restart(&mut self.raw);
let b = c::bch2_btree_iter_peek_node_and_restart(self.trans.raw, &mut self.raw);
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
}
}
pub fn advance(&mut self) {
unsafe {
c::bch2_btree_iter_next_node(&mut self.raw);
c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
}
}
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<&c::btree>, bch_errcode> {
unsafe {
let b = c::bch2_btree_iter_next_node(&mut self.raw);
let b = c::bch2_btree_iter_next_node(self.trans.raw, &mut self.raw);
errptr_to_result_c(b).map(|b| if !b.is_null() { Some(&*b) } else { None })
}
}
@ -197,7 +197,7 @@ impl<'t> BtreeNodeIter<'t> {
impl Drop for BtreeNodeIter<'_> {
fn drop(&mut self) {
unsafe { c::bch2_trans_iter_exit(self.raw.trans, &mut self.raw) }
unsafe { c::bch2_trans_iter_exit(self.trans.raw, &mut self.raw) }
}
}

View File

@ -544,7 +544,7 @@ int cmd_device_resize(int argc, char *argv[])
if (resize)
die("confused: more than one online device?");
resize = ca;
percpu_ref_get(&resize->io_ref);
percpu_ref_get(&resize->io_ref[READ]);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@ -557,7 +557,7 @@ int cmd_device_resize(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref);
percpu_ref_put(&resize->io_ref[READ]);
bch2_fs_stop(c);
}
return 0;
@ -641,7 +641,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
if (resize)
die("confused: more than one online device?");
resize = ca;
percpu_ref_get(&resize->io_ref);
percpu_ref_get(&resize->io_ref[READ]);
}
u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
@ -651,7 +651,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
if (ret)
fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref);
percpu_ref_put(&resize->io_ref[READ]);
bch2_fs_stop(c);
}
return 0;

View File

@ -116,8 +116,7 @@ int cmd_dump(int argc, char *argv[])
opt_set(opts, read_only, true);
opt_set(opts, nochanges, true);
opt_set(opts, norecovery, true);
opt_set(opts, degraded, true);
opt_set(opts, very_degraded, true);
opt_set(opts, degraded, BCH_DEGRADED_very);
opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_FIX_no);

View File

@ -281,8 +281,7 @@ int cmd_list_journal(int argc, char *argv[])
opt_set(opts, nochanges, true);
opt_set(opts, norecovery, true);
opt_set(opts, read_only, true);
opt_set(opts, degraded, true);
opt_set(opts, very_degraded, true);
opt_set(opts, degraded, BCH_DEGRADED_very);
opt_set(opts, errors, BCH_ON_ERROR_continue);
opt_set(opts, fix_errors, FSCK_FIX_yes);
opt_set(opts, retain_recovery_info ,true);

View File

@ -0,0 +1 @@
#include <linux/module.h>

View File

@ -17,4 +17,15 @@ enum string_size_units {
int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
char *buf, int len);
static inline void memcpy_and_pad(void *dest, size_t dest_len, const void *src,
size_t count, int pad)
{
if (dest_len > count) {
memcpy(dest, src, count);
memset(dest + count, pad, dest_len - count);
} else {
memcpy(dest, src, dest_len);
}
}
#endif

View File

@ -273,7 +273,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct posix_acl *acl = NULL;
if (rcu)
@ -344,7 +344,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct btree_iter inode_iter = { NULL };
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode_u;
struct posix_acl *acl;
umode_t mode;

View File

@ -610,7 +610,7 @@ int bch2_alloc_read(struct bch_fs *c)
* bch2_check_alloc_key() which runs later:
*/
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}
@ -631,17 +631,17 @@ int bch2_alloc_read(struct bch_fs *c)
* bch2_check_alloc_key() which runs later:
*/
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}
if (k.k->p.offset < ca->mi.first_bucket) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket));
continue;
}
if (k.k->p.offset >= ca->mi.nbuckets) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}
@ -1039,9 +1039,10 @@ invalid_bucket:
* This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
* extents style btrees, but works on non-extents btrees:
*/
static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole)
static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end, struct bkey *hole)
{
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
if (bkey_err(k))
return k;
@ -1052,9 +1053,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
struct btree_iter iter2;
struct bpos next;
bch2_trans_copy_iter(&iter2, iter);
bch2_trans_copy_iter(trans, &iter2, iter);
struct btree_path *path = btree_iter_path(iter->trans, iter);
struct btree_path *path = btree_iter_path(trans, iter);
if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));
@ -1064,9 +1065,9 @@ static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos
* btree node min/max is a closed interval, upto takes a half
* open interval:
*/
k = bch2_btree_iter_peek_max(&iter2, end);
k = bch2_btree_iter_peek_max(trans, &iter2, end);
next = iter2.pos;
bch2_trans_iter_exit(iter->trans, &iter2);
bch2_trans_iter_exit(trans, &iter2);
BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
@ -1107,13 +1108,14 @@ static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *buck
return *ca != NULL;
}
static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter,
struct bch_dev **ca, struct bkey *hole)
static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_dev **ca, struct bkey *hole)
{
struct bch_fs *c = iter->trans->c;
struct bch_fs *c = trans->c;
struct bkey_s_c k;
again:
k = bch2_get_key_or_hole(iter, POS_MAX, hole);
k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole);
if (bkey_err(k))
return k;
@ -1126,7 +1128,7 @@ again:
if (!next_bucket(c, ca, &hole_start))
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter, hole_start);
bch2_btree_iter_set_pos(trans, iter, hole_start);
goto again;
}
@ -1167,8 +1169,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
k = bch2_btree_iter_peek_slot(discard_iter);
bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p);
k = bch2_btree_iter_peek_slot(trans, discard_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1181,8 +1183,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
}
bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
k = bch2_btree_iter_peek_slot(freespace_iter);
bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1195,8 +1197,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,
goto err;
}
bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
k = bch2_btree_iter_peek_slot(bucket_gens_iter);
bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1249,9 +1251,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
if (!ca->mi.freespace_initialized)
return 0;
bch2_btree_iter_set_pos(freespace_iter, start);
bch2_btree_iter_set_pos(trans, freespace_iter, start);
k = bch2_btree_iter_peek_slot(freespace_iter);
k = bch2_btree_iter_peek_slot(trans, freespace_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1300,9 +1302,9 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
unsigned i, gens_offset, gens_end_offset;
int ret;
bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
k = bch2_btree_iter_peek_slot(bucket_gens_iter);
k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1435,7 +1437,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
*gen = a->gen;
out:
fsck_err:
bch2_set_btree_iter_dontneed(&alloc_iter);
bch2_set_btree_iter_dontneed(trans, &alloc_iter);
bch2_trans_iter_exit(trans, &alloc_iter);
printbuf_exit(&buf);
return ret;
@ -1572,7 +1574,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
bch2_trans_begin(trans);
k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole);
k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
@ -1610,7 +1612,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
if (ret)
goto bkey_err;
bch2_btree_iter_set_pos(&iter, next);
bch2_btree_iter_set_pos(trans, &iter, next);
bkey_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@ -1638,7 +1640,7 @@ bkey_err:
BTREE_ITER_prefetch);
while (1) {
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek(trans, &iter);
if (!k.k)
break;
@ -1657,7 +1659,7 @@ bkey_err:
break;
}
bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos));
bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos));
}
bch2_trans_iter_exit(trans, &iter);
if (ret)
@ -1685,7 +1687,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret;
alloc_k = bch2_btree_iter_peek(alloc_iter);
alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
if (!alloc_k.k)
return 0;
@ -1826,7 +1828,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct bpos pos = need_discard_iter->pos;
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
struct printbuf buf = PRINTBUF;
@ -1950,7 +1952,7 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
@ -1967,7 +1969,7 @@ void bch2_dev_do_discards(struct bch_dev *ca)
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_write_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
@ -2045,7 +2047,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
@ -2065,7 +2067,7 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
@ -2082,6 +2084,9 @@ static int invalidate_one_bp(struct btree_trans *trans,
if (ret)
return ret;
if (!extent_k.k)
return 0;
struct bkey_i *n =
bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
BTREE_UPDATE_internal_snapshot_node);
@ -2199,9 +2204,9 @@ static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter
{
struct bkey_s_c k;
again:
k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
k = bch2_btree_iter_peek_max(trans, iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
if (!k.k && !*wrapped) {
bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0));
*wrapped = true;
goto again;
}
@ -2251,12 +2256,12 @@ restart_err:
if (ret)
break;
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
}
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
bch2_bkey_buf_exit(&last_flushed, c);
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@ -2274,7 +2279,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca)
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@ -2321,7 +2326,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
break;
}
k = bch2_get_key_or_hole(&iter, end, &hole);
k = bch2_get_key_or_hole(trans, &iter, end, &hole);
ret = bkey_err(k);
if (ret)
goto bkey_err;
@ -2340,7 +2345,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
if (ret)
goto bkey_err;
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
} else {
struct bkey_i *freespace;
@ -2360,7 +2365,7 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
if (ret)
goto bkey_err;
bch2_btree_iter_set_pos(&iter, k.k->p);
bch2_btree_iter_set_pos(trans, &iter, k.k->p);
}
bkey_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@ -2506,7 +2511,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
bch2_set_ra_pages(c, ra_pages);
for_each_rw_member(c, ca) {
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
u64 dev_reserve = 0;
/*

View File

@ -321,11 +321,11 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
{
u64 want_free = ca->mi.nbuckets >> 7;
u64 free = max_t(s64, 0,
u.d[BCH_DATA_free].buckets
+ u.d[BCH_DATA_need_discard].buckets
u.buckets[BCH_DATA_free]
+ u.buckets[BCH_DATA_need_discard]
- bch2_dev_buckets_reserved(ca, BCH_WATERMARK_stripe));
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
return clamp_t(s64, want_free - free, 0, u.buckets[BCH_DATA_cached]);
}
void bch2_dev_do_invalidates(struct bch_dev *);

File diff suppressed because it is too large Load Diff

View File

@ -5,6 +5,7 @@
#include "bcachefs.h"
#include "alloc_types.h"
#include "extents.h"
#include "io_write_types.h"
#include "sb-members.h"
#include <linux/hash.h>
@ -23,6 +24,52 @@ struct dev_alloc_list {
u8 data[BCH_SB_MEMBERS_MAX];
};
struct alloc_request {
unsigned nr_replicas;
unsigned target;
bool ec;
enum bch_watermark watermark;
enum bch_write_flags flags;
enum bch_data_type data_type;
struct bch_devs_list *devs_have;
struct write_point *wp;
/* These fields are used primarily by open_bucket_add_buckets */
struct open_buckets ptrs;
unsigned nr_effective; /* sum of @ptrs durability */
bool have_cache; /* have we allocated from a 0 durability dev */
struct bch_devs_mask devs_may_alloc;
/* bch2_bucket_alloc_set_trans(): */
struct bch_dev_usage usage;
/* bch2_bucket_alloc_trans(): */
struct bch_dev *ca;
enum {
BTREE_BITMAP_NO,
BTREE_BITMAP_YES,
BTREE_BITMAP_ANY,
} btree_bitmap;
struct {
u64 buckets_seen;
u64 skipped_open;
u64 skipped_need_journal_commit;
u64 need_journal_commit;
u64 skipped_nocow;
u64 skipped_nouse;
u64 skipped_mi_btree_bitmap;
} counters;
unsigned scratch_nr_replicas;
unsigned scratch_nr_effective;
bool scratch_have_cache;
enum bch_data_type scratch_data_type;
struct open_buckets scratch_ptrs;
struct bch_devs_mask scratch_devs_may_alloc;
};
struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
struct dev_stripe_state *,
struct bch_devs_mask *);
@ -171,11 +218,8 @@ static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64
}
enum bch_write_flags;
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct open_buckets *,
struct dev_stripe_state *, struct bch_devs_mask *,
unsigned, unsigned *, bool *, enum bch_write_flags,
enum bch_data_type, enum bch_watermark,
struct closure *);
int bch2_bucket_alloc_set_trans(struct btree_trans *, struct alloc_request *,
struct dev_stripe_state *, struct closure *);
int bch2_alloc_sectors_start_trans(struct btree_trans *,
unsigned, unsigned,

View File

@ -8,22 +8,6 @@
#include "clock_types.h"
#include "fifo.h"
struct bucket_alloc_state {
enum {
BTREE_BITMAP_NO,
BTREE_BITMAP_YES,
BTREE_BITMAP_ANY,
} btree_bitmap;
u64 buckets_seen;
u64 skipped_open;
u64 skipped_need_journal_commit;
u64 need_journal_commit;
u64 skipped_nocow;
u64 skipped_nouse;
u64 skipped_mi_btree_bitmap;
};
#define BCH_WATERMARKS() \
x(stripe) \
x(normal) \

View File

@ -252,12 +252,24 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
0,
bp.v->level,
iter_flags);
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
if (bkey_err(k)) {
bch2_trans_iter_exit(trans, iter);
return k;
}
/*
* peek_slot() doesn't normally return NULL - except when we ask for a
* key at a btree level that doesn't exist.
*
* We may want to revisit this and change peek_slot():
*/
if (!k.k) {
bkey_init(&iter->k);
iter->k.p = bp.v->pos;
k.k = &iter->k;
}
if (k.k &&
extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
return k;
@ -293,7 +305,7 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
0,
bp.v->level - 1,
0);
struct btree *b = bch2_btree_iter_peek_node(iter);
struct btree *b = bch2_btree_iter_peek_node(trans, iter);
if (IS_ERR_OR_NULL(b))
goto err;
@ -321,7 +333,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
return 0;
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = { NULL };
struct btree_iter alloc_iter = {};
struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret = 0;
@ -462,7 +474,7 @@ err:
if (bio)
bio_put(bio);
kvfree(data_buf);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
printbuf_exit(&buf);
return ret;
}
@ -650,7 +662,7 @@ static int check_btree_root_to_backpointers(struct btree_trans *trans,
retry:
bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
b = bch2_btree_iter_peek_node(&iter);
b = bch2_btree_iter_peek_node(trans, &iter);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err;
@ -934,7 +946,7 @@ static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
{
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
struct btree *b = bch2_btree_iter_peek_node(&iter);
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
int ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err;

View File

@ -524,8 +524,8 @@ struct bch_dev {
struct percpu_ref ref;
#endif
struct completion ref_completion;
struct percpu_ref io_ref;
struct completion io_ref_completion;
struct percpu_ref io_ref[2];
struct completion io_ref_completion[2];
struct bch_fs *fs;
@ -562,7 +562,8 @@ struct bch_dev {
unsigned long *bucket_backpointer_mismatches;
unsigned long *bucket_backpointer_empty;
struct bch_dev_usage __percpu *usage;
struct bch_dev_usage_full __percpu
*usage;
/* Allocator: */
u64 alloc_cursor[3];
@ -613,6 +614,7 @@ struct bch_dev {
x(accounting_replay_done) \
x(may_go_rw) \
x(rw) \
x(rw_init_done) \
x(was_rw) \
x(stopping) \
x(emergency_ro) \
@ -649,6 +651,9 @@ struct btree_transaction_stats {
unsigned nr_max_paths;
unsigned journal_entries_size;
unsigned max_mem;
#ifdef CONFIG_BCACHEFS_DEBUG
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
char *max_paths_text;
};
@ -871,7 +876,7 @@ struct bch_fs {
struct btree_write_buffer btree_write_buffer;
struct workqueue_struct *btree_update_wq;
struct workqueue_struct *btree_io_complete_wq;
struct workqueue_struct *btree_write_complete_wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
/*

View File

@ -842,7 +842,7 @@ LE64_BITMASK(BCH_SB_SHARD_INUMS, struct bch_sb, flags[3], 28, 29);
LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
/* one free bit */
LE64_BITMASK(BCH_SB_SINGLE_DEVICE, struct bch_sb, flags[3], 63, 64);
LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
LE64_BITMASK(BCH_SB_NOCOW, struct bch_sb, flags[4], 33, 34);
@ -863,6 +863,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14);
LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20);
LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22);
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@ -984,6 +985,19 @@ enum bch_error_actions {
BCH_ON_ERROR_NR
};
#define BCH_DEGRADED_ACTIONS() \
x(ask, 0) \
x(yes, 1) \
x(very, 2) \
x(no, 3)
enum bch_degraded_actions {
#define x(t, n) BCH_DEGRADED_##t = n,
BCH_DEGRADED_ACTIONS()
#undef x
BCH_DEGRADED_ACTIONS_NR
};
#define BCH_STR_HASH_TYPES() \
x(crc32c, 0) \
x(crc64, 1) \

View File

@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
l.k->size == r.k->size &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}

View File

@ -691,7 +691,7 @@ retry_root:
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN,
0, bch2_btree_id_root(c, btree)->b->c.level, 0);
struct btree *b = bch2_btree_iter_peek_node(&iter);
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err_root;
@ -1199,7 +1199,7 @@ int bch2_gc_gens(struct bch_fs *c)
BCH_TRANS_COMMIT_no_enospc, ({
ca = bch2_dev_iterate(c, ca, k.k->p.inode);
if (!ca) {
bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
continue;
}
bch2_alloc_write_oldest_gen(trans, ca, &iter, k);
@ -1243,16 +1243,11 @@ void bch2_gc_gens_async(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens);
}
void bch2_fs_btree_gc_exit(struct bch_fs *c)
{
}
int bch2_fs_btree_gc_init(struct bch_fs *c)
void bch2_fs_btree_gc_init_early(struct bch_fs *c)
{
seqcount_init(&c->gc_pos_lock);
INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work);
init_rwsem(&c->gc_lock);
mutex_init(&c->gc_gens_lock);
return 0;
}

View File

@ -83,7 +83,6 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *);
int bch2_gc_gens(struct bch_fs *);
void bch2_gc_gens_async(struct bch_fs *);
void bch2_fs_btree_gc_exit(struct bch_fs *);
int bch2_fs_btree_gc_init(struct bch_fs *);
void bch2_fs_btree_gc_init_early(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_GC_H */

View File

@ -1353,7 +1353,7 @@ start:
"btree read error %s for %s",
bch2_blk_status_to_str(bio->bi_status), buf.buf);
if (rb->have_ioref)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
rb->have_ioref = false;
bch2_mark_io_failure(&failed, &rb->pick, false);
@ -1609,6 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);
bch2_latency_acct(ca, rb->start_time, READ);
percpu_ref_put(&ca->io_ref[READ]);
}
ra->err[rb->idx] = bio->bi_status;
@ -1908,7 +1909,8 @@ static void btree_node_scrub_work(struct work_struct *work)
scrub->key.k->k.p, 0, scrub->level - 1, 0);
struct btree *b;
int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
int ret = lockrestart_do(trans,
PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(trans, &iter)));
if (ret)
goto err;
@ -1927,7 +1929,7 @@ err:
printbuf_exit(&err);
bch2_bkey_buf_exit(&scrub->key, c);;
btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
percpu_ref_put(&scrub->ca->io_ref);
percpu_ref_put(&scrub->ca->io_ref[READ]);
kfree(scrub);
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
}
@ -1996,7 +1998,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
return 0;
err_free:
btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
err:
bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
return ret;
@ -2144,6 +2146,7 @@ static void btree_node_write_endio(struct bio *bio)
if (ca && bio->bi_status) {
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_printf(&buf, "btree write error: %s\n ",
bch2_blk_status_to_str(bio->bi_status));
bch2_btree_pos_to_text(&buf, c, b);
@ -2158,8 +2161,12 @@ static void btree_node_write_endio(struct bio *bio)
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
}
/*
* XXX: we should be using io_ref[WRITE], but we aren't retrying failed
* btree writes yet (due to device removal/ro):
*/
if (wbio->have_ioref)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
if (parent) {
bio_put(bio);
@ -2170,7 +2177,7 @@ static void btree_node_write_endio(struct bio *bio)
clear_btree_node_write_in_flight_inner(b);
wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner);
INIT_WORK(&wb->work, btree_node_write_work);
queue_work(c->btree_io_complete_wq, &wb->work);
queue_work(c->btree_write_complete_wq, &wb->work);
}
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,

View File

@ -244,10 +244,8 @@ void bch2_trans_verify_paths(struct btree_trans *trans)
bch2_btree_path_verify(trans, path);
}
static void bch2_btree_iter_verify(struct btree_iter *iter)
static void bch2_btree_iter_verify(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);
BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
@ -276,9 +274,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
bkey_gt(iter->pos, iter->k.p)));
}
static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
static int bch2_btree_iter_verify_ret(struct btree_trans *trans,
struct btree_iter *iter, struct bkey_s_c k)
{
struct btree_trans *trans = iter->trans;
struct btree_iter copy;
struct bkey_s_c prev;
int ret = 0;
@ -299,7 +297,7 @@ static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k
bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
BTREE_ITER_nopreserve|
BTREE_ITER_all_snapshots);
prev = bch2_btree_iter_prev(&copy);
prev = bch2_btree_iter_prev(trans, &copy);
if (!prev.k)
goto out;
@ -365,9 +363,11 @@ static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify(struct btree_trans *trans,
struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
static inline int bch2_btree_iter_verify_ret(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k) { return 0; }
#endif
@ -1855,10 +1855,8 @@ hole:
return (struct bkey_s_c) { u, NULL };
}
void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
void bch2_set_btree_iter_dontneed(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
if (!iter->path || trans->restarted)
return;
@ -1870,17 +1868,14 @@ void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
/* Btree iterators: */
int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
__bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
{
return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
return bch2_btree_path_traverse(trans, iter->path, iter->flags);
}
int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
bch2_btree_iter_traverse(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
int ret;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
iter->path = bch2_btree_path_set_pos(trans, iter->path,
@ -1888,7 +1883,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
iter->flags & BTREE_ITER_intent,
btree_iter_ip_allocated(iter));
ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
return ret;
@ -1900,14 +1895,14 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
/* Iterate across nodes (leaf and interior nodes) */
struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
struct btree *bch2_btree_iter_peek_node(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct btree *b = NULL;
int ret;
EBUG_ON(trans->paths[iter->path].cached);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
@ -1929,7 +1924,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return b;
err:
@ -1938,26 +1933,26 @@ err:
}
/* Only kept for -tools */
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *trans,
struct btree_iter *iter)
{
struct btree *b;
while (b = bch2_btree_iter_peek_node(iter),
while (b = bch2_btree_iter_peek_node(trans, iter),
bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
bch2_trans_begin(iter->trans);
bch2_trans_begin(trans);
return b;
}
struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct btree *b = NULL;
int ret;
EBUG_ON(trans->paths[iter->path].cached);
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (ret)
@ -2024,7 +2019,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
EBUG_ON(btree_iter_path(trans, iter)->uptodate);
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return b;
err:
@ -2034,7 +2029,7 @@ err:
/* Iterate across keys (in leaf nodes only) */
inline bool bch2_btree_iter_advance(struct btree_iter *iter)
inline bool bch2_btree_iter_advance(struct btree_trans *trans, struct btree_iter *iter)
{
struct bpos pos = iter->k.p;
bool ret = !(iter->flags & BTREE_ITER_all_snapshots
@ -2043,11 +2038,11 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
if (ret && !(iter->flags & BTREE_ITER_is_extents))
pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
return ret;
}
inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
inline bool bch2_btree_iter_rewind(struct btree_trans *trans, struct btree_iter *iter)
{
struct bpos pos = bkey_start_pos(&iter->k);
bool ret = !(iter->flags & BTREE_ITER_all_snapshots
@ -2056,7 +2051,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
if (ret && !(iter->flags & BTREE_ITER_is_extents))
pos = bkey_predecessor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
return ret;
}
@ -2183,9 +2178,9 @@ void btree_trans_peek_prev_journal(struct btree_trans *trans,
* bkey_s_c_null:
*/
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
struct bkey_s_c btree_trans_peek_key_cache(struct btree_trans *trans, struct btree_iter *iter,
struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
struct bkey u;
struct bkey_s_c k;
@ -2231,14 +2226,14 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
return k;
}
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_trans *trans, struct btree_iter *iter,
struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
struct bkey_s_c k, k2;
int ret;
EBUG_ON(btree_iter_path(trans, iter)->cached);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
while (1) {
iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
@ -2248,7 +2243,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
break;
}
@ -2258,7 +2253,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
if (unlikely(!l->b)) {
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}
@ -2269,10 +2264,10 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
break;
}
}
@ -2305,27 +2300,28 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
search_key = bpos_successor(l->b->key.k.p);
} else {
/* End of btree: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}
}
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
}
/**
* bch2_btree_iter_peek_max() - returns first key greater than or equal to
* iterator's current position
* @trans: btree transaction object
* @iter: iterator to peek from
* @end: search limit: returns keys less than or equal to @end
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos end)
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key = btree_iter_search_key(iter);
struct bkey_s_c k;
struct bpos iter_pos = iter->pos;
@ -2348,7 +2344,7 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
}
while (1) {
k = __bch2_btree_iter_peek(iter, search_key);
k = __bch2_btree_iter_peek(trans, iter, search_key);
if (unlikely(!k.k))
goto end;
if (unlikely(bkey_err(k)))
@ -2462,9 +2458,9 @@ out_no_locked:
if (!(iter->flags & BTREE_ITER_all_snapshots))
iter->pos.snapshot = iter->snapshot;
ret = bch2_btree_iter_verify_ret(iter, k);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
}
@ -2472,7 +2468,7 @@ out_no_locked:
return k;
end:
bch2_btree_iter_set_pos(iter, end);
bch2_btree_iter_set_pos(trans, iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}
@ -2480,24 +2476,25 @@ end:
/**
* bch2_btree_iter_next() - returns first key greater than iterator's current
* position
* @trans: btree transaction object
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_advance(iter))
if (!bch2_btree_iter_advance(trans, iter))
return bkey_s_c_null;
return bch2_btree_iter_peek(iter);
return bch2_btree_iter_peek(trans, iter);
}
static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, struct bpos search_key)
static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter,
struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
struct bkey_s_c k, k2;
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
while (1) {
iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
@ -2507,7 +2504,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
int ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
if (unlikely(ret)) {
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
k = bkey_s_c_err(ret);
break;
}
@ -2517,7 +2514,7 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
if (unlikely(!l->b)) {
/* No btree nodes at requested level: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
bch2_btree_iter_set_pos(trans, iter, SPOS_MAX);
k = bkey_s_c_null;
break;
}
@ -2533,10 +2530,10 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
(k2 = btree_trans_peek_key_cache(trans, iter, k.k->p)).k) {
k = k2;
if (bkey_err(k2)) {
bch2_btree_iter_set_pos(iter, iter->pos);
bch2_btree_iter_set_pos(trans, iter, iter->pos);
break;
}
}
@ -2557,25 +2554,27 @@ static struct bkey_s_c __bch2_btree_iter_peek_prev(struct btree_iter *iter, stru
search_key = bpos_predecessor(path->l[0].b->data->min_key);
} else {
/* Start of btree: */
bch2_btree_iter_set_pos(iter, POS_MIN);
bch2_btree_iter_set_pos(trans, iter, POS_MIN);
k = bkey_s_c_null;
break;
}
}
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
}
/**
* bch2_btree_iter_peek_prev_min() - returns first key less than or equal to
* iterator's current position
* @trans: btree transaction object
* @iter: iterator to peek from
* @end: search limit: returns keys greater than or equal to @end
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bpos end)
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end)
{
if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
!bkey_eq(iter->pos, POS_MAX)) {
@ -2587,7 +2586,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
* real visible extents - easiest to just use peek_slot() (which
* internally uses peek() for extents)
*/
struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
if (bkey_err(k))
return k;
@ -2597,7 +2596,6 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
return k;
}
struct btree_trans *trans = iter->trans;
struct bpos search_key = iter->pos;
struct bkey_s_c k;
btree_path_idx_t saved_path = 0;
@ -2613,7 +2611,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
}
while (1) {
k = __bch2_btree_iter_peek_prev(iter, search_key);
k = __bch2_btree_iter_peek_prev(trans, iter, search_key);
if (unlikely(!k.k))
goto end;
if (unlikely(bkey_err(k)))
@ -2704,10 +2702,10 @@ out_no_locked:
bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent);
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
return k;
end:
bch2_btree_iter_set_pos(iter, end);
bch2_btree_iter_set_pos(trans, iter, end);
k = bkey_s_c_null;
goto out_no_locked;
}
@ -2715,27 +2713,27 @@ end:
/**
* bch2_btree_iter_prev() - returns first key less than iterator's current
* position
* @trans: btree transaction object
* @iter: iterator to peek from
*
* Returns: key if found, or an error extractable with bkey_err().
*/
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_rewind(iter))
if (!bch2_btree_iter_rewind(trans, iter))
return bkey_s_c_null;
return bch2_btree_iter_peek_prev(iter);
return bch2_btree_iter_peek_prev(trans, iter);
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key;
struct bkey_s_c k;
int ret;
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify(trans, iter);
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
@ -2751,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;
bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
}
search_key = btree_iter_search_key(iter);
@ -2785,7 +2783,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
goto out;
if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
(k = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) {
if (!bkey_err(k))
iter->k = *k.k;
/* We're not returning a key from iter->path: */
@ -2812,8 +2810,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->flags & BTREE_ITER_intent) {
struct btree_iter iter2;
bch2_trans_copy_iter(&iter2, iter);
k = bch2_btree_iter_peek_max(&iter2, end);
bch2_trans_copy_iter(trans, &iter2, iter);
k = bch2_btree_iter_peek_max(trans, &iter2, end);
if (k.k && !bkey_err(k)) {
swap(iter->key_cache_path, iter2.key_cache_path);
@ -2824,9 +2822,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
} else {
struct bpos pos = iter->pos;
k = bch2_btree_iter_peek_max(iter, end);
k = bch2_btree_iter_peek_max(trans, iter, end);
if (unlikely(bkey_err(k)))
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
else
iter->pos = pos;
}
@ -2857,39 +2855,39 @@ out:
btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_verify_ret(iter, k);
bch2_btree_iter_verify(trans, iter);
ret = bch2_btree_iter_verify_ret(trans, iter, k);
if (unlikely(ret))
return bkey_s_c_err(ret);
return k;
}
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_advance(iter))
if (!bch2_btree_iter_advance(trans, iter))
return bkey_s_c_null;
return bch2_btree_iter_peek_slot(iter);
return bch2_btree_iter_peek_slot(trans, iter);
}
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *trans, struct btree_iter *iter)
{
if (!bch2_btree_iter_rewind(iter))
if (!bch2_btree_iter_rewind(trans, iter))
return bkey_s_c_null;
return bch2_btree_iter_peek_slot(iter);
return bch2_btree_iter_peek_slot(trans, iter);
}
/* Obsolete, but still used by rust wrapper in -tools */
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *trans, struct btree_iter *iter)
{
struct bkey_s_c k;
while (btree_trans_too_many_iters(iter->trans) ||
(k = bch2_btree_iter_peek_type(iter, iter->flags),
while (btree_trans_too_many_iters(trans) ||
(k = bch2_btree_iter_peek_type(trans, iter, iter->flags),
bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
bch2_trans_begin(iter->trans);
bch2_trans_begin(trans);
return k;
}
@ -3035,7 +3033,6 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
iter->path = 0;
iter->update_path = 0;
iter->key_cache_path = 0;
iter->trans = NULL;
}
void bch2_trans_iter_init_outlined(struct btree_trans *trans,
@ -3075,10 +3072,9 @@ void bch2_trans_node_iter_init(struct btree_trans *trans,
BUG_ON(iter->min_depth != depth);
}
void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
void bch2_trans_copy_iter(struct btree_trans *trans,
struct btree_iter *dst, struct btree_iter *src)
{
struct btree_trans *trans = src->trans;
*dst = *src;
#ifdef TRACK_PATH_ALLOCATED
dst->ip_allocated = _RET_IP_;
@ -3090,7 +3086,19 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
dst->key_cache_path = 0;
}
void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_kmalloc_trace_to_text(struct printbuf *out,
darray_trans_kmalloc_trace *trace)
{
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 60);
darray_for_each(*trace, i)
prt_printf(out, "%pS\t%zu\n", (void *) i->ip, i->bytes);
}
#endif
void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long ip)
{
struct bch_fs *c = trans->c;
unsigned new_top = trans->mem_top + size;
@ -3100,14 +3108,33 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
void *new_mem;
void *p;
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) {
#ifdef CONFIG_BCACHEFS_DEBUG
struct printbuf buf = PRINTBUF;
bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
#endif
}
ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (ret)
return ERR_PTR(ret);
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
if (new_bytes > s->max_mem) {
#ifdef CONFIG_BCACHEFS_DEBUG
darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr);
s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size,
trans->trans_kmalloc_trace.nr);
memcpy(s->trans_kmalloc_trace.data,
trans->trans_kmalloc_trace.data,
sizeof(s->trans_kmalloc_trace.data[0]) *
s->trans_kmalloc_trace.nr);
#endif
s->max_mem = new_bytes;
}
if (trans->used_mempool) {
if (trans->mem_bytes >= new_bytes)
@ -3167,6 +3194,8 @@ out_new_mem:
BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
bch2_trans_kmalloc_trace(trans, size, ip);
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@ -3280,6 +3309,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
}
#endif
#ifdef CONFIG_BCACHEFS_DEBUG
trans->trans_kmalloc_trace.nr = 0;
#endif
trans_set_locked(trans, false);
if (trans->restarted) {
@ -3448,6 +3481,7 @@ void bch2_trans_put(struct btree_trans *trans)
#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&trans->last_restarted_trace);
darray_exit(&trans->trans_kmalloc_trace);
#endif
unsigned long *paths_allocated = trans->paths_allocated;
@ -3603,6 +3637,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
for (s = c->btree_transaction_stats;
s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
s++) {
#ifdef CONFIG_BCACHEFS_DEBUG
darray_exit(&s->trans_kmalloc_trace);
#endif
kfree(s->max_paths_text);
bch2_time_stats_exit(&s->lock_hold_times);
}

View File

@ -393,36 +393,37 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree_path *, struct
void bch2_trans_node_drop(struct btree_trans *trans, struct btree *);
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);
int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
int __must_check __bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
int __must_check bch2_btree_iter_traverse(struct btree_trans *, struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
struct btree *bch2_btree_iter_peek_node(struct btree_trans *, struct btree_iter *);
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_trans *, struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_max(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_trans *, struct btree_iter *);
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_trans *trans,
struct btree_iter *iter)
{
return bch2_btree_iter_peek_max(iter, SPOS_MAX);
return bch2_btree_iter_peek_max(trans, iter, SPOS_MAX);
}
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *, struct btree_iter *, struct bpos);
static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
static inline struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_trans *trans, struct btree_iter *iter)
{
return bch2_btree_iter_peek_prev_min(iter, POS_MIN);
return bch2_btree_iter_peek_prev_min(trans, iter, POS_MIN);
}
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_trans *, struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_trans *, struct btree_iter *);
bool bch2_btree_iter_advance(struct btree_iter *);
bool bch2_btree_iter_rewind(struct btree_iter *);
bool bch2_btree_iter_advance(struct btree_trans *, struct btree_iter *);
bool bch2_btree_iter_rewind(struct btree_trans *, struct btree_iter *);
static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
@ -433,10 +434,9 @@ static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpo
iter->k.size = 0;
}
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
static inline void bch2_btree_iter_set_pos(struct btree_trans *trans,
struct btree_iter *iter, struct bpos new_pos)
{
struct btree_trans *trans = iter->trans;
if (unlikely(iter->update_path))
bch2_path_put(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);
@ -454,13 +454,14 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
iter->pos = bkey_start_pos(&iter->k);
}
static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
static inline void bch2_btree_iter_set_snapshot(struct btree_trans *trans,
struct btree_iter *iter, u32 snapshot)
{
struct bpos pos = iter->pos;
iter->snapshot = snapshot;
pos.snapshot = snapshot;
bch2_btree_iter_set_pos(iter, pos);
bch2_btree_iter_set_pos(trans, iter, pos);
}
void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
@ -502,7 +503,6 @@ static inline void bch2_trans_iter_init_common(struct btree_trans *trans,
unsigned flags,
unsigned long ip)
{
iter->trans = trans;
iter->update_path = 0;
iter->key_cache_path = 0;
iter->btree_id = btree_id;
@ -539,22 +539,50 @@ static inline void bch2_trans_iter_init(struct btree_trans *trans,
void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
enum btree_id, struct bpos,
unsigned, unsigned, unsigned);
void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btree_iter *);
void bch2_set_btree_iter_dontneed(struct btree_iter *);
void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *);
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_kmalloc_trace_to_text(struct printbuf *,
darray_trans_kmalloc_trace *);
#endif
/**
* bch2_trans_kmalloc - allocate memory for use by the current transaction
*
* Must be called after bch2_trans_begin, which on second and further calls
* frees all memory allocated in this transaction
*/
static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long);
static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size,
unsigned long ip)
{
#ifdef CONFIG_BCACHEFS_DEBUG
darray_push(&trans->trans_kmalloc_trace,
((struct trans_kmalloc_trace) { .ip = ip, .bytes = size }));
#endif
}
static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size,
unsigned long ip)
{
size = roundup(size, 8);
bch2_trans_kmalloc_trace(trans, size, ip);
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
void *p = trans->mem + trans->mem_top;
trans->mem_top += size;
return p;
} else {
return __bch2_trans_kmalloc(trans, size, ip);
}
}
static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size,
unsigned long ip)
{
size = roundup(size, 8);
bch2_trans_kmalloc_trace(trans, size, ip);
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
void *p = trans->mem + trans->mem_top;
@ -562,22 +590,24 @@ static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
memset(p, 0, size);
return p;
} else {
return __bch2_trans_kmalloc(trans, size);
return __bch2_trans_kmalloc(trans, size, ip);
}
}
static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
/**
* bch2_trans_kmalloc - allocate memory for use by the current transaction
*
* Must be called after bch2_trans_begin, which on second and further calls
* frees all memory allocated in this transaction
*/
static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
size = round_up(size, 8);
return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_);
}
if (likely(trans->mem_top + size <= trans->mem_bytes)) {
void *p = trans->mem + trans->mem_top;
trans->mem_top += size;
return p;
} else {
return __bch2_trans_kmalloc(trans, size);
}
static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size)
{
return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_);
}
static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
@ -588,7 +618,7 @@ static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans,
struct bkey_s_c k;
bch2_trans_iter_init(trans, iter, btree_id, pos, flags);
k = bch2_btree_iter_peek_slot(iter);
k = bch2_btree_iter_peek_slot(trans, iter);
if (!bkey_err(k) && type && k.k->type != type)
k = bkey_s_c_err(-BCH_ERR_ENOENT_bkey_type_mismatch);
@ -658,14 +688,14 @@ u32 bch2_trans_begin(struct btree_trans *);
int _ret3 = 0; \
do { \
_ret3 = lockrestart_do((_trans), ({ \
struct btree *_b = bch2_btree_iter_peek_node(&_iter); \
struct btree *_b = bch2_btree_iter_peek_node(_trans, &_iter);\
if (!_b) \
break; \
\
PTR_ERR_OR_ZERO(_b) ?: (_do); \
})) ?: \
lockrestart_do((_trans), \
PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \
PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(_trans, &_iter)));\
} while (!_ret3); \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
@ -677,31 +707,34 @@ u32 bch2_trans_begin(struct btree_trans *);
__for_each_btree_node(_trans, _iter, _btree_id, _start, \
0, 0, _flags, _b, _do)
static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
bch2_btree_iter_peek_prev(iter);
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
bch2_btree_iter_peek_prev(trans, iter);
}
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
{
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(iter) :
bch2_btree_iter_peek(iter);
return flags & BTREE_ITER_slots ? bch2_btree_iter_peek_slot(trans, iter) :
bch2_btree_iter_peek(trans, iter);
}
static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_iter *iter,
struct bpos end,
unsigned flags)
static inline struct bkey_s_c bch2_btree_iter_peek_max_type(struct btree_trans *trans,
struct btree_iter *iter,
struct bpos end,
unsigned flags)
{
if (!(flags & BTREE_ITER_slots))
return bch2_btree_iter_peek_max(iter, end);
return bch2_btree_iter_peek_max(trans, iter, end);
if (bkey_gt(iter->pos, end))
return bkey_s_c_null;
return bch2_btree_iter_peek_slot(iter);
return bch2_btree_iter_peek_slot(trans, iter);
}
int __bch2_btree_trans_too_many_iters(struct btree_trans *);
@ -768,14 +801,14 @@ transaction_restart: \
\
do { \
_ret3 = lockrestart_do(_trans, ({ \
(_k) = bch2_btree_iter_peek_max_type(&(_iter), \
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), \
_end, (_flags)); \
if (!(_k).k) \
break; \
\
bkey_err(_k) ?: (_do); \
})); \
} while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \
} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret3; \
@ -813,14 +846,14 @@ transaction_restart: \
\
do { \
_ret3 = lockrestart_do(_trans, ({ \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), \
(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), \
(_flags)); \
if (!(_k).k) \
break; \
\
bkey_err(_k) ?: (_do); \
})); \
} while (!_ret3 && bch2_btree_iter_rewind(&(_iter))); \
} while (!_ret3 && bch2_btree_iter_rewind(_trans, &(_iter))); \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret3; \
@ -850,37 +883,38 @@ transaction_restart: \
(_do) ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_commit_flags)))
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_trans *,
struct btree_iter *);
#define for_each_btree_key_max_norestart(_trans, _iter, _btree_id, \
_start, _end, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags),\
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags),\
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
bch2_btree_iter_advance(_trans, &(_iter)))
#define for_each_btree_key_max_continue_norestart(_iter, _end, _flags, _k, _ret)\
#define for_each_btree_key_max_continue_norestart(_trans, _iter, _end, _flags, _k, _ret)\
for (; \
(_k) = bch2_btree_iter_peek_max_type(&(_iter), _end, _flags), \
(_k) = bch2_btree_iter_peek_max_type(_trans, &(_iter), _end, _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_advance(&(_iter)))
bch2_btree_iter_advance(_trans, &(_iter)))
#define for_each_btree_key_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for_each_btree_key_max_norestart(_trans, _iter, _btree_id, _start,\
SPOS_MAX, _flags, _k, _ret)
#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_rewind(&(_iter)))
#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id, \
_start, _flags, _k, _ret) \
for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
(_k) = bch2_btree_iter_peek_prev_type(_trans, &(_iter), _flags), \
!((_ret) = bkey_err(_k)) && (_k).k; \
bch2_btree_iter_rewind(_trans, &(_iter)))
#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
for_each_btree_key_max_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
#define for_each_btree_key_continue_norestart(_trans, _iter, _flags, _k, _ret) \
for_each_btree_key_max_continue_norestart(_trans, _iter, SPOS_MAX, _flags, _k, _ret)
/*
* This should not be used in a fastpath, without first trying _do in

View File

@ -287,6 +287,19 @@ err:
return ret;
}
static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
struct bkey_s_c k)
{
struct printbuf buf = PRINTBUF;
bch2_bpos_to_text(&buf, ck_path->pos);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, trans->c, k);
trace_key_cache_fill(trans, buf.buf);
printbuf_exit(&buf);
}
static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
unsigned flags)
@ -306,7 +319,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
BTREE_ITER_key_cache_fill|
BTREE_ITER_cached_nofill);
iter.flags &= ~BTREE_ITER_with_journal;
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -320,18 +333,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
if (ret)
goto err;
if (trace_key_cache_fill_enabled()) {
struct printbuf buf = PRINTBUF;
bch2_bpos_to_text(&buf, ck_path->pos);
prt_char(&buf, ' ');
bch2_bkey_val_to_text(&buf, trans->c, k);
trace_key_cache_fill(trans, buf.buf);
printbuf_exit(&buf);
}
if (trace_key_cache_fill_enabled())
do_trace_key_cache_fill(trans, ck_path, k);
out:
/* We're not likely to need this iterator again: */
bch2_set_btree_iter_dontneed(&iter);
bch2_set_btree_iter_dontneed(trans, &iter);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -412,7 +418,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_intent);
b_iter.flags &= ~BTREE_ITER_with_key_cache;
ret = bch2_btree_iter_traverse(&c_iter);
ret = bch2_btree_iter_traverse(trans, &c_iter);
if (ret)
goto out;
@ -444,7 +450,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
!test_bit(JOURNAL_space_low, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(&b_iter);
struct bkey_s_c btree_k = bch2_btree_iter_peek_slot(trans, &b_iter);
ret = bkey_err(btree_k);
if (ret)
goto err;

View File

@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
bio_put(bio);
free_page((unsigned long) buf);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
closure_put(w->cl);
kfree(w);
return 0;
@ -291,7 +291,7 @@ static int read_btree_nodes(struct find_btree_nodes *f)
struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
if (!w) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
ret = -ENOMEM;
goto err;
}
@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f)
struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
ret = PTR_ERR_OR_ZERO(t);
if (ret) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
kfree(w);
bch_err_msg(c, ret, "starting kthread");
break;
}
closure_get(&cl);
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[READ]);
wake_up_process(t);
}
err:

View File

@ -20,6 +20,7 @@
#include "snapshot.h"
#include <linux/prefetch.h>
#include <linux/string_helpers.h>
static const char * const trans_commit_flags_strs[] = {
#define x(n, ...) #n,
@ -366,7 +367,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
struct jset_entry_log *l =
container_of(entry, struct jset_entry_log, entry);
strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
memcpy_and_pad(l->d, JSET_ENTRY_LOG_U64s * sizeof(u64),
trans->fn, strlen(trans->fn), 0);
}
static inline int btree_key_can_insert(struct btree_trans *trans,

View File

@ -367,7 +367,6 @@ static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
* @nodes_intent_locked - bitmask indicating which locks are intent locks
*/
struct btree_iter {
struct btree_trans *trans;
btree_path_idx_t path;
btree_path_idx_t update_path;
btree_path_idx_t key_cache_path;
@ -478,6 +477,12 @@ struct btree_trans_paths {
struct btree_path paths[];
};
struct trans_kmalloc_trace {
unsigned long ip;
size_t bytes;
};
typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace;
struct btree_trans {
struct bch_fs *c;
@ -489,6 +494,9 @@ struct btree_trans {
void *mem;
unsigned mem_top;
unsigned mem_bytes;
#ifdef CONFIG_BCACHEFS_DEBUG
darray_trans_kmalloc_trace trans_kmalloc_trace;
#endif
btree_path_idx_t nr_sorted;
btree_path_idx_t nr_paths;

View File

@ -14,6 +14,8 @@
#include "snapshot.h"
#include "trace.h"
#include <linux/string_helpers.h>
static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
const struct btree_insert_entry *r)
{
@ -126,7 +128,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
struct bpos new_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter old_iter, new_iter = { NULL };
struct btree_iter old_iter, new_iter = {};
struct bkey_s_c old_k, new_k;
snapshot_id_list s;
struct bkey_i *update;
@ -140,7 +142,7 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
bch2_trans_iter_init(trans, &old_iter, id, old_pos,
BTREE_ITER_not_extents|
BTREE_ITER_all_snapshots);
while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
while ((old_k = bch2_btree_iter_prev(trans, &old_iter)).k &&
!(ret = bkey_err(old_k)) &&
bkey_eq(old_pos, old_k.k->p)) {
struct bpos whiteout_pos =
@ -296,7 +298,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
BTREE_ITER_intent|
BTREE_ITER_with_updates|
BTREE_ITER_not_extents);
k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@ -322,8 +324,8 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
if (done)
goto out;
next:
bch2_btree_iter_advance(&iter);
k = bch2_btree_iter_peek_max(&iter, POS(insert->k.p.inode, U64_MAX));
bch2_btree_iter_advance(trans, &iter);
k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
if ((ret = bkey_err(k)))
goto err;
if (!k.k)
@ -592,13 +594,13 @@ int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
enum btree_id btree, struct bpos end)
{
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter);
int ret = bkey_err(k);
if (ret)
goto err;
bch2_btree_iter_advance(iter);
k = bch2_btree_iter_peek_slot(iter);
bch2_btree_iter_advance(trans, iter);
k = bch2_btree_iter_peek_slot(trans, iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -634,7 +636,7 @@ int bch2_btree_insert_nonextent(struct btree_trans *trans,
BTREE_ITER_cached|
BTREE_ITER_not_extents|
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, k, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -646,7 +648,7 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
BTREE_ITER_intent|flags);
int ret = bch2_btree_iter_traverse(&iter) ?:
int ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, k, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -695,7 +697,7 @@ int bch2_btree_delete(struct btree_trans *trans,
bch2_trans_iter_init(trans, &iter, btree, pos,
BTREE_ITER_cached|
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_delete_at(trans, &iter, update_flags);
bch2_trans_iter_exit(trans, &iter);
@ -713,7 +715,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
int ret = 0;
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
while ((k = bch2_btree_iter_peek_max(&iter, end)).k) {
while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(trans->c, 0);
struct bkey_i delete;
@ -808,7 +810,7 @@ int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);
int ret = bch2_btree_iter_traverse(&iter) ?:
int ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_bit_mod_iter(trans, &iter, set);
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -829,7 +831,6 @@ int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
{
unsigned u64s = DIV_ROUND_UP(buf->pos, sizeof(u64));
prt_chars(buf, '\0', u64s * sizeof(u64) - buf->pos);
int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
@ -842,7 +843,7 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
memcpy(l->d, buf->buf, buf->pos);
memcpy_and_pad(l->d, u64s * sizeof(u64), buf->buf, buf->pos, 0);
return 0;
}
@ -868,7 +869,6 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
prt_vprintf(&buf, fmt, args);
unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
prt_chars(&buf, '\0', u64s * sizeof(u64) - buf.pos);
int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
if (ret)
@ -881,7 +881,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
memcpy(l->d, buf.buf, buf.pos);
memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,

View File

@ -222,7 +222,7 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans)
trans->extra_disk_res = 0;
}
static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
unsigned type, unsigned min_bytes)
{
unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
@ -245,7 +245,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t
return mut;
}
static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
{
return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
}

View File

@ -2147,7 +2147,7 @@ static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter,
bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p,
BTREE_MAX_DEPTH, b->c.level,
BTREE_ITER_intent);
int ret = bch2_btree_iter_traverse(iter);
int ret = bch2_btree_iter_traverse(trans, iter);
if (ret)
goto err;
@ -2239,7 +2239,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter,
btree, k->k.p,
BTREE_MAX_DEPTH, level, 0);
struct btree *b = bch2_btree_iter_peek_node(&iter);
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
int ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto out;
@ -2262,7 +2262,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans,
/* Traverse one depth lower to get a pointer to the node itself: */
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, pos, 0, level - 1, 0);
struct btree *b = bch2_btree_iter_peek_node(&iter);
struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
int ret = PTR_ERR_OR_ZERO(b);
if (ret)
goto err;
@ -2406,7 +2406,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
bool skip_triggers)
{
struct bch_fs *c = trans->c;
struct btree_iter iter2 = { NULL };
struct btree_iter iter2 = {};
struct btree *parent;
int ret;
@ -2430,7 +2430,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
parent = btree_node_parent(btree_iter_path(trans, iter), b);
if (parent) {
bch2_trans_copy_iter(&iter2, iter);
bch2_trans_copy_iter(trans, &iter2, iter);
iter2.path = bch2_btree_path_make_mut(trans, iter2.path,
iter2.flags & BTREE_ITER_intent,
@ -2444,7 +2444,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
trans->paths_sorted = false;
ret = bch2_btree_iter_traverse(&iter2) ?:
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
bch2_trans_update(trans, &iter2, new_key, BTREE_TRIGGER_norun);
if (ret)
goto err;

View File

@ -144,7 +144,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq);
EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
ret = bch2_btree_iter_traverse(iter);
ret = bch2_btree_iter_traverse(trans, iter);
if (ret)
return ret;
@ -208,7 +208,7 @@ btree_write_buffered_insert(struct btree_trans *trans,
trans->journal_res.seq = wb->journal_seq;
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, &wb->k,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &iter);
@ -285,7 +285,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
bool write_locked = false;
bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
@ -368,7 +368,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
write_locked = false;
ret = lockrestart_do(trans,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_foreground_maybe_merge(trans, iter.path, 0,
BCH_WATERMARK_reclaim|
BCH_TRANS_COMMIT_journal_reclaim|
@ -385,7 +385,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
}
bch2_btree_iter_set_pos(&iter, k->k.k.p);
bch2_btree_iter_set_pos(trans, &iter, k->k.k.p);
btree_iter_path(trans, &iter)->preserve = false;
bool accounting_accumulated = false;
@ -866,13 +866,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
darray_exit(&wb->inc.keys);
}
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
mutex_init(&wb->inc.lock);
mutex_init(&wb->flushing.lock);
INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work);
}
int bch2_fs_btree_write_buffer_init(struct bch_fs *c)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
/* Will be resized by journal as needed: */
unsigned initial_size = 1 << 16;

View File

@ -101,6 +101,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t
int bch2_btree_write_buffer_resize(struct bch_fs *, size_t);
void bch2_fs_btree_write_buffer_exit(struct bch_fs *);
void bch2_fs_btree_write_buffer_init_early(struct bch_fs *);
int bch2_fs_btree_write_buffer_init(struct bch_fs *);
#endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */

View File

@ -29,6 +29,12 @@
#include <linux/preempt.h>
void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
{
for (unsigned i = 0; i < BCH_DATA_NR; i++)
usage->buckets[i] = percpu_u64_get(&ca->usage->d[i].buckets);
}
void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage)
{
memset(usage, 0, sizeof(*usage));
acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s());
@ -75,7 +81,7 @@ bch2_fs_usage_read_short(struct bch_fs *c)
void bch2_dev_usage_to_text(struct printbuf *out,
struct bch_dev *ca,
struct bch_dev_usage *usage)
struct bch_dev_usage_full *usage)
{
if (out->nr_tabstops < 5) {
printbuf_tabstops_reset(out);
@ -365,7 +371,7 @@ found:
struct btree_iter iter;
bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
BTREE_ITER_intent|BTREE_ITER_all_snapshots);
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, new,
BTREE_UPDATE_internal_snapshot_node|
BTREE_TRIGGER_norun);
@ -707,7 +713,7 @@ err:
struct disk_accounting_pos acc;
memset(&acc, 0, sizeof(acc));
acc.type = BCH_DISK_ACCOUNTING_replicas;
memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
unsafe_memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e), "VLA");
gc_stripe_unlock(m);
acc.replicas.data_type = data_type;
@ -1132,7 +1138,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c,
for_each_online_member(c, ca) {
int ret = bch2_trans_mark_dev_sb(c, ca, flags);
if (ret) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
return ret;
}
}
@ -1331,7 +1337,7 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
{
ca->usage = alloc_percpu(struct bch_dev_usage);
ca->usage = alloc_percpu(struct bch_dev_usage_full);
if (!ca->usage)
return -BCH_ERR_ENOMEM_usage_init;

View File

@ -172,7 +172,16 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
return ret;
}
void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *);
void bch2_dev_usage_full_read_fast(struct bch_dev *, struct bch_dev_usage_full *);
static inline struct bch_dev_usage_full bch2_dev_usage_full_read(struct bch_dev *ca)
{
struct bch_dev_usage_full ret;
bch2_dev_usage_full_read_fast(ca, &ret);
return ret;
}
void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage_full *);
static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
{
@ -207,7 +216,7 @@ static inline u64 dev_buckets_free(struct bch_dev *ca,
enum bch_watermark watermark)
{
return max_t(s64, 0,
usage.d[BCH_DATA_free].buckets -
usage.buckets[BCH_DATA_free]-
ca->nr_open_buckets -
bch2_dev_buckets_reserved(ca, watermark));
}
@ -217,10 +226,10 @@ static inline u64 __dev_buckets_available(struct bch_dev *ca,
enum bch_watermark watermark)
{
return max_t(s64, 0,
usage.d[BCH_DATA_free].buckets
+ usage.d[BCH_DATA_cached].buckets
+ usage.d[BCH_DATA_need_gc_gens].buckets
+ usage.d[BCH_DATA_need_discard].buckets
usage.buckets[BCH_DATA_free]
+ usage.buckets[BCH_DATA_cached]
+ usage.buckets[BCH_DATA_need_gc_gens]
+ usage.buckets[BCH_DATA_need_discard]
- ca->nr_open_buckets
- bch2_dev_buckets_reserved(ca, watermark));
}

View File

@ -54,7 +54,12 @@ struct bucket_gens {
u8 b[] __counted_by(nbuckets);
};
/* Only info on bucket countns: */
struct bch_dev_usage {
u64 buckets[BCH_DATA_NR];
};
struct bch_dev_usage_full {
struct bch_dev_usage_type {
u64 buckets;
u64 sectors; /* _compressed_ sectors: */

View File

@ -350,8 +350,8 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
if (ctx->arg.op == BCH_DATA_OP_scrub) {
struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
if (ca) {
struct bch_dev_usage u;
bch2_dev_usage_read_fast(ca, &u);
struct bch_dev_usage_full u;
bch2_dev_usage_full_read_fast(ca, &u);
for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
if (ctx->arg.scrub.data_types & BIT(i))
e.p.sectors_total += u.d[i].sectors;
@ -473,7 +473,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
struct bch_ioctl_dev_usage __user *user_arg)
{
struct bch_ioctl_dev_usage arg;
struct bch_dev_usage src;
struct bch_dev_usage_full src;
struct bch_dev *ca;
unsigned i;
@ -493,7 +493,7 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
if (IS_ERR(ca))
return PTR_ERR(ca);
src = bch2_dev_usage_read(ca);
src = bch2_dev_usage_full_read(ca);
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
@ -514,7 +514,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
struct bch_ioctl_dev_usage_v2 __user *user_arg)
{
struct bch_ioctl_dev_usage_v2 arg;
struct bch_dev_usage src;
struct bch_dev_usage_full src;
struct bch_dev *ca;
int ret = 0;
@ -534,7 +534,7 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
if (IS_ERR(ca))
return PTR_ERR(ca);
src = bch2_dev_usage_read(ca);
src = bch2_dev_usage_full_read(ca);
arg.state = ca->mi.state;
arg.bucket_size = ca->mi.bucket_size;
@ -615,7 +615,7 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
for_each_online_member(c, ca)
if (ca->dev == dev) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
return ca->dev_idx;
}

View File

@ -371,13 +371,14 @@ static int attempt_compress(struct bch_fs *c,
};
zlib_set_workspace(&strm, workspace);
zlib_deflateInit2(&strm,
if (zlib_deflateInit2(&strm,
compression.level
? clamp_t(unsigned, compression.level,
Z_BEST_SPEED, Z_BEST_COMPRESSION)
: Z_DEFAULT_COMPRESSION,
Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY);
Z_DEFAULT_STRATEGY) != Z_OK)
return 0;
if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
return 0;
@ -713,7 +714,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
ret = match_string(bch2_compression_opts, -1, type_str);
if (ret < 0 && err)
prt_str(err, "invalid compression type");
prt_str(err, "invalid compression type\n");
if (ret < 0)
goto err;
@ -728,7 +729,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
if (!ret && level > 15)
ret = -EINVAL;
if (ret < 0 && err)
prt_str(err, "invalid compression level");
prt_str(err, "invalid compression level\n");
if (ret < 0)
goto err;

View File

@ -20,7 +20,17 @@ struct { \
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
typedef DARRAY(char) darray_char;
typedef DARRAY(char *) darray_str;
typedef DARRAY(char *) darray_str;
typedef DARRAY(u8) darray_u8;
typedef DARRAY(u16) darray_u16;
typedef DARRAY(u32) darray_u32;
typedef DARRAY(u64) darray_u64;
typedef DARRAY(s8) darray_s8;
typedef DARRAY(s16) darray_s16;
typedef DARRAY(s32) darray_s32;
typedef DARRAY(s64) darray_s64;
int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t);

View File

@ -216,7 +216,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
bch2_trans_begin(trans);
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -398,7 +398,7 @@ restart_drop_extra_replicas:
BCH_TRANS_COMMIT_no_enospc|
m->data_opts.btree_insert_flags);
if (!ret) {
bch2_btree_iter_set_pos(&iter, next_pos);
bch2_btree_iter_set_pos(trans, &iter, next_pos);
this_cpu_add(c->counters[BCH_COUNTER_io_move_finish], new->k.size);
if (trace_io_move_finish_enabled())
@ -426,7 +426,7 @@ nowork:
count_event(c, io_move_fail);
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
goto next;
}
out:
@ -497,7 +497,7 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans,
bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos,
BTREE_ITER_slots);
ret = lockrestart_do(trans, ({
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
bkey_err(k);
}));
bch2_trans_iter_exit(trans, &iter);
@ -607,7 +607,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update
prt_newline(out);
printbuf_indent_add(out, 2);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
prt_printf(out, "read_done:\t\%u\n", m->read_done);
prt_printf(out, "read_done:\t%u\n", m->read_done);
bch2_write_op_to_text(out, &m->op);
printbuf_indent_sub(out, 2);
}

View File

@ -57,7 +57,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
submit_bio_wait(bio);
bio_put(bio);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
@ -297,7 +297,7 @@ out:
if (bio)
bio_put(bio);
kvfree(n_ondisk);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
}
#ifdef CONFIG_DEBUG_FS
@ -770,6 +770,12 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
mutex_lock(&s->lock);
prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_DEBUG
printbuf_indent_add(&i->buf, 2);
bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
printbuf_indent_sub(&i->buf, 2);
#endif
prt_printf(&i->buf, "Transaction duration:\n");
printbuf_indent_add(&i->buf, 2);
@ -927,7 +933,11 @@ void bch2_fs_debug_init(struct bch_fs *c)
if (IS_ERR_OR_NULL(bch_debug))
return;
snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
if (!c->opts.single_device)
snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
else
strscpy(name, c->name, sizeof(name));
c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
if (IS_ERR_OR_NULL(c->fs_debug_dir))
return;

View File

@ -287,8 +287,8 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
EBUG_ON(!dirent->v.d_casefold);
EBUG_ON(!cf_name->len);
dirent->v.d_cf_name_block.d_name_len = name->len;
dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len;
dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len);
dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len);
memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
@ -417,8 +417,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
enum bch_rename_mode mode)
{
struct qstr src_name_lookup, dst_name_lookup;
struct btree_iter src_iter = { NULL };
struct btree_iter dst_iter = { NULL };
struct btree_iter src_iter = {};
struct btree_iter dst_iter = {};
struct bkey_s_c old_src, old_dst = bkey_s_c_null;
struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
struct bpos dst_pos =
@ -586,16 +586,16 @@ out_set_src:
}
if (delete_src) {
bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
ret = bch2_btree_iter_traverse(&src_iter) ?:
bch2_btree_iter_set_snapshot(trans, &src_iter, old_src.k->p.snapshot);
ret = bch2_btree_iter_traverse(trans, &src_iter) ?:
bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_internal_snapshot_node);
if (ret)
goto out;
}
if (delete_dst) {
bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot);
ret = bch2_btree_iter_traverse(&dst_iter) ?:
bch2_btree_iter_set_snapshot(trans, &dst_iter, old_dst.k->p.snapshot);
ret = bch2_btree_iter_traverse(trans, &dst_iter) ?:
bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_internal_snapshot_node);
if (ret)
goto out;
@ -642,7 +642,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
const struct qstr *name, subvol_inum *inum)
{
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
int ret = lockrestart_do(trans,
bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
@ -771,7 +771,7 @@ int bch2_fsck_remove_dirent(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
&dir_hash_info, &iter,
BTREE_UPDATE_internal_snapshot_node);

View File

@ -739,7 +739,7 @@ int bch2_accounting_read(struct bch_fs *c)
struct disk_accounting_pos next;
memset(&next, 0, sizeof(next));
next.type = acc_k.type + 1;
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next));
continue;
}
@ -930,7 +930,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
struct disk_accounting_pos next;
memset(&next, 0, sizeof(next));
next.type = acc_k.type + 1;
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next));
continue;
}

View File

@ -555,9 +555,9 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
? rcu_dereference(c->devs[t.dev])
: NULL;
if (ca && percpu_ref_tryget(&ca->io_ref)) {
if (ca && percpu_ref_tryget(&ca->io_ref[READ])) {
prt_printf(out, "/dev/%s", ca->name);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
} else if (ca) {
prt_printf(out, "offline device %u", t.dev);
} else {

View File

@ -105,6 +105,7 @@ struct ec_bio {
struct bch_dev *ca;
struct ec_stripe_buf *buf;
size_t idx;
int rw;
u64 submit_time;
struct bio bio;
};
@ -462,7 +463,8 @@ int bch2_trigger_stripe(struct btree_trans *trans,
return ret;
if (gc)
memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas));
unsafe_memcpy(&gc->r.e, &acc.replicas,
replicas_entry_bytes(&acc.replicas), "VLA");
}
if (old_s) {
@ -703,6 +705,7 @@ static void ec_block_endio(struct bio *bio)
struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx];
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
@ -724,7 +727,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[rw]);
closure_put(cl);
}
@ -775,6 +778,7 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
ec_bio->ca = ca;
ec_bio->buf = buf;
ec_bio->idx = idx;
ec_bio->rw = rw;
ec_bio->submit_time = local_clock();
ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
@ -784,14 +788,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[rw]);
submit_bio(&ec_bio->bio);
offset += b;
}
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[rw]);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@ -1264,7 +1268,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
if (ret)
s->err = ret;
@ -1712,23 +1716,32 @@ err:
}
static int new_stripe_alloc_buckets(struct btree_trans *trans,
struct alloc_request *req,
struct ec_stripe_head *h, struct ec_stripe_new *s,
enum bch_watermark watermark, struct closure *cl)
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
struct open_buckets buckets;
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
bool have_cache = true;
int ret = 0;
req->scratch_data_type = req->data_type;
req->scratch_ptrs = req->ptrs;
req->scratch_nr_replicas = req->nr_replicas;
req->scratch_nr_effective = req->nr_effective;
req->scratch_have_cache = req->have_cache;
req->scratch_devs_may_alloc = req->devs_may_alloc;
req->devs_may_alloc = h->devs;
req->have_cache = true;
BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
BUG_ON(v->nr_redundant != s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
bitmap_and(req->devs_may_alloc.d, req->devs_may_alloc.d,
c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
/*
@ -1738,7 +1751,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
* block when updating the stripe
*/
if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID)
__clear_bit(v->ptrs[i].dev, devs.d);
__clear_bit(v->ptrs[i].dev, req->devs_may_alloc.d);
if (i < s->nr_data)
nr_have_data++;
@ -1749,60 +1762,58 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
BUG_ON(nr_have_data > s->nr_data);
BUG_ON(nr_have_parity > s->nr_parity);
buckets.nr = 0;
req->ptrs.nr = 0;
if (nr_have_parity < s->nr_parity) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->parity_stripe,
&devs,
s->nr_parity,
&nr_have_parity,
&have_cache, 0,
BCH_DATA_parity,
watermark,
cl);
req->nr_replicas = s->nr_parity;
req->nr_effective = nr_have_parity;
req->data_type = BCH_DATA_parity;
open_bucket_for_each(c, &buckets, ob, i) {
ret = bch2_bucket_alloc_set_trans(trans, req, &h->parity_stripe, cl);
open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data + s->nr_parity,
s->nr_data);
BUG_ON(j >= s->nr_data + s->nr_parity);
s->blocks[j] = buckets.v[i];
s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
return ret;
goto err;
}
buckets.nr = 0;
req->ptrs.nr = 0;
if (nr_have_data < s->nr_data) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->block_stripe,
&devs,
s->nr_data,
&nr_have_data,
&have_cache, 0,
BCH_DATA_user,
watermark,
cl);
req->nr_replicas = s->nr_data;
req->nr_effective = nr_have_data;
req->data_type = BCH_DATA_user;
open_bucket_for_each(c, &buckets, ob, i) {
ret = bch2_bucket_alloc_set_trans(trans, req, &h->block_stripe, cl);
open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data, 0);
BUG_ON(j >= s->nr_data);
s->blocks[j] = buckets.v[i];
s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
return ret;
goto err;
}
return 0;
err:
req->data_type = req->scratch_data_type;
req->ptrs = req->scratch_ptrs;
req->nr_replicas = req->scratch_nr_replicas;
req->nr_effective = req->scratch_nr_effective;
req->have_cache = req->scratch_have_cache;
req->devs_may_alloc = req->scratch_devs_may_alloc;
return ret;
}
static int __get_existing_stripe(struct btree_trans *trans,
@ -1836,7 +1847,7 @@ static int __get_existing_stripe(struct btree_trans *trans,
ret = 1;
}
out:
bch2_set_btree_iter_dontneed(&iter);
bch2_set_btree_iter_dontneed(trans, &iter);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@ -1949,7 +1960,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
if (start_pos.offset) {
start_pos = min_pos;
bch2_btree_iter_set_pos(&iter, start_pos);
bch2_btree_iter_set_pos(trans, &iter, start_pos);
continue;
}
@ -1983,17 +1994,15 @@ err:
}
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
unsigned target,
struct alloc_request *req,
unsigned algo,
unsigned redundancy,
enum bch_watermark watermark,
struct closure *cl)
{
struct bch_fs *c = trans->c;
struct ec_stripe_head *h;
bool waiting = false;
unsigned redundancy = req->nr_replicas - 1;
unsigned disk_label = 0;
struct target t = target_decode(target);
struct target t = target_decode(req->target);
bool waiting = false;
int ret;
if (t.type == TARGET_GROUP) {
@ -2004,7 +2013,9 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
disk_label = t.group + 1; /* 0 == no label */
}
h = __bch2_ec_stripe_head_get(trans, disk_label, algo, redundancy, watermark);
struct ec_stripe_head *h =
__bch2_ec_stripe_head_get(trans, disk_label, algo,
redundancy, req->watermark);
if (IS_ERR_OR_NULL(h))
return h;
@ -2028,8 +2039,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
goto alloc_existing;
/* First, try to allocate a full stripe: */
ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?:
enum bch_watermark saved_watermark = BCH_WATERMARK_stripe;
swap(req->watermark, saved_watermark);
ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
swap(req->watermark, saved_watermark);
if (!ret)
goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
@ -2047,8 +2062,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
if (watermark == BCH_WATERMARK_copygc) {
ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?:
if (req->watermark == BCH_WATERMARK_copygc) {
ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
if (ret)
goto err;
@ -2067,7 +2082,7 @@ alloc_existing:
* Retry allocating buckets, with the watermark for this
* particular write:
*/
ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl);
ret = new_stripe_alloc_buckets(trans, req, h, s, cl);
if (ret)
goto err;

View File

@ -254,9 +254,10 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *, int);
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
struct alloc_request;
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *,
unsigned, unsigned, unsigned,
enum bch_watermark, struct closure *);
struct alloc_request *, unsigned, struct closure *);
void bch2_do_stripe_deletes(struct bch_fs *);
void bch2_ec_do_stripe_creates(struct bch_fs *);

View File

@ -192,6 +192,8 @@
x(BCH_ERR_data_update_done, data_update_done_no_rw_devs) \
x(EINVAL, device_state_not_allowed) \
x(EINVAL, member_info_missing) \
x(EINVAL, single_device_filesystem) \
x(EINVAL, not_single_device_filesystem) \
x(EINVAL, mismatched_block_size) \
x(EINVAL, block_size_too_small) \
x(EINVAL, bucket_size_too_small) \
@ -211,6 +213,7 @@
x(EINVAL, inode_unpack_error) \
x(EINVAL, varint_decode_error) \
x(EINVAL, erasure_coding_found_btree_node) \
x(EINVAL, option_negative) \
x(EOPNOTSUPP, may_not_use_incompat_feature) \
x(EROFS, erofs_trans_commit) \
x(EROFS, erofs_no_writes) \

View File

@ -34,7 +34,7 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
bch2_print_string_as_lines(KERN_ERR, out->buf);
bch2_print_string_as_lines_nonblocking(KERN_ERR, out->buf);
panic(bch2_fmt(c, "panic after error"));
return true;
default:
@ -45,6 +45,8 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
bool bch2_inconsistent_error(struct bch_fs *c)
{
struct printbuf buf = PRINTBUF;
buf.atomic++;
printbuf_indent_add_nextline(&buf, 2);
bool ret = __bch2_inconsistent_error(c, &buf);
@ -59,6 +61,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
const char *fmt, va_list args)
{
struct printbuf buf = PRINTBUF;
buf.atomic++;
bch2_log_msg_start(c, &buf);
@ -68,7 +71,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra
if (trans)
bch2_trans_updates_to_text(&buf, trans);
bool ret = __bch2_inconsistent_error(c, &buf);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf);
printbuf_exit(&buf);
return ret;

View File

@ -112,7 +112,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
unsigned nr_iters = 0;
int ret;
ret = bch2_btree_iter_traverse(iter);
ret = bch2_btree_iter_traverse(trans, iter);
if (ret)
return ret;
@ -126,9 +126,9 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
if (ret < 0)
return ret;
bch2_trans_copy_iter(&copy, iter);
bch2_trans_copy_iter(trans, &copy, iter);
for_each_btree_key_max_continue_norestart(copy, insert->k.p, 0, k, ret) {
for_each_btree_key_max_continue_norestart(trans, copy, insert->k.p, 0, k, ret) {
unsigned offset = 0;
if (bkey_gt(bkey_start_pos(&insert->k), bkey_start_pos(k.k)))

View File

@ -136,12 +136,8 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (k.k->type == KEY_TYPE_error)
return -BCH_ERR_key_type_error;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
return -BCH_ERR_extent_poisened;
rcu_read_lock();
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
u64 pick_latency;

View File

@ -183,12 +183,12 @@ static void bchfs_read(struct btree_trans *trans,
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
bch2_btree_iter_set_pos(&iter,
bch2_btree_iter_set_pos(trans, &iter,
POS(inum.inum, rbio->bio.bi_iter.bi_sector));
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -225,11 +225,26 @@ static void bchfs_read(struct btree_trans *trans,
bch2_read_extent(trans, rbio, iter.pos,
data_btree, k, offset_into_extent, flags);
swap(rbio->bio.bi_iter.bi_size, bytes);
/*
* Careful there's a landmine here if bch2_read_extent() ever
* starts returning transaction restarts here.
*
* We've changed rbio->bi_iter.bi_size to be "bytes we can read
* from this extent" with the swap call, and we restore it
* below. That restore needs to come before checking for
* errors.
*
* But unlike __bch2_read(), we use the rbio bvec iter, not one
* on the stack, so we can't do the restore right after the
* bch2_read_extent() call: we don't own that iterator anymore
* if BCH_READ_last_fragment is set, since we may have submitted
* that rbio instead of cloning it.
*/
if (flags & BCH_READ_last_fragment)
break;
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
err:
if (ret &&

View File

@ -48,7 +48,7 @@ static void nocow_flush_endio(struct bio *_bio)
struct nocow_flush *bio = container_of(_bio, struct nocow_flush, bio);
closure_put(bio->cl);
percpu_ref_put(&bio->ca->io_ref);
percpu_ref_put(&bio->ca->io_ref[WRITE]);
bio_put(&bio->bio);
}
@ -71,7 +71,7 @@ void bch2_inode_flush_nocow_writes_async(struct bch_fs *c,
for_each_set_bit(dev, devs.d, BCH_SB_MEMBERS_MAX) {
rcu_read_lock();
ca = rcu_dereference(c->devs[dev]);
if (ca && !percpu_ref_tryget(&ca->io_ref))
if (ca && !percpu_ref_tryget(&ca->io_ref[WRITE]))
ca = NULL;
rcu_read_unlock();
@ -241,6 +241,7 @@ out:
if (!ret)
ret = err;
bch_err_fn(c, ret);
return ret;
}
@ -636,9 +637,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (ret)
goto bkey_err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
if ((ret = bkey_err(k)))
goto bkey_err;
@ -649,13 +650,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
/* already reserved */
if (bkey_extent_is_reservation(k) &&
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
continue;
}
if (bkey_extent_is_data(k.k) &&
!(mode & FALLOC_FL_ZERO_RANGE)) {
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
continue;
}
@ -676,7 +677,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (ret)
goto bkey_err;
}
bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start));
if (ret)
goto bkey_err;

View File

@ -88,7 +88,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
void *p, unsigned fields)
{
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bch_inode_unpacked inode_u;
int ret;
retry:
@ -1075,7 +1075,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_qid qid;
struct btree_trans *trans;
struct btree_iter inode_iter = { NULL };
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode_u;
struct posix_acl *acl = NULL;
kuid_t kuid;
@ -1330,9 +1330,9 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (ret)
continue;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
k = bch2_btree_iter_peek_max(&iter, end);
k = bch2_btree_iter_peek_max(trans, &iter, end);
ret = bkey_err(k);
if (ret)
continue;
@ -1342,7 +1342,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (!bkey_extent_is_data(k.k) &&
k.k->type != KEY_TYPE_reservation) {
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
continue;
}
@ -1380,7 +1380,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bkey_copy(prev.k, cur.k);
have_extent = true;
bch2_btree_iter_set_pos(&iter,
bch2_btree_iter_set_pos(trans, &iter,
POS(iter.pos.inode, iter.pos.offset + sectors));
}
bch2_trans_iter_exit(trans, &iter);
@ -1697,17 +1697,17 @@ retry:
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter1, snapshot);
bch2_btree_iter_set_snapshot(&iter2, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter1, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter2, snapshot);
ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
if (ret)
goto err;
if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
bch2_btree_iter_set_pos(trans, &iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));
k = bch2_btree_iter_peek_slot(&iter1);
k = bch2_btree_iter_peek_slot(trans, &iter1);
ret = bkey_err(k);
if (ret)
goto err;
@ -1731,7 +1731,7 @@ retry:
* File with multiple hardlinks and our backref is to the wrong
* directory - linear search:
*/
for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
for_each_btree_key_continue_norestart(trans, iter2, 0, k, ret) {
if (k.k->p.inode > dir->ei_inode.bi_inum)
break;
@ -2237,7 +2237,7 @@ got_sb:
/* XXX: create an anonymous device for multi device filesystems */
sb->s_bdev = bdev;
sb->s_dev = bdev->bd_dev;
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
break;
}

View File

@ -186,7 +186,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
{
struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found");
struct btree_iter lostfound_iter = { NULL };
struct btree_iter lostfound_iter = {};
u64 inum = 0;
unsigned d_type = 0;
int ret;
@ -295,8 +295,8 @@ create_lostfound:
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
ret = bch2_btree_iter_traverse(&lostfound_iter);
bch2_btree_iter_set_snapshot(trans, &lostfound_iter, snapshot);
ret = bch2_btree_iter_traverse(trans, &lostfound_iter);
if (ret)
goto err;
@ -544,7 +544,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub
new_inode.bi_subvol = subvolid;
int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
bch2_btree_iter_traverse(&inode_iter) ?:
bch2_btree_iter_traverse(trans, &inode_iter) ?:
bch2_inode_write(trans, &inode_iter, &new_inode);
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
@ -609,7 +609,7 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32
struct btree_iter iter = {};
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0));
struct bkey_s_c k = bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum, 0));
bch2_trans_iter_exit(trans, &iter);
int ret = bkey_err(k);
if (ret)
@ -1557,7 +1557,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
struct btree_iter iter1, iter2 = { NULL };
struct btree_iter iter1, iter2 = {};
struct bkey_s_c k1, k2;
int ret;
@ -1566,7 +1566,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
bch2_trans_iter_init(trans, &iter1, btree, pos1,
BTREE_ITER_all_snapshots|
BTREE_ITER_not_extents);
k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX));
k1 = bch2_btree_iter_peek_max(trans, &iter1, POS(pos1.inode, U64_MAX));
ret = bkey_err(k1);
if (ret)
goto err;
@ -1586,12 +1586,12 @@ static int overlapping_extents_found(struct btree_trans *trans,
goto err;
}
bch2_trans_copy_iter(&iter2, &iter1);
bch2_trans_copy_iter(trans, &iter2, &iter1);
while (1) {
bch2_btree_iter_advance(&iter2);
bch2_btree_iter_advance(trans, &iter2);
k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX));
k2 = bch2_btree_iter_peek_max(trans, &iter2, POS(pos1.inode, U64_MAX));
ret = bkey_err(k2);
if (ret)
goto err;
@ -1791,9 +1791,9 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct btree_iter iter2;
bch2_trans_copy_iter(&iter2, iter);
bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
ret = bch2_btree_iter_traverse(&iter2) ?:
bch2_trans_copy_iter(trans, &iter2, iter);
bch2_btree_iter_set_snapshot(trans, &iter2, i->snapshot);
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
bch2_btree_delete_at(trans, &iter2,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &iter2);
@ -2185,7 +2185,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
BTREE_ID_dirents,
SPOS(k.k->p.inode, k.k->p.offset, *i),
BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(&delete_iter) ?:
ret = bch2_btree_iter_traverse(trans, &delete_iter) ?:
bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
hash_info,
&delete_iter,
@ -2366,8 +2366,6 @@ int bch2_check_root(struct bch_fs *c)
return ret;
}
typedef DARRAY(u32) darray_u32;
static bool darray_u32_has(darray_u32 *d, u32 v)
{
darray_for_each(*d, i)
@ -2412,7 +2410,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
bch2_trans_iter_exit(trans, &parent_iter);
bch2_trans_iter_init(trans, &parent_iter,
BTREE_ID_subvolumes, POS(0, parent), 0);
k = bch2_btree_iter_peek_slot(&parent_iter);
k = bch2_btree_iter_peek_slot(trans, &parent_iter);
ret = bkey_err(k);
if (ret)
goto err;

View File

@ -940,7 +940,7 @@ int bch2_inode_create(struct btree_trans *trans,
BTREE_ITER_intent);
struct bkey_s_c k;
again:
while ((k = bch2_btree_iter_peek(iter)).k &&
while ((k = bch2_btree_iter_peek(trans, iter)).k &&
!(ret = bkey_err(k)) &&
bkey_lt(k.k->p, POS(0, max))) {
if (pos < iter->pos.offset)
@ -951,7 +951,7 @@ again:
* we've found just one:
*/
pos = iter->pos.offset + 1;
bch2_btree_iter_set_pos(iter, POS(0, pos));
bch2_btree_iter_set_pos(trans, iter, POS(0, pos));
}
if (!ret && pos < max)
@ -967,12 +967,12 @@ again:
/* Retry from start */
pos = start = min;
bch2_btree_iter_set_pos(iter, POS(0, pos));
bch2_btree_iter_set_pos(trans, iter, POS(0, pos));
le32_add_cpu(&cursor->v.gen, 1);
goto again;
found_slot:
bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
k = bch2_btree_iter_peek_slot(iter);
bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, snapshot));
k = bch2_btree_iter_peek_slot(trans, iter);
ret = bkey_err(k);
if (ret) {
bch2_trans_iter_exit(trans, iter);
@ -1009,9 +1009,9 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
k = bch2_btree_iter_peek_max(&iter, end);
k = bch2_btree_iter_peek_max(trans, &iter, end);
ret = bkey_err(k);
if (ret)
goto err;
@ -1042,7 +1042,7 @@ err:
int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bkey_s_c k;
u32 snapshot;
int ret;
@ -1207,7 +1207,7 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i
static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bkey_i_inode_generation delete;
struct bch_inode_unpacked inode_u;
struct bkey_s_c k;

View File

@ -43,7 +43,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
bch2_bkey_buf_init(&new);
closure_init_stack(&cl);
k = bch2_btree_iter_peek_slot(iter);
k = bch2_btree_iter_peek_slot(trans, iter);
ret = bkey_err(k);
if (ret)
return ret;
@ -164,12 +164,12 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
continue;
bch2_btree_iter_set_snapshot(iter, snapshot);
bch2_btree_iter_set_snapshot(trans, iter, snapshot);
/*
* peek_max() doesn't have ideal semantics for extents:
*/
k = bch2_btree_iter_peek_max(iter, end_pos);
k = bch2_btree_iter_peek_max(trans, iter, end_pos);
if (!k.k)
break;
@ -230,7 +230,7 @@ static int truncate_set_isize(struct btree_trans *trans,
u64 new_i_size,
bool warn)
{
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct bch_inode_unpacked inode_u;
int ret;
@ -399,7 +399,7 @@ case LOGGED_OP_FINSERT_start:
if (ret)
goto err;
} else {
bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset));
bch2_btree_iter_set_pos(trans, &iter, POS(inum.inum, src_offset));
ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta);
if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
@ -425,12 +425,12 @@ case LOGGED_OP_FINSERT_shift_extents:
if (ret)
goto btree_err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot));
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
bch2_btree_iter_set_pos(trans, &iter, SPOS(inum.inum, pos, snapshot));
k = insert
? bch2_btree_iter_peek_prev_min(&iter, POS(inum.inum, 0))
: bch2_btree_iter_peek_max(&iter, POS(inum.inum, U64_MAX));
? bch2_btree_iter_peek_prev_min(trans, &iter, POS(inum.inum, 0))
: bch2_btree_iter_peek_max(trans, &iter, POS(inum.inum, U64_MAX));
if ((ret = bkey_err(k)))
goto btree_err;

View File

@ -25,6 +25,7 @@
#include "subvolume.h"
#include "trace.h"
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
@ -34,6 +35,12 @@ module_param_named(read_corrupt_ratio, bch2_read_corrupt_ratio, uint, 0644);
MODULE_PARM_DESC(read_corrupt_ratio, "");
#endif
static bool bch2_poison_extents_on_checksum_error;
module_param_named(poison_extents_on_checksum_error,
bch2_poison_extents_on_checksum_error, bool, 0644);
MODULE_PARM_DESC(poison_extents_on_checksum_error,
"Extents with checksum errors are marked as poisoned - unsafe without read fua support");
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static bool bch2_target_congested(struct bch_fs *c, u16 target)
@ -296,6 +303,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
bool *read_full,
struct bch_io_failures *failed)
{
/*
* We're in the retry path, but we don't know what to repair yet, and we
* don't want to do a promote here:
*/
if (failed && !failed->nr)
return NULL;
struct bch_fs *c = trans->c;
/*
* if failed != NULL we're not actually doing a promote, we're
@ -394,7 +408,7 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
if (rbio->have_ioref) {
struct bch_dev *ca = bch2_dev_have_ref(rbio->c, rbio->pick.ptr.dev);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
}
if (rbio->split) {
@ -430,6 +444,74 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
bio_endio(&rbio->bio);
}
static void get_rbio_extent(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bkey_buf *sk)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = lockrestart_do(trans,
bkey_err(k = bch2_bkey_get_iter(trans, &iter,
rbio->data_btree, rbio->data_pos, 0)));
if (ret)
return;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr)
if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
bch2_bkey_buf_reassemble(sk, trans->c, k);
break;
}
bch2_trans_iter_exit(trans, &iter);
}
static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
enum btree_id btree, struct bkey_s_c read_k)
{
if (!bch2_poison_extents_on_checksum_error)
return 0;
struct bch_fs *c = trans->c;
struct data_update *u = rbio_data_update(rbio);
if (u)
read_k = bkey_i_to_s_c(u->k.k);
u64 flags = bch2_bkey_extent_flags(read_k);
if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
return 0;
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k),
BTREE_ITER_intent);
int ret = bkey_err(k);
if (ret)
return ret;
if (!bkey_and_val_eq(k, read_k))
goto out;
struct bkey_i *new = bch2_trans_kmalloc(trans,
bkey_bytes(k.k) + sizeof(struct bch_extent_flags));
ret = PTR_ERR_OR_ZERO(new) ?:
(bkey_reassemble(new, k), 0) ?:
bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?:
bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
/*
* Propagate key change back to data update path, in particular so it
* knows the extent has been poisoned and it's safe to change the
* checksum
*/
if (u && !ret)
bch2_bkey_buf_copy(&u->k, c, new);
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bvec_iter bvec_iter,
@ -463,7 +545,8 @@ retry:
err:
bch2_trans_iter_exit(trans, &iter);
if (bch2_err_matches(ret, BCH_ERR_data_read_retry))
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_data_read_retry))
goto retry;
if (ret) {
@ -487,13 +570,21 @@ static void bch2_rbio_retry(struct work_struct *work)
.inum = rbio->read_pos.inode,
};
struct bch_io_failures failed = { .nr = 0 };
struct btree_trans *trans = bch2_trans_get(c);
struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
bkey_init(&sk.k->k);
trace_io_read_retry(&rbio->bio);
this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
bvec_iter_sectors(rbio->bvec_iter));
if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
get_rbio_extent(trans, rbio, &sk);
if (!bkey_deleted(&sk.k->k) &&
bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
bch2_mark_io_failure(&failed, &rbio->pick,
rbio->ret == -BCH_ERR_data_read_retry_csum_err);
@ -514,7 +605,7 @@ static void bch2_rbio_retry(struct work_struct *work)
int ret = rbio->data_update
? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
: __bch2_read(trans, rbio, iter, inum, &failed, flags);
: __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
if (ret) {
rbio->ret = ret;
@ -535,6 +626,7 @@ static void bch2_rbio_retry(struct work_struct *work)
}
bch2_rbio_done(rbio);
bch2_bkey_buf_exit(&sk, c);
bch2_trans_put(trans);
}
@ -909,7 +1001,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
prt_printf(&buf, "memory gen: %u", gen);
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(trans, &iter)));
if (!ret) {
prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k);
@ -959,6 +1051,10 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
bvec_iter_sectors(iter));
goto out_read_done;
}
if ((bch2_bkey_extent_flags(k) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) &&
!orig->data_update)
return -BCH_ERR_extent_poisened;
retry_pick:
ret = bch2_bkey_pick_read_device(c, k, failed, &pick, dev);
@ -967,6 +1063,16 @@ retry_pick:
goto hole;
if (unlikely(ret < 0)) {
if (ret == -BCH_ERR_data_read_csum_err) {
int ret2 = maybe_poison_extent(trans, orig, data_btree, k);
if (ret2) {
ret = ret2;
goto err;
}
trace_and_count(c, io_read_fail_and_poison, &orig->bio);
}
struct printbuf buf = PRINTBUF;
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
@ -1003,7 +1109,7 @@ retry_pick:
unlikely(dev_ptr_stale(ca, &pick.ptr))) {
read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
bch2_mark_io_failure(failed, &pick, false);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
goto retry_pick;
}
@ -1036,7 +1142,7 @@ retry_pick:
*/
if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
if (ca)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
rbio->ret = -BCH_ERR_data_read_buffer_too_small;
goto out_read_done;
}
@ -1260,12 +1366,15 @@ out_read_done:
int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, subvol_inum inum,
struct bch_io_failures *failed, unsigned flags)
struct bch_io_failures *failed,
struct bkey_buf *prev_read,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_buf sk;
struct bkey_s_c k;
enum btree_id data_btree;
int ret;
EBUG_ON(rbio->data_update);
@ -1276,7 +1385,7 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
BTREE_ITER_slots);
while (1) {
enum btree_id data_btree = BTREE_ID_extents;
data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
@ -1285,12 +1394,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &iter, snapshot);
bch2_btree_iter_set_pos(&iter,
bch2_btree_iter_set_pos(trans, &iter,
POS(inum.inum, bvec_iter.bi_sector));
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -1308,6 +1417,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
k = bkey_i_to_s_c(sk.k);
if (unlikely(flags & BCH_READ_in_retry)) {
if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
failed->nr = 0;
bch2_bkey_buf_copy(prev_read, c, sk.k);
}
/*
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
@ -1342,9 +1457,7 @@ err:
break;
}
bch2_trans_iter_exit(trans, &iter);
if (ret) {
if (unlikely(ret)) {
struct printbuf buf = PRINTBUF;
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
@ -1360,6 +1473,7 @@ err:
bch2_rbio_done(rbio);
}
bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&sk, c);
return ret;
}
@ -1370,10 +1484,18 @@ void bch2_fs_io_read_exit(struct bch_fs *c)
rhashtable_destroy(&c->promote_table);
bioset_exit(&c->bio_read_split);
bioset_exit(&c->bio_read);
mempool_exit(&c->bio_bounce_pages);
}
int bch2_fs_io_read_init(struct bch_fs *c)
{
if (mempool_init_page_pool(&c->bio_bounce_pages,
max_t(unsigned,
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
PAGE_SIZE, 0))
return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS))
return -BCH_ERR_ENOMEM_bio_read_init;

View File

@ -144,7 +144,8 @@ static inline void bch2_read_extent(struct btree_trans *trans,
}
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
subvol_inum, struct bch_io_failures *, unsigned flags);
subvol_inum,
struct bch_io_failures *, struct bkey_buf *, unsigned flags);
static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
subvol_inum inum)
@ -154,7 +155,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
rbio->subvol = inum.subvol;
bch2_trans_run(c,
__bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL,
__bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
BCH_READ_retry_if_stale|
BCH_READ_may_promote|
BCH_READ_user_mapped));

View File

@ -168,9 +168,9 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
*i_sectors_delta = 0;
*disk_sectors_delta = 0;
bch2_trans_copy_iter(&iter, extent_iter);
bch2_trans_copy_iter(trans, &iter, extent_iter);
for_each_btree_key_max_continue_norestart(iter,
for_each_btree_key_max_continue_norestart(trans, iter,
new->k.p, BTREE_ITER_slots, old, ret) {
s64 sectors = min(new->k.p.offset, old.k->p.offset) -
max(bkey_start_offset(&new->k),
@ -292,7 +292,7 @@ int bch2_extent_update(struct btree_trans *trans,
* path already traversed at iter->pos because
* bch2_trans_extent_update() will use it to attempt extent merging
*/
ret = __bch2_btree_iter_traverse(iter);
ret = __bch2_btree_iter_traverse(trans, iter);
if (ret)
return ret;
@ -337,7 +337,7 @@ int bch2_extent_update(struct btree_trans *trans,
if (i_sectors_delta_total)
*i_sectors_delta_total += i_sectors_delta;
bch2_btree_iter_set_pos(iter, next_pos);
bch2_btree_iter_set_pos(trans, iter, next_pos);
return 0;
}
@ -445,6 +445,11 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
BUG_ON(c->opts.nochanges);
bkey_for_each_ptr(ptrs, ptr) {
/*
* XXX: btree writes should be using io_ref[WRITE], but we
* aren't retrying failed btree writes yet (due to device
* removal/ro):
*/
struct bch_dev *ca = nocow
? bch2_dev_have_ref(c, ptr->dev)
: bch2_dev_get_ioref(c, ptr->dev, type == BCH_DATA_btree ? READ : WRITE);
@ -697,12 +702,19 @@ static void bch2_write_endio(struct bio *bio)
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_write,
wbio->submit_time, !bio->bi_status);
if (bio->bi_status) {
bch_err_inum_offset_ratelimited(ca,
op->pos.inode,
wbio->inode_offset << 9,
"data write error: %s",
bch2_blk_status_to_str(bio->bi_status));
if (unlikely(bio->bi_status)) {
if (ca)
bch_err_inum_offset_ratelimited(ca,
op->pos.inode,
wbio->inode_offset << 9,
"data write error: %s",
bch2_blk_status_to_str(bio->bi_status));
else
bch_err_inum_offset_ratelimited(c,
op->pos.inode,
wbio->inode_offset << 9,
"data write error: %s",
bch2_blk_status_to_str(bio->bi_status));
set_bit(wbio->dev, op->failed.d);
op->flags |= BCH_WRITE_io_error;
}
@ -715,7 +727,7 @@ static void bch2_write_endio(struct bio *bio)
}
if (wbio->have_ioref)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
if (wbio->bounce)
bch2_bio_free_pages_pool(c, bio);
@ -1293,7 +1305,7 @@ retry:
if (ret)
break;
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
break;
@ -1377,7 +1389,7 @@ retry:
bch2_keylist_push(&op->insert_keys);
if (op->flags & BCH_WRITE_submitted)
break;
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
}
out:
bch2_trans_iter_exit(trans, &iter);
@ -1414,7 +1426,7 @@ err:
return;
err_get_ioref:
darray_for_each(buckets, i)
percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref);
percpu_ref_put(&bch2_dev_have_ref(c, i->b.inode)->io_ref[WRITE]);
/* Fall back to COW path: */
goto out;
@ -1711,7 +1723,6 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
void bch2_fs_io_write_exit(struct bch_fs *c)
{
mempool_exit(&c->bio_bounce_pages);
bioset_exit(&c->replica_set);
bioset_exit(&c->bio_write);
}
@ -1722,12 +1733,5 @@ int bch2_fs_io_write_init(struct bch_fs *c)
bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0))
return -BCH_ERR_ENOMEM_bio_write_init;
if (mempool_init_page_pool(&c->bio_bounce_pages,
max_t(unsigned,
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
PAGE_SIZE, 0))
return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
return 0;
}

View File

@ -17,34 +17,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
__printf(3, 4)
void bch2_write_op_error(struct bch_write_op *op, u64, const char *, ...);
#define BCH_WRITE_FLAGS() \
x(alloc_nowait) \
x(cached) \
x(data_encoded) \
x(pages_stable) \
x(pages_owned) \
x(only_specified_devs) \
x(wrote_data_inline) \
x(check_enospc) \
x(sync) \
x(move) \
x(in_worker) \
x(submitted) \
x(io_error) \
x(convert_unwritten)
enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
BCH_WRITE_FLAGS()
#undef x
};
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->watermark == BCH_WATERMARK_copygc

View File

@ -13,6 +13,34 @@
#include <linux/llist.h>
#include <linux/workqueue.h>
#define BCH_WRITE_FLAGS() \
x(alloc_nowait) \
x(cached) \
x(data_encoded) \
x(pages_stable) \
x(pages_owned) \
x(only_specified_devs) \
x(wrote_data_inline) \
x(check_enospc) \
x(sync) \
x(move) \
x(in_worker) \
x(submitted) \
x(io_error) \
x(convert_unwritten)
enum __bch_write_flags {
#define x(f) __BCH_WRITE_##f,
BCH_WRITE_FLAGS()
#undef x
};
enum bch_write_flags {
#define x(f) BCH_WRITE_##f = BIT(__BCH_WRITE_##f),
BCH_WRITE_FLAGS()
#undef x
};
struct bch_write_bio {
struct_group(wbio,
struct bch_fs *c;

View File

@ -1315,7 +1315,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
return ret;
}
}
@ -1404,13 +1404,19 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
nr = cur_seq - last_seq;
if (nr + 1 > j->pin.size) {
free_fifo(&j->pin);
init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -BCH_ERR_ENOMEM_journal_pin_fifo;
}
/*
* Extra fudge factor, in case we crashed when the journal pin fifo was
* nearly or completely full. We'll need to be able to open additional
* journal entries (at least a few) in order for journal replay to get
* going:
*/
nr += nr / 4;
nr = max(nr, JOURNAL_PIN);
init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
if (!j->pin.data) {
bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
return -BCH_ERR_ENOMEM_journal_pin_fifo;
}
j->replay_journal_seq = last_seq;
@ -1454,18 +1460,15 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
j->last_empty_seq = cur_seq - 1; /* to match j->seq */
spin_lock(&j->lock);
set_bit(JOURNAL_running, &j->flags);
j->last_flush_write = jiffies;
j->reservations.idx = journal_cur_seq(j);
c->last_bucket_seq_cleanup = journal_cur_seq(j);
bch2_journal_space_available(j);
spin_unlock(&j->lock);
return bch2_journal_reclaim_start(j);
return 0;
}
/* init/exit: */
@ -1554,7 +1557,7 @@ void bch2_fs_journal_exit(struct journal *j)
free_fifo(&j->pin);
}
int bch2_fs_journal_init(struct journal *j)
void bch2_fs_journal_init_early(struct journal *j)
{
static struct lock_class_key res_key;
@ -1573,10 +1576,10 @@ int bch2_fs_journal_init(struct journal *j)
atomic64_set(&j->reservations.counter,
((union journal_res_state)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
}
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
return -BCH_ERR_ENOMEM_journal_pin_fifo;
int bch2_fs_journal_init(struct journal *j)
{
j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN;
j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL);
if (!j->free_buf)
@ -1585,8 +1588,6 @@ int bch2_fs_journal_init(struct journal *j)
for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
j->buf[i].idx = i;
j->pin.front = j->pin.back = 1;
j->wq = alloc_workqueue("bcachefs_journal",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512);
if (!j->wq)

View File

@ -463,6 +463,7 @@ int bch2_fs_journal_start(struct journal *, u64);
void bch2_dev_journal_exit(struct bch_dev *);
int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
void bch2_fs_journal_exit(struct journal *);
void bch2_fs_journal_init_early(struct journal *);
int bch2_fs_journal_init(struct journal *);
#endif /* _BCACHEFS_JOURNAL_H */

View File

@ -1218,7 +1218,7 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
out:
bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
kvfree(buf.data);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
closure_return(cl);
return;
err:
@ -1253,7 +1253,7 @@ int bch2_journal_read(struct bch_fs *c,
if ((ca->mi.state == BCH_MEMBER_STATE_rw ||
ca->mi.state == BCH_MEMBER_STATE_ro) &&
percpu_ref_tryget(&ca->io_ref))
percpu_ref_tryget(&ca->io_ref[READ]))
closure_call(&ca->journal.read,
bch2_journal_read_device,
system_unbound_wq,
@ -1460,7 +1460,7 @@ fsck_err:
static void journal_advance_devs_to_next_bucket(struct journal *j,
struct dev_alloc_list *devs,
unsigned sectors, u64 seq)
unsigned sectors, __le64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
@ -1768,7 +1768,7 @@ static void journal_write_endio(struct bio *bio)
}
closure_put(&w->io);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[WRITE]);
}
static CLOSURE_CALLBACK(journal_write_submit)
@ -1843,7 +1843,7 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (w->separate_flush) {
for_each_rw_member(c, ca) {
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[WRITE]);
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio[w->idx]->bio;

View File

@ -151,8 +151,6 @@ enum journal_flags {
#undef x
};
typedef DARRAY(u64) darray_u64;
struct journal_bio {
struct bch_dev *ca;
unsigned buf_idx;

View File

@ -130,7 +130,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
retry:
ret = 0;
while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
(b = bch2_btree_iter_peek_node(trans, &iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
bch2_progress_update_iter(trans, progress, &iter, "dropping metadata");
@ -154,7 +154,7 @@ retry:
if (ret)
break;
next:
bch2_btree_iter_next_node(&iter);
bch2_btree_iter_next_node(trans, &iter);
}
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;

View File

@ -126,26 +126,40 @@ static void move_write_done(struct bch_write_op *op)
static void move_write(struct moving_io *io)
{
struct bch_fs *c = io->write.op.c;
struct moving_context *ctxt = io->write.ctxt;
struct bch_read_bio *rbio = &io->write.rbio;
if (ctxt->stats) {
if (io->write.rbio.bio.bi_status)
if (rbio->bio.bi_status)
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_uncorrected);
else if (io->write.rbio.saw_error)
else if (rbio->saw_error)
atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_corrected);
}
if (unlikely(io->write.rbio.ret ||
io->write.rbio.bio.bi_status ||
io->write.data_opts.scrub)) {
/*
* If the extent has been bitrotted, we're going to have to give it a
* new checksum in order to move it - but the poison bit will ensure
* that userspace still gets the appropriate error.
*/
if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err &&
(bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
struct bch_extent_crc_unpacked crc = rbio->pick.crc;
struct nonce nonce = extent_nonce(rbio->version, crc);
rbio->pick.crc.csum = bch2_checksum_bio(c, rbio->pick.crc.csum_type,
nonce, &rbio->bio);
rbio->ret = 0;
}
if (unlikely(rbio->ret || io->write.data_opts.scrub)) {
move_free(io);
return;
}
if (trace_io_move_write_enabled()) {
struct bch_fs *c = io->write.op.c;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
@ -545,7 +559,7 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *
BTREE_ID_reflink, reflink_pos,
BTREE_ITER_not_extents);
struct bkey_s_c k = bch2_btree_iter_peek(iter);
struct bkey_s_c k = bch2_btree_iter_peek(trans, iter);
if (!k.k || bkey_err(k)) {
bch2_trans_iter_exit(trans, iter);
return k;
@ -603,7 +617,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek(trans, &iter);
if (!k.k)
break;
@ -681,7 +695,7 @@ next:
if (ctxt->stats)
atomic64_add(k.k->size, &ctxt->stats->sectors_seen);
next_nondata:
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
}
bch2_trans_iter_exit(trans, &reflink_iter);
@ -794,7 +808,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
bch2_trans_begin(trans);
k = bch2_btree_iter_peek(&bp_iter);
k = bch2_btree_iter_peek(trans, &bp_iter);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
@ -876,7 +890,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (ctxt->stats)
atomic64_add(sectors, &ctxt->stats->sectors_seen);
next:
bch2_btree_iter_advance(&bp_iter);
bch2_btree_iter_advance(trans, &bp_iter);
}
err:
bch2_trans_iter_exit(trans, &bp_iter);
@ -991,7 +1005,7 @@ static int bch2_move_btree(struct bch_fs *c,
retry:
ret = 0;
while (bch2_trans_begin(trans),
(b = bch2_btree_iter_peek_node(&iter)) &&
(b = bch2_btree_iter_peek_node(trans, &iter)) &&
!(ret = PTR_ERR_OR_ZERO(b))) {
if (kthread && kthread_should_stop())
break;
@ -1011,7 +1025,7 @@ retry:
if (ret)
break;
next:
bch2_btree_iter_next_node(&iter);
bch2_btree_iter_next_node(trans, &iter);
}
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;

View File

@ -280,7 +280,11 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
s64 wait = S64_MAX, fragmented_allowed, fragmented;
for_each_rw_member(c, ca) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca);
struct bch_dev_usage usage;
for (unsigned i = 0; i < BCH_DATA_NR; i++)
usage.buckets[i] = usage_full.d[i].buckets;
fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
ca->mi.bucket_size) >> 1);
@ -288,7 +292,7 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
for (unsigned i = 0; i < BCH_DATA_NR; i++)
if (data_type_movable(i))
fragmented += usage.d[i].fragmented;
fragmented += usage_full.d[i].fragmented;
wait = min(wait, max(0LL, fragmented_allowed - fragmented));
}

View File

@ -28,8 +28,8 @@ int bch2_create_trans(struct btree_trans *trans,
unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_iter dir_iter = { NULL };
struct btree_iter inode_iter = { NULL };
struct btree_iter dir_iter = {};
struct btree_iter inode_iter = {};
subvol_inum new_inum = dir;
u64 now = bch2_current_time(c);
u64 cpu = raw_smp_processor_id();
@ -127,8 +127,8 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
ret = bch2_btree_iter_traverse(&dir_iter);
bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot);
ret = bch2_btree_iter_traverse(trans, &dir_iter);
if (ret)
goto err;
}
@ -177,9 +177,9 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_depth = dir_u->bi_depth + 1;
inode_iter.flags &= ~BTREE_ITER_all_snapshots;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot);
ret = bch2_btree_iter_traverse(&inode_iter) ?:
ret = bch2_btree_iter_traverse(trans, &inode_iter) ?:
bch2_inode_write(trans, &inode_iter, new_inode);
err:
bch2_trans_iter_exit(trans, &inode_iter);
@ -193,8 +193,8 @@ int bch2_link_trans(struct btree_trans *trans,
const struct qstr *name)
{
struct bch_fs *c = trans->c;
struct btree_iter dir_iter = { NULL };
struct btree_iter inode_iter = { NULL };
struct btree_iter dir_iter = {};
struct btree_iter inode_iter = {};
struct bch_hash_info dir_hash;
u64 now = bch2_current_time(c);
u64 dir_offset = 0;
@ -253,9 +253,9 @@ int bch2_unlink_trans(struct btree_trans *trans,
bool deleting_subvol)
{
struct bch_fs *c = trans->c;
struct btree_iter dir_iter = { NULL };
struct btree_iter dirent_iter = { NULL };
struct btree_iter inode_iter = { NULL };
struct btree_iter dir_iter = {};
struct btree_iter dirent_iter = {};
struct btree_iter inode_iter = {};
struct bch_hash_info dir_hash;
subvol_inum inum;
u64 now = bch2_current_time(c);
@ -301,7 +301,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
if (ret)
goto err;
k = bch2_btree_iter_peek_slot(&dirent_iter);
k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -310,8 +310,8 @@ int bch2_unlink_trans(struct btree_trans *trans,
* If we're deleting a subvolume, we need to really delete the
* dirent, not just emit a whiteout in the current snapshot:
*/
bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
ret = bch2_btree_iter_traverse(&dirent_iter);
bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
ret = bch2_btree_iter_traverse(trans, &dirent_iter);
if (ret)
goto err;
} else {
@ -390,10 +390,10 @@ int bch2_rename_trans(struct btree_trans *trans,
enum bch_rename_mode mode)
{
struct bch_fs *c = trans->c;
struct btree_iter src_dir_iter = { NULL };
struct btree_iter dst_dir_iter = { NULL };
struct btree_iter src_inode_iter = { NULL };
struct btree_iter dst_inode_iter = { NULL };
struct btree_iter src_dir_iter = {};
struct btree_iter dst_dir_iter = {};
struct btree_iter src_inode_iter = {};
struct btree_iter dst_inode_iter = {};
struct bch_hash_info src_hash, dst_hash;
subvol_inum src_inum, dst_inum;
u64 src_offset, dst_offset;
@ -666,7 +666,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
struct btree_iter bp_iter = { NULL };
struct btree_iter bp_iter = {};
int ret = 0;
if (inode_points_to_dirent(target, d))

View File

@ -133,12 +133,10 @@ void bch2_fs_nocow_locking_exit(struct bch_fs *c)
BUG_ON(atomic_read(&l->l[j]));
}
int bch2_fs_nocow_locking_init(struct bch_fs *c)
void bch2_fs_nocow_locking_init_early(struct bch_fs *c)
{
struct bucket_nocow_lock_table *t = &c->nocow_locks;
for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++)
spin_lock_init(&l->lock);
return 0;
}

View File

@ -45,6 +45,6 @@ static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t,
void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
void bch2_fs_nocow_locking_exit(struct bch_fs *);
int bch2_fs_nocow_locking_init(struct bch_fs *);
void bch2_fs_nocow_locking_init_early(struct bch_fs *);
#endif /* _BCACHEFS_NOCOW_LOCKING_H */

View File

@ -19,6 +19,11 @@ const char * const bch2_error_actions[] = {
NULL
};
const char * const bch2_degraded_actions[] = {
BCH_DEGRADED_ACTIONS()
NULL
};
const char * const bch2_fsck_fix_opts[] = {
BCH_FIX_ERRORS_OPTS()
NULL
@ -273,20 +278,20 @@ int bch2_opt_lookup(const char *name)
return -1;
}
struct synonym {
struct opt_synonym {
const char *s1, *s2;
};
static const struct synonym bch_opt_synonyms[] = {
static const struct opt_synonym bch2_opt_synonyms[] = {
{ "quota", "usrquota" },
};
static int bch2_mount_opt_lookup(const char *name)
{
const struct synonym *i;
const struct opt_synonym *i;
for (i = bch_opt_synonyms;
i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
for (i = bch2_opt_synonyms;
i < bch2_opt_synonyms + ARRAY_SIZE(bch2_opt_synonyms);
i++)
if (!strcmp(name, i->s1))
name = i->s2;
@ -294,6 +299,30 @@ static int bch2_mount_opt_lookup(const char *name)
return bch2_opt_lookup(name);
}
struct opt_val_synonym {
const char *opt, *v1, *v2;
};
static const struct opt_val_synonym bch2_opt_val_synonyms[] = {
{ "degraded", "true", "yes" },
{ "degraded", "false", "no" },
{ "degraded", "1", "yes" },
{ "degraded", "0", "no" },
};
static const char *bch2_opt_val_synonym_lookup(const char *opt, const char *val)
{
const struct opt_val_synonym *i;
for (i = bch2_opt_val_synonyms;
i < bch2_opt_val_synonyms + ARRAY_SIZE(bch2_opt_val_synonyms);
i++)
if (!strcmp(opt, i->opt) && !strcmp(val, i->v1))
return i->v2;
return val;
}
int bch2_opt_validate(const struct bch_option *opt, u64 v, struct printbuf *err)
{
if (v < opt->min) {
@ -339,19 +368,14 @@ int bch2_opt_parse(struct bch_fs *c,
switch (opt->type) {
case BCH_OPT_BOOL:
if (val) {
ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool);
if (ret != -BCH_ERR_option_not_bool) {
*res = ret;
} else {
if (err)
prt_printf(err, "%s: must be bool", opt->attr.name);
return ret;
}
ret = lookup_constant(bool_names, val, -BCH_ERR_option_not_bool);
if (ret != -BCH_ERR_option_not_bool) {
*res = ret;
} else {
*res = 1;
if (err)
prt_printf(err, "%s: must be bool", opt->attr.name);
return ret;
}
break;
case BCH_OPT_UINT:
if (!val) {
@ -360,9 +384,15 @@ int bch2_opt_parse(struct bch_fs *c,
return -EINVAL;
}
ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res);
if (*val != '-') {
ret = opt->flags & OPT_HUMAN_READABLE
? bch2_strtou64_h(val, res)
: kstrtou64(val, 10, res);
} else {
prt_printf(err, "%s: must be a non-negative number", opt->attr.name);
return -BCH_ERR_option_negative;
}
if (ret < 0) {
if (err)
prt_printf(err, "%s: must be a number",
@ -498,6 +528,14 @@ int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
if (v)
bch2_check_set_feature(c, BCH_FEATURE_ec);
break;
case Opt_single_device:
if (v) {
mutex_lock(&c->sb_lock);
if (bch2_sb_nr_devices(c->disk_sb.sb) > 1)
ret = -BCH_ERR_not_single_device_filesystem;
mutex_unlock(&c->sb_lock);
}
break;
}
return ret;
@ -536,6 +574,11 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
if (id < 0)
return 0;
if (!val)
val = "1";
val = bch2_opt_val_synonym_lookup(name, val);
if (!(bch2_opt_table[id].flags & OPT_MOUNT))
goto bad_opt;

View File

@ -11,6 +11,7 @@
struct bch_fs;
extern const char * const bch2_error_actions[];
extern const char * const bch2_degraded_actions[];
extern const char * const bch2_fsck_fix_opts[];
extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
@ -302,14 +303,9 @@ enum fsck_err_opts {
NULL, "Enable project quotas") \
x(degraded, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
OPT_STR(bch2_degraded_actions), \
BCH_SB_DEGRADED_ACTION, BCH_DEGRADED_ask, \
NULL, "Allow mounting in degraded mode") \
x(very_degraded, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Allow mounting in when data will be missing") \
x(no_splitbrain_check, u8, \
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
@ -517,7 +513,7 @@ enum fsck_err_opts {
BCH_MEMBER_DATA_ALLOWED, BIT(BCH_DATA_journal)|BIT(BCH_DATA_btree)|BIT(BCH_DATA_user),\
"types", "Allowed data types for this device: journal, btree, and/or user")\
x(discard, u8, \
OPT_MOUNT|OPT_DEVICE|OPT_RUNTIME, \
OPT_MOUNT|OPT_FS|OPT_DEVICE|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_MEMBER_DISCARD, true, \
NULL, "Enable discard/TRIM support") \
@ -525,8 +521,13 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "BTREE_ITER_prefetch casuse btree nodes to be\n"\
" prefetched sequentially")
NULL, "BTREE_ITER_prefetch causes btree nodes to be\n"\
" prefetched sequentially") \
x(single_device, u8, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH_SB_SINGLE_DEVICE, false, \
NULL, "Devices with the same UUID may be mounted simultaneously")
struct bch_opts {
#define x(_name, _bits, ...) unsigned _name##_defined:1;

View File

@ -516,7 +516,7 @@ static int bch2_fs_quota_read_inode(struct btree_trans *trans,
bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
KEY_TYPE_QUOTA_NOCHECK);
advance:
bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
bch2_btree_iter_set_pos(trans, iter, bpos_nosnap_successor(iter->pos));
return 0;
}

View File

@ -95,6 +95,9 @@ static unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c,
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
return 0;
return bch2_bkey_ptrs_need_compress(c, opts, k, ptrs) |
bch2_bkey_ptrs_need_move(c, opts, ptrs);
}
@ -107,6 +110,9 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
if (!opts)
return 0;
if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned))
return 0;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
u64 sectors = 0;
@ -233,7 +239,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum)
bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_intent);
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -281,7 +287,7 @@ static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum,
bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work,
SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX),
BTREE_ITER_intent);
k = bch2_btree_iter_peek_slot(&iter);
k = bch2_btree_iter_peek_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -301,7 +307,7 @@ static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
struct btree_iter *work_iter)
{
return !kthread_should_stop()
? bch2_btree_iter_peek(work_iter)
? bch2_btree_iter_peek(trans, work_iter)
: bkey_s_c_null;
}
@ -335,7 +341,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
work_pos.inode ? BTREE_ID_extents : BTREE_ID_reflink,
work_pos,
BTREE_ITER_all_snapshots);
struct bkey_s_c k = bch2_btree_iter_peek_slot(extent_iter);
struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, extent_iter);
if (bkey_err(k))
return k;
@ -511,7 +517,7 @@ static int do_rebalance(struct moving_context *ctxt)
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
struct bch_fs_rebalance *r = &c->rebalance;
struct btree_iter rebalance_work_iter, extent_iter = { NULL };
struct btree_iter rebalance_work_iter, extent_iter = {};
struct bkey_s_c k;
int ret = 0;
@ -552,7 +558,7 @@ static int do_rebalance(struct moving_context *ctxt)
if (ret)
break;
bch2_btree_iter_advance(&rebalance_work_iter);
bch2_btree_iter_advance(trans, &rebalance_work_iter);
}
bch2_trans_iter_exit(trans, &extent_iter);

View File

@ -198,7 +198,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
BTREE_ITER_intent);
int ret = bch2_btree_iter_traverse(&iter);
int ret = bch2_btree_iter_traverse(trans, &iter);
if (ret)
goto out;
@ -261,7 +261,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
iter_flags);
ret = bch2_btree_iter_traverse(&iter);
ret = bch2_btree_iter_traverse(trans, &iter);
if (ret)
goto out;
@ -270,7 +270,7 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
ret = bch2_btree_iter_traverse(&iter) ?:
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto out;

View File

@ -495,7 +495,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
bool reflink_p_may_update_opts_field)
{
struct bch_fs *c = trans->c;
struct btree_iter reflink_iter = { NULL };
struct btree_iter reflink_iter = {};
struct bkey_s_c k;
struct bkey_i *r_v;
struct bkey_i_reflink_p *r_p;
@ -507,7 +507,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX,
BTREE_ITER_intent);
k = bch2_btree_iter_peek_prev(&reflink_iter);
k = bch2_btree_iter_peek_prev(trans, &reflink_iter);
ret = bkey_err(k);
if (ret)
goto err;
@ -569,12 +569,13 @@ err:
return ret;
}
static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
static struct bkey_s_c get_next_src(struct btree_trans *trans,
struct btree_iter *iter, struct bpos end)
{
struct bkey_s_c k;
int ret;
for_each_btree_key_max_continue_norestart(*iter, end, 0, k, ret) {
for_each_btree_key_max_continue_norestart(trans, *iter, end, 0, k, ret) {
if (bkey_extent_is_unwritten(k))
continue;
@ -583,7 +584,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
}
if (bkey_ge(iter->pos, end))
bch2_btree_iter_set_pos(iter, end);
bch2_btree_iter_set_pos(trans, iter, end);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
}
@ -647,27 +648,27 @@ s64 bch2_remap_range(struct bch_fs *c,
if (ret)
continue;
bch2_btree_iter_set_snapshot(&src_iter, src_snapshot);
bch2_btree_iter_set_snapshot(trans, &src_iter, src_snapshot);
ret = bch2_subvolume_get_snapshot(trans, dst_inum.subvol,
&dst_snapshot);
if (ret)
continue;
bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
bch2_btree_iter_set_snapshot(trans, &dst_iter, dst_snapshot);
if (dst_inum.inum < src_inum.inum) {
/* Avoid some lock cycle transaction restarts */
ret = bch2_btree_iter_traverse(&dst_iter);
ret = bch2_btree_iter_traverse(trans, &dst_iter);
if (ret)
continue;
}
dst_done = dst_iter.pos.offset - dst_start.offset;
src_want = POS(src_start.inode, src_start.offset + dst_done);
bch2_btree_iter_set_pos(&src_iter, src_want);
bch2_btree_iter_set_pos(trans, &src_iter, src_want);
src_k = get_next_src(&src_iter, src_end);
src_k = get_next_src(trans, &src_iter, src_end);
ret = bkey_err(src_k);
if (ret)
continue;
@ -738,7 +739,7 @@ s64 bch2_remap_range(struct bch_fs *c,
do {
struct bch_inode_unpacked inode_u;
struct btree_iter inode_iter = { NULL };
struct btree_iter inode_iter = {};
bch2_trans_begin(trans);

View File

@ -16,6 +16,7 @@ enum counters_flags {
x(io_read_split, 33, TYPE_COUNTER) \
x(io_read_reuse_race, 34, TYPE_COUNTER) \
x(io_read_retry, 32, TYPE_COUNTER) \
x(io_read_fail_and_poison, 82, TYPE_COUNTER) \
x(io_write, 1, TYPE_SECTORS) \
x(io_move, 2, TYPE_SECTORS) \
x(io_move_read, 35, TYPE_SECTORS) \

View File

@ -20,7 +20,7 @@ struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i);
static inline bool bch2_dev_is_online(struct bch_dev *ca)
{
return !percpu_ref_is_zero(&ca->io_ref);
return !percpu_ref_is_zero(&ca->io_ref[READ]);
}
static inline struct bch_dev *bch2_dev_rcu(struct bch_fs *, unsigned);
@ -156,33 +156,34 @@ static inline struct bch_dev *bch2_get_next_dev(struct bch_fs *c, struct bch_dev
static inline struct bch_dev *bch2_get_next_online_dev(struct bch_fs *c,
struct bch_dev *ca,
unsigned state_mask)
unsigned state_mask,
int rw)
{
rcu_read_lock();
if (ca)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[rw]);
while ((ca = __bch2_next_dev(c, ca, NULL)) &&
(!((1 << ca->mi.state) & state_mask) ||
!percpu_ref_tryget(&ca->io_ref)))
!percpu_ref_tryget(&ca->io_ref[rw])))
;
rcu_read_unlock();
return ca;
}
#define __for_each_online_member(_c, _ca, state_mask) \
#define __for_each_online_member(_c, _ca, state_mask, rw) \
for (struct bch_dev *_ca = NULL; \
(_ca = bch2_get_next_online_dev(_c, _ca, state_mask));)
(_ca = bch2_get_next_online_dev(_c, _ca, state_mask, rw));)
#define for_each_online_member(c, ca) \
__for_each_online_member(c, ca, ~0)
__for_each_online_member(c, ca, ~0, READ)
#define for_each_rw_member(c, ca) \
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw))
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), WRITE)
#define for_each_readable_member(c, ca) \
__for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro))
__for_each_online_member(c, ca, BIT( BCH_MEMBER_STATE_rw)|BIT(BCH_MEMBER_STATE_ro), READ)
static inline bool bch2_dev_exists(const struct bch_fs *c, unsigned dev)
{
@ -287,7 +288,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu(c, dev);
if (ca && !percpu_ref_tryget(&ca->io_ref))
if (ca && !percpu_ref_tryget(&ca->io_ref[rw]))
ca = NULL;
rcu_read_unlock();
@ -297,7 +298,7 @@ static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev,
return ca;
if (ca)
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[rw]);
return NULL;
}

View File

@ -281,6 +281,16 @@ fsck_err:
return ret;
}
static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id)
{
mutex_lock(&c->snapshot_table_lock);
int ret = snapshot_t_mut(c, id)
? 0
: -BCH_ERR_ENOMEM_mark_snapshot;
mutex_unlock(&c->snapshot_table_lock);
return ret;
}
static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
@ -843,9 +853,6 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
if (bch2_snapshot_exists(c, id))
return 0;
/* Do we need to reconstruct the snapshot_tree entry as well? */
struct btree_iter iter;
struct bkey_s_c k;
@ -890,9 +897,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id)
}
bch2_trans_iter_exit(trans, &iter);
return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0);
return bch2_snapshot_table_make_room(c, id) ?:
bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0);
}
/* Figure out which snapshot nodes belong in the same tree: */
@ -1074,9 +1080,9 @@ static inline void normalize_snapshot_child_pointers(struct bch_snapshot *s)
static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id)
{
struct bch_fs *c = trans->c;
struct btree_iter iter, p_iter = (struct btree_iter) { NULL };
struct btree_iter c_iter = (struct btree_iter) { NULL };
struct btree_iter tree_iter = (struct btree_iter) { NULL };
struct btree_iter iter, p_iter = {};
struct btree_iter c_iter = {};
struct btree_iter tree_iter = {};
struct bkey_s_c_snapshot s;
u32 parent_id, child_id;
unsigned i;
@ -1193,13 +1199,13 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
bch2_trans_iter_init(trans, &iter, BTREE_ID_snapshots,
POS_MIN, BTREE_ITER_intent);
k = bch2_btree_iter_peek(&iter);
k = bch2_btree_iter_peek(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;
for (i = 0; i < nr_snapids; i++) {
k = bch2_btree_iter_prev_slot(&iter);
k = bch2_btree_iter_prev_slot(trans, &iter);
ret = bkey_err(k);
if (ret)
goto err;

View File

@ -195,7 +195,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = { NULL };
struct btree_iter iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c k;
int ret = 0;

View File

@ -231,11 +231,11 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
struct bkey_s_c k;
int ret;
bch2_trans_copy_iter(&iter, start);
bch2_trans_copy_iter(trans, &iter, start);
bch2_btree_iter_advance(&iter);
bch2_btree_iter_advance(trans, &iter);
for_each_btree_key_continue_norestart(iter, BTREE_ITER_slots, k, ret) {
for_each_btree_key_continue_norestart(trans, iter, BTREE_ITER_slots, k, ret) {
if (k.k->type != desc.key_type &&
k.k->type != KEY_TYPE_hash_whiteout)
break;
@ -280,7 +280,7 @@ struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans,
}
if (!slot.path && !(flags & STR_HASH_must_replace))
bch2_trans_copy_iter(&slot, iter);
bch2_trans_copy_iter(trans, &slot, iter);
if (k.k->type != KEY_TYPE_hash_whiteout)
goto not_found;

View File

@ -275,7 +275,7 @@ int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
struct bkey_s_c k = bch2_btree_iter_peek(&iter);
struct bkey_s_c k = bch2_btree_iter_peek(trans, &iter);
bch2_trans_iter_exit(trans, &iter);
return bkey_err(k) ?: k.k && k.k->p.inode == subvol
@ -478,13 +478,11 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
{
struct bch_fs *c = container_of(work, struct bch_fs,
snapshot_wait_for_pagecache_and_delete_work);
snapshot_id_list s;
u32 *id;
int ret = 0;
while (!ret) {
mutex_lock(&c->snapshots_unlinked_lock);
s = c->snapshots_unlinked;
snapshot_id_list s = c->snapshots_unlinked;
darray_init(&c->snapshots_unlinked);
mutex_unlock(&c->snapshots_unlinked_lock);
@ -493,7 +491,7 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor
bch2_evict_subvolume_inodes(c, &s);
for (id = s.data; id < s.data + s.nr; id++) {
darray_for_each(s, id) {
ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
bch_err_msg(c, ret, "deleting subvolume %u", *id);
if (ret)
@ -574,7 +572,7 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
bool ro)
{
struct bch_fs *c = trans->c;
struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
struct btree_iter dst_iter, src_iter = {};
struct bkey_i_subvolume *new_subvol = NULL;
struct bkey_i_subvolume *src_subvol = NULL;
u32 parent = 0, new_nodes[2], snapshot_subvols[2];
@ -715,11 +713,10 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
return ret;
}
int bch2_fs_subvolumes_init(struct bch_fs *c)
void bch2_fs_subvolumes_init_early(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
bch2_subvolume_wait_for_pagecache_and_delete);
mutex_init(&c->snapshots_unlinked_lock);
return 0;
}

View File

@ -33,16 +33,16 @@ int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
int bch2_subvol_is_ro(struct bch_fs *, u32);
static inline struct bkey_s_c
bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos end,
u32 subvolid, unsigned flags)
bch2_btree_iter_peek_in_subvolume_max_type(struct btree_trans *trans, struct btree_iter *iter,
struct bpos end, u32 subvolid, unsigned flags)
{
u32 snapshot;
int ret = bch2_subvolume_get_snapshot(iter->trans, subvolid, &snapshot);
int ret = bch2_subvolume_get_snapshot(trans, subvolid, &snapshot);
if (ret)
return bkey_s_c_err(ret);
bch2_btree_iter_set_snapshot(iter, snapshot);
return bch2_btree_iter_peek_max_type(iter, end, flags);
bch2_btree_iter_set_snapshot(trans, iter, snapshot);
return bch2_btree_iter_peek_max_type(trans, iter, end, flags);
}
#define for_each_btree_key_in_subvolume_max_continue(_trans, _iter, \
@ -53,14 +53,14 @@ bch2_btree_iter_peek_in_subvolume_max_type(struct btree_iter *iter, struct bpos
\
do { \
_ret3 = lockrestart_do(_trans, ({ \
(_k) = bch2_btree_iter_peek_in_subvolume_max_type(&(_iter), \
(_k) = bch2_btree_iter_peek_in_subvolume_max_type(trans, &(_iter),\
_end, _subvolid, (_flags)); \
if (!(_k).k) \
break; \
\
bkey_err(_k) ?: (_do); \
})); \
} while (!_ret3 && bch2_btree_iter_advance(&(_iter))); \
} while (!_ret3 && bch2_btree_iter_advance(_trans, &(_iter))); \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret3; \
@ -86,6 +86,6 @@ int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, boo
int bch2_initialize_subvolumes(struct bch_fs *);
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
int bch2_fs_subvolumes_init(struct bch_fs *);
void bch2_fs_subvolumes_init_early(struct bch_fs *);
#endif /* _BCACHEFS_SUBVOLUME_H */

View File

@ -248,7 +248,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
struct bch_sb_handle *dev_sb = &ca->disk_sb;
if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
return NULL;
}
}
@ -945,7 +945,7 @@ static void write_super_endio(struct bio *bio)
}
closure_put(&ca->fs->sb_write);
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
}
static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
@ -963,7 +963,7 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[READ]);
closure_bio_submit(bio, &c->sb_write);
}
@ -989,7 +989,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
bio_sectors(bio));
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[READ]);
closure_bio_submit(bio, &c->sb_write);
}
@ -1006,7 +1006,7 @@ int bch2_write_super(struct bch_fs *c)
trace_and_count(c, write_super, c, _RET_IP_);
if (c->opts.very_degraded)
if (c->opts.degraded == BCH_DEGRADED_very)
degraded_flags |= BCH_FORCE_IF_LOST;
lockdep_assert_held(&c->sb_lock);
@ -1014,13 +1014,20 @@ int bch2_write_super(struct bch_fs *c)
closure_init_stack(cl);
memset(&sb_written, 0, sizeof(sb_written));
/*
* Note: we do writes to RO devices here, and we might want to change
* that in the future.
*
* For now, we expect to be able to call write_super() when we're not
* yet RW:
*/
for_each_online_member(c, ca) {
ret = darray_push(&online_devices, ca);
if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
percpu_ref_put(&ca->io_ref);
percpu_ref_put(&ca->io_ref[READ]);
goto out;
}
percpu_ref_get(&ca->io_ref);
percpu_ref_get(&ca->io_ref[READ]);
}
/* Make sure we're using the new magic numbers: */
@ -1186,7 +1193,7 @@ out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
darray_for_each(online_devices, ca)
percpu_ref_put(&(*ca)->io_ref);
percpu_ref_put(&(*ca)->io_ref[READ]);
darray_exit(&online_devices);
printbuf_exit(&err);
return ret;

View File

@ -79,6 +79,8 @@ MODULE_SOFTDEP("pre: chacha20");
MODULE_SOFTDEP("pre: poly1305");
MODULE_SOFTDEP("pre: xxhash");
typedef DARRAY(struct bch_sb_handle) bch_sb_handles;
const char * const bch2_fs_flag_strs[] = {
#define x(n) #n,
BCH_FS_FLAGS()
@ -185,7 +187,9 @@ static void bch2_dev_unlink(struct bch_dev *);
static void bch2_dev_free(struct bch_dev *);
static int bch2_dev_alloc(struct bch_fs *, unsigned);
static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_fs_init_rw(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@ -294,8 +298,10 @@ static void __bch2_fs_read_only(struct bch_fs *c)
/*
* After stopping journal:
*/
for_each_member_device(c, ca)
for_each_member_device(c, ca) {
bch2_dev_io_ref_stop(ca, WRITE);
bch2_dev_allocator_remove(c, ca);
}
}
#ifndef BCH_WRITE_REF_DEBUG
@ -461,11 +467,11 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch_info(c, "going read-write");
ret = bch2_sb_members_v2_init(c);
ret = bch2_fs_init_rw(c);
if (ret)
goto err;
ret = bch2_fs_mark_dirty(c);
ret = bch2_sb_members_v2_init(c);
if (ret)
goto err;
@ -480,10 +486,24 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(JOURNAL_need_flush_write, &c->journal.flags);
set_bit(JOURNAL_running, &c->journal.flags);
for_each_rw_member(c, ca)
__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
bch2_dev_allocator_add(c, ca);
percpu_ref_reinit(&ca->io_ref[WRITE]);
}
bch2_recalc_capacity(c);
ret = bch2_fs_mark_dirty(c);
if (ret)
goto err;
spin_lock(&c->journal.lock);
bch2_journal_space_available(&c->journal);
spin_unlock(&c->journal.lock);
ret = bch2_journal_reclaim_start(&c->journal);
if (ret)
goto err;
set_bit(BCH_FS_rw, &c->flags);
set_bit(BCH_FS_was_rw, &c->flags);
@ -495,11 +515,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
atomic_long_inc(&c->writes[i]);
}
#endif
ret = bch2_journal_reclaim_start(&c->journal);
if (ret)
goto err;
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
@ -573,7 +588,6 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[WRITE]);
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_fs_btree_gc_exit(c);
bch2_journal_keys_put_initial(c);
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
@ -606,8 +620,8 @@ static void __bch2_fs_free(struct bch_fs *c)
destroy_workqueue(c->btree_read_complete_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->btree_io_complete_wq)
destroy_workqueue(c->btree_io_complete_wq);
if (c->btree_write_complete_wq)
destroy_workqueue(c->btree_write_complete_wq);
if (c->btree_update_wq)
destroy_workqueue(c->btree_update_wq);
@ -675,6 +689,7 @@ void bch2_fs_free(struct bch_fs *c)
if (ca) {
EBUG_ON(atomic_long_read(&ca->ref) != 1);
bch2_dev_io_ref_stop(ca, READ);
bch2_free_super(&ca->disk_sb);
bch2_dev_free(ca);
}
@ -697,7 +712,8 @@ static int bch2_fs_online(struct bch_fs *c)
lockdep_assert_held(&bch_fs_list_lock);
if (__bch2_uuid_to_fs(c->sb.uuid)) {
if (!c->opts.single_device &&
__bch2_uuid_to_fs(c->sb.uuid)) {
bch_err(c, "filesystem UUID already open");
return -EINVAL;
}
@ -710,7 +726,9 @@ static int bch2_fs_online(struct bch_fs *c)
bch2_fs_debug_init(c);
ret = kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ?:
ret = (!c->opts.single_device
? kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b)
: kobject_add(&c->kobj, NULL, "%s", c->name)) ?:
kobject_add(&c->internal, &c->kobj, "internal") ?:
kobject_add(&c->opts_dir, &c->kobj, "options") ?:
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
@ -741,7 +759,37 @@ err:
return ret;
}
static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
static int bch2_fs_init_rw(struct bch_fs *c)
{
if (test_bit(BCH_FS_rw_init_done, &c->flags))
return 0;
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)))
return -BCH_ERR_ENOMEM_fs_other_alloc;
int ret = bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_fs_io_buffered_init(c) ?:
bch2_fs_io_write_init(c) ?:
bch2_fs_journal_init(&c->journal);
if (ret)
return ret;
set_bit(BCH_FS_rw_init_done, &c->flags);
return 0;
}
static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts,
bch_sb_handles *sbs)
{
struct bch_fs *c;
struct printbuf name = PRINTBUF;
@ -784,18 +832,24 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_init(&c->times[i]);
bch2_fs_copygc_init(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_iter_init_early(c);
bch2_fs_btree_interior_update_init_early(c);
bch2_fs_journal_keys_init(c);
bch2_fs_allocator_background_init(c);
bch2_fs_allocator_foreground_init(c);
bch2_fs_rebalance_init(c);
bch2_fs_quota_init(c);
bch2_fs_btree_cache_init_early(&c->btree_cache);
bch2_fs_btree_gc_init_early(c);
bch2_fs_btree_interior_update_init_early(c);
bch2_fs_btree_iter_init_early(c);
bch2_fs_btree_key_cache_init_early(&c->btree_key_cache);
bch2_fs_btree_write_buffer_init_early(c);
bch2_fs_copygc_init(c);
bch2_fs_ec_init_early(c);
bch2_fs_journal_init_early(&c->journal);
bch2_fs_journal_keys_init(c);
bch2_fs_move_init(c);
bch2_fs_nocow_locking_init_early(c);
bch2_fs_quota_init(c);
bch2_fs_rebalance_init(c);
bch2_fs_sb_errors_init_early(c);
bch2_fs_subvolumes_init_early(c);
INIT_LIST_HEAD(&c->list);
@ -821,8 +875,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write];
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
bch2_fs_btree_cache_init_early(&c->btree_cache);
mutex_init(&c->sectors_available_lock);
ret = percpu_init_rwsem(&c->mark_lock);
@ -855,14 +907,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
}
#endif
pr_uuid(&name, c->sb.user_uuid.b);
ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
if (ret)
goto err;
strscpy(c->name, name.buf, sizeof(c->name));
printbuf_exit(&name);
/* Compat: */
if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@ -893,22 +937,24 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err;
}
if (sbs->nr != 1 && !c->opts.single_device)
pr_uuid(&name, c->sb.user_uuid.b);
else
prt_bdevname(&name, sbs->data[0].bdev);
ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
if (ret)
goto err;
strscpy(c->name, name.buf, sizeof(c->name));
printbuf_exit(&name);
iter_size = sizeof(struct sort_iter) +
(btree_blocks(c) + 1) * 2 *
sizeof(struct sort_iter_set);
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
@ -928,29 +974,22 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto err;
}
ret = bch2_fs_counters_init(c) ?:
bch2_fs_sb_errors_init(c) ?:
ret =
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_io_clock_init(&c->io_clock[READ]) ?:
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_journal_init(&c->journal) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_btree_gc_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
bch2_fs_subvolumes_init(c) ?:
bch2_fs_io_read_init(c) ?:
bch2_fs_io_write_init(c) ?:
bch2_fs_nocow_locking_init(c) ?:
bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?:
bch2_fs_counters_init(c) ?:
bch2_fs_ec_init(c) ?:
bch2_fs_vfs_init(c) ?:
bch2_fs_encryption_init(c) ?:
bch2_fs_fsio_init(c) ?:
bch2_fs_fs_io_buffered_init(c) ?:
bch2_fs_fs_io_direct_init(c);
bch2_fs_fs_io_direct_init(c) ?:
bch2_fs_io_read_init(c) ?:
bch2_fs_sb_errors_init(c) ?:
bch2_fs_vfs_init(c);
if (ret)
goto err;
@ -993,12 +1032,6 @@ static void print_mount_opts(struct bch_fs *c)
prt_str(&p, "starting version ");
bch2_version_to_text(&p, c->sb.version);
if (c->opts.read_only) {
prt_str(&p, " opts=");
first = false;
prt_printf(&p, "ro");
}
for (i = 0; i < bch2_opts_nr; i++) {
const struct bch_option *opt = &bch2_opt_table[i];
u64 v = bch2_opt_get_by_id(&c->opts, i);
@ -1098,6 +1131,9 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
{
struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
if (c->opts.single_device)
return -BCH_ERR_single_device_filesystem;
if (le16_to_cpu(sb->block_size) != block_sectors(c))
return -BCH_ERR_mismatched_block_size;
@ -1199,6 +1235,15 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
/* Device startup/shutdown: */
static void bch2_dev_io_ref_stop(struct bch_dev *ca, int rw)
{
if (!percpu_ref_is_zero(&ca->io_ref[rw])) {
reinit_completion(&ca->io_ref_completion[rw]);
percpu_ref_kill(&ca->io_ref[rw]);
wait_for_completion(&ca->io_ref_completion[rw]);
}
}
static void bch2_dev_release(struct kobject *kobj)
{
struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
@ -1208,6 +1253,9 @@ static void bch2_dev_release(struct kobject *kobj)
static void bch2_dev_free(struct bch_dev *ca)
{
WARN_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
WARN_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
cancel_work_sync(&ca->io_error_work);
bch2_dev_unlink(ca);
@ -1226,7 +1274,8 @@ static void bch2_dev_free(struct bch_dev *ca)
bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
percpu_ref_exit(&ca->io_ref);
percpu_ref_exit(&ca->io_ref[WRITE]);
percpu_ref_exit(&ca->io_ref[READ]);
#ifndef CONFIG_BCACHEFS_DEBUG
percpu_ref_exit(&ca->ref);
#endif
@ -1238,14 +1287,12 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
lockdep_assert_held(&c->state_lock);
if (percpu_ref_is_zero(&ca->io_ref))
if (percpu_ref_is_zero(&ca->io_ref[READ]))
return;
__bch2_dev_read_only(c, ca);
reinit_completion(&ca->io_ref_completion);
percpu_ref_kill(&ca->io_ref);
wait_for_completion(&ca->io_ref_completion);
bch2_dev_io_ref_stop(ca, READ);
bch2_dev_unlink(ca);
@ -1262,11 +1309,18 @@ static void bch2_dev_ref_complete(struct percpu_ref *ref)
}
#endif
static void bch2_dev_io_ref_complete(struct percpu_ref *ref)
static void bch2_dev_io_ref_read_complete(struct percpu_ref *ref)
{
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref);
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[READ]);
complete(&ca->io_ref_completion);
complete(&ca->io_ref_completion[READ]);
}
static void bch2_dev_io_ref_write_complete(struct percpu_ref *ref)
{
struct bch_dev *ca = container_of(ref, struct bch_dev, io_ref[WRITE]);
complete(&ca->io_ref_completion[WRITE]);
}
static void bch2_dev_unlink(struct bch_dev *ca)
@ -1330,7 +1384,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
kobject_init(&ca->kobj, &bch2_dev_ktype);
init_completion(&ca->ref_completion);
init_completion(&ca->io_ref_completion);
init_completion(&ca->io_ref_completion[READ]);
init_completion(&ca->io_ref_completion[WRITE]);
INIT_WORK(&ca->io_error_work, bch2_io_error_work);
@ -1356,7 +1411,9 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_dev_allocator_background_init(ca);
if (percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
if (percpu_ref_init(&ca->io_ref[READ], bch2_dev_io_ref_read_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref[WRITE], bch2_dev_io_ref_write_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
!(ca->sb_read_scratch = kmalloc(BCH_SB_READ_SCRATCH_BUF_SIZE, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
@ -1419,7 +1476,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
return -BCH_ERR_device_size_too_small;
}
BUG_ON(!percpu_ref_is_zero(&ca->io_ref));
BUG_ON(!percpu_ref_is_zero(&ca->io_ref[READ]));
BUG_ON(!percpu_ref_is_zero(&ca->io_ref[WRITE]));
ret = bch2_dev_journal_init(ca, sb->sb);
if (ret)
@ -1438,7 +1496,7 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
ca->dev = ca->disk_sb.bdev->bd_dev;
percpu_ref_reinit(&ca->io_ref);
percpu_ref_reinit(&ca->io_ref[READ]);
return 0;
}
@ -1466,11 +1524,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
struct printbuf name = PRINTBUF;
prt_bdevname(&name, ca->disk_sb.bdev);
if (c->sb.nr_devices == 1)
strscpy(c->name, name.buf, sizeof(c->name));
strscpy(ca->name, name.buf, sizeof(ca->name));
printbuf_exit(&name);
rebalance_wakeup(c);
@ -1535,19 +1589,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i, flags = 0;
unsigned flags = 0;
if (c->opts.very_degraded)
switch (c->opts.degraded) {
case BCH_DEGRADED_very:
flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
if (c->opts.degraded)
break;
case BCH_DEGRADED_yes:
flags |= BCH_FORCE_IF_DEGRADED;
if (!c->opts.degraded &&
!c->opts.very_degraded) {
break;
default:
mutex_lock(&c->sb_lock);
for (i = 0; i < c->disk_sb.sb->nr_devices; i++) {
for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
@ -1561,6 +1614,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
}
}
mutex_unlock(&c->sb_lock);
break;
}
return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
@ -1568,6 +1622,8 @@ static bool bch2_fs_may_start(struct bch_fs *c)
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
bch2_dev_io_ref_stop(ca, WRITE);
/*
* The allocator thread itself allocates btree nodes, so stop it first:
*/
@ -1584,6 +1640,10 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
if (percpu_ref_is_zero(&ca->io_ref[WRITE]))
percpu_ref_reinit(&ca->io_ref[WRITE]);
bch2_dev_do_discards(ca);
}
@ -1731,7 +1791,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
return 0;
err:
if (ca->mi.state == BCH_MEMBER_STATE_rw &&
!percpu_ref_is_zero(&ca->io_ref))
!percpu_ref_is_zero(&ca->io_ref[READ]))
__bch2_dev_read_write(c, ca);
up_write(&c->state_lock);
return ret;
@ -1741,7 +1801,7 @@ err:
int bch2_dev_add(struct bch_fs *c, const char *path)
{
struct bch_opts opts = bch2_opts_empty();
struct bch_sb_handle sb;
struct bch_sb_handle sb = {};
struct bch_dev *ca = NULL;
struct printbuf errbuf = PRINTBUF;
struct printbuf label = PRINTBUF;
@ -2126,7 +2186,7 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r)
struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts)
{
DARRAY(struct bch_sb_handle) sbs = { 0 };
bch_sb_handles sbs = {};
struct bch_fs *c = NULL;
struct bch_sb_handle *best = NULL;
struct printbuf errbuf = PRINTBUF;
@ -2179,7 +2239,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
goto err_print;
}
c = bch2_fs_alloc(best->sb, opts);
c = bch2_fs_alloc(best->sb, opts, &sbs);
ret = PTR_ERR_OR_ZERO(c);
if (ret)
goto err;

View File

@ -43,7 +43,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
BTREE_ITER_intent);
ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, &k.k_i, 0));
bch_err_msg(c, ret, "update error");
if (ret)
@ -51,7 +51,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
pr_info("deleting once");
ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_delete_at(trans, &iter, 0));
bch_err_msg(c, ret, "delete error (first)");
if (ret)
@ -59,7 +59,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
pr_info("deleting twice");
ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_delete_at(trans, &iter, 0));
bch_err_msg(c, ret, "delete error (second)");
if (ret)
@ -84,7 +84,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
BTREE_ITER_intent);
ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_trans_update(trans, &iter, &k.k_i, 0));
bch_err_msg(c, ret, "update error");
if (ret)
@ -94,7 +94,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
bch2_journal_flush_all_pins(&c->journal);
ret = commit_do(trans, NULL, NULL, 0,
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_delete_at(trans, &iter, 0));
bch_err_msg(c, ret, "delete error");
if (ret)
@ -349,10 +349,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
BUG_ON(k.k);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
BUG_ON(k.k);
bch2_trans_iter_exit(trans, &iter);
@ -369,10 +369,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
BUG_ON(k.k);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
BUG_ON(k.k);
bch2_trans_iter_exit(trans, &iter);
@ -488,7 +488,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
trans = bch2_trans_get(c);
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, snapid_lo), 0);
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX))));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX))));
BUG_ON(k.k->p.snapshot != U32_MAX);
@ -602,9 +602,9 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
SPOS(0, 0, U32_MAX), 0);
for (i = 0; i < nr; i++) {
bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
ret = bkey_err(k);
if (ret)
break;
@ -623,9 +623,9 @@ static int rand_mixed_trans(struct btree_trans *trans,
struct bkey_s_c k;
int ret;
bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
bch2_btree_iter_set_pos(trans, iter, SPOS(0, pos, U32_MAX));
k = bch2_btree_iter_peek(iter);
k = bch2_btree_iter_peek(trans, iter);
ret = bkey_err(k);
bch_err_msg(trans->c, ret, "lookup error");
if (ret)
@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_intent);
k = bch2_btree_iter_peek_max(&iter, POS(0, U64_MAX));
k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;

View File

@ -339,6 +339,11 @@ DEFINE_EVENT(bio, io_read_reuse_race,
TP_ARGS(bio)
);
DEFINE_EVENT(bio, io_read_fail_and_poison,
TP_PROTO(struct bio *bio),
TP_ARGS(bio)
);
/* ec.c */
TRACE_EVENT(stripe_create,

View File

@ -55,15 +55,16 @@ static inline size_t buf_pages(void *p, size_t len)
PAGE_SIZE);
}
static inline void *bch2_kvmalloc(size_t n, gfp_t flags)
static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags)
{
void *p = unlikely(n >= INT_MAX)
? vmalloc(n)
: kvmalloc(n, flags & ~__GFP_ZERO);
? vmalloc_noprof(n)
: kvmalloc_noprof(n, flags & ~__GFP_ZERO);
if (p && (flags & __GFP_ZERO))
memset(p, 0, n);
return p;
}
#define bch2_kvmalloc(...) alloc_hooks(bch2_kvmalloc_noprof(__VA_ARGS__))
#define init_heap(heap, _size, gfp) \
({ \

View File

@ -168,7 +168,7 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
int type, int flags)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = { NULL };
struct btree_iter inode_iter = {};
int ret;
ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:

View File

@ -7,6 +7,7 @@ use bch_bindgen::btree::BtreeNodeIter;
use bch_bindgen::btree::BtreeTrans;
use bch_bindgen::fs::Fs;
use bch_bindgen::opt_set;
use bch_bindgen::c::bch_degraded_actions;
use clap::Parser;
use std::io::{stdout, IsTerminal};
@ -167,8 +168,7 @@ fn cmd_list_inner(opt: &Cli) -> anyhow::Result<()> {
opt_set!(fs_opts, nochanges, 1);
opt_set!(fs_opts, read_only, 1);
opt_set!(fs_opts, norecovery, 1);
opt_set!(fs_opts, degraded, 1);
opt_set!(fs_opts, very_degraded, 1);
opt_set!(fs_opts, degraded, bch_degraded_actions::BCH_DEGRADED_very as u8);
opt_set!(
fs_opts,
errors,