Update bcachefs sources to ba3f652e4cdc bcachefs: Decrypt before checking if we read the right btree node

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-11-20 13:29:10 -05:00
parent 969305f122
commit 5c811f012b
34 changed files with 533 additions and 383 deletions

View File

@@ -1 +1 @@
-c53ba9651da768e74b787eb40bc05fd56e9ca5ef
+ba3f652e4cdc86313cb13380efd59f1e6e6f484f

View File

@ -34,15 +34,9 @@
#define bio_iter_iovec(bio, iter) \ #define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter)) bvec_iter_bvec((bio)->bi_io_vec, (iter))
#define bio_iter_page(bio, iter) \
bvec_iter_page((bio)->bi_io_vec, (iter))
#define bio_iter_len(bio, iter) \ #define bio_iter_len(bio, iter) \
bvec_iter_len((bio)->bi_io_vec, (iter)) bvec_iter_len((bio)->bi_io_vec, (iter))
#define bio_iter_offset(bio, iter) \
bvec_iter_offset((bio)->bi_io_vec, (iter))
#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter)
#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter)
#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter)
#define bio_multiple_segments(bio) \ #define bio_multiple_segments(bio) \
@ -99,20 +93,6 @@ static inline unsigned int bio_cur_bytes(struct bio *bio)
return bio->bi_iter.bi_size; return bio->bi_iter.bi_size;
} }
static inline void *bio_data(struct bio *bio)
{
if (bio_has_data(bio))
return page_address(bio_page(bio)) + bio_offset(bio);
return NULL;
}
#define __bio_kmap_atomic(bio, iter) \
(kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \
bio_iter_iovec((bio), (iter)).bv_offset)
#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
static inline struct bio_vec *bio_next_segment(const struct bio *bio, static inline struct bio_vec *bio_next_segment(const struct bio *bio,
struct bvec_iter_all *iter) struct bvec_iter_all *iter)
{ {
@ -238,7 +218,6 @@ struct bio *bio_alloc_bioset(struct block_device *, unsigned,
extern void bio_put(struct bio *); extern void bio_put(struct bio *);
int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
void bio_add_virt_nofail(struct bio *, void *, unsigned); void bio_add_virt_nofail(struct bio *, void *, unsigned);
static inline void bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned len) static inline void bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned len)
@ -265,8 +244,6 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter); struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_copy_data(struct bio *dst, struct bio *src);
void bio_free_pages(struct bio *bio);
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
static inline void zero_fill_bio(struct bio *bio) static inline void zero_fill_bio(struct bio *bio)
@ -284,30 +261,13 @@ do { \
(dst)->bi_bdev = (src)->bi_bdev; \ (dst)->bi_bdev = (src)->bi_bdev; \
} while (0) } while (0)
static inline void *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
{
return page_address(bvec->bv_page) + bvec->bv_offset;
}
static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
{
*flags = 0;
}
static inline void *bvec_kmap_local(struct bio_vec *bvec) static inline void *bvec_kmap_local(struct bio_vec *bvec)
{ {
return page_address(bvec->bv_page) + bvec->bv_offset; return bvec_virt(bvec);
} }
static inline void bvec_kunmap_local(char *buffer) {} static inline void bvec_kunmap_local(char *buffer) {}
static inline void *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
unsigned long *flags)
{
return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
}
#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)
#define bio_kmap_irq(bio, flags) \ #define bio_kmap_irq(bio, flags) \
__bio_kmap_irq((bio), (bio)->bi_iter, (flags)) __bio_kmap_irq((bio), (bio)->bi_iter, (flags))
#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)

View File

@ -27,9 +27,8 @@
* was unsigned short, but we might as well be ready for > 64kB I/O pages * was unsigned short, but we might as well be ready for > 64kB I/O pages
*/ */
struct bio_vec { struct bio_vec {
struct page *bv_page; void *bv_addr;
unsigned int bv_len; unsigned int bv_len;
unsigned int bv_offset;
}; };
struct bvec_iter { struct bvec_iter {
@ -53,21 +52,22 @@ struct bvec_iter_all {
*/ */
#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) #define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])
#define bvec_iter_page(bvec, iter) \ static inline void *bvec_virt(struct bio_vec *bv)
(__bvec_iter_bvec((bvec), (iter))->bv_page) {
return bv->bv_addr;
}
#define bvec_iter_addr(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_addr + (iter).bi_bvec_done)
#define bvec_iter_len(bvec, iter) \ #define bvec_iter_len(bvec, iter) \
min((iter).bi_size, \ min((iter).bi_size, \
__bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
#define bvec_iter_offset(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
#define bvec_iter_bvec(bvec, iter) \ #define bvec_iter_bvec(bvec, iter) \
((struct bio_vec) { \ ((struct bio_vec) { \
.bv_page = bvec_iter_page((bvec), (iter)), \ .bv_addr = bvec_iter_addr((bvec), (iter)), \
.bv_len = bvec_iter_len((bvec), (iter)), \ .bv_len = bvec_iter_len((bvec), (iter)), \
.bv_offset = bvec_iter_offset((bvec), (iter)), \
}) })
static inline void bvec_iter_advance(const struct bio_vec *bv, static inline void bvec_iter_advance(const struct bio_vec *bv,

View File

@ -593,6 +593,7 @@ struct bch_dev {
* Committed by bch2_write_super() -> bch_fs_mi_update() * Committed by bch2_write_super() -> bch_fs_mi_update()
*/ */
struct bch_member_cpu mi; struct bch_member_cpu mi;
u64 btree_allocated_bitmap_gc;
atomic64_t errors[BCH_MEMBER_ERROR_NR]; atomic64_t errors[BCH_MEMBER_ERROR_NR];
unsigned long write_errors_start; unsigned long write_errors_start;
@ -865,6 +866,8 @@ struct bch_fs {
struct closure sb_write; struct closure sb_write;
struct mutex sb_lock; struct mutex sb_lock;
struct delayed_work maybe_schedule_btree_bitmap_gc;
/* snapshot.c: */ /* snapshot.c: */
struct snapshot_table __rcu *snapshots; struct snapshot_table __rcu *snapshots;
struct mutex snapshot_table_lock; struct mutex snapshot_table_lock;
@ -1037,7 +1040,7 @@ struct bch_fs {
struct bio_set bio_write; struct bio_set bio_write;
struct bio_set replica_set; struct bio_set replica_set;
struct mutex bio_bounce_pages_lock; struct mutex bio_bounce_pages_lock;
mempool_t bio_bounce_pages; mempool_t bio_bounce_bufs;
struct bucket_nocow_lock_table struct bucket_nocow_lock_table
nocow_locks; nocow_locks;
struct rhashtable promote_table; struct rhashtable promote_table;

View File

@ -661,16 +661,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
atomic64_set(&c->key_version, k.k->bversion.lo); atomic64_set(&c->key_version, k.k->bversion.lo);
} }
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k), if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked_nogc(c, k),
trans, btree_bitmap_not_marked, trans, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n%s", "btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) { buf.buf)))
guard(mutex)(&c->sb_lock);
bch2_dev_btree_bitmap_mark(c, k); bch2_dev_btree_bitmap_mark(c, k);
bch2_write_super(c);
}
/* /*
* We require a commit before key_trigger() because * We require a commit before key_trigger() because

View File

@ -639,10 +639,12 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
struct bch_fs *c = as->c; struct bch_fs *c = as->c;
guard(mutex)(&c->sb_lock); guard(mutex)(&c->sb_lock);
bool write_sb = false;
darray_for_each(as->new_nodes, i) darray_for_each(as->new_nodes, i)
bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(&i->key)); bch2_dev_btree_bitmap_mark_locked(c, bkey_i_to_s_c(&i->key), &write_sb);
bch2_write_super(c); if (write_sb)
bch2_write_super(c);
} }
static void bkey_strip_reconcile(const struct bch_fs *c, struct bkey_s k) static void bkey_strip_reconcile(const struct bch_fs *c, struct bkey_s k)
@ -2133,18 +2135,35 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) + sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) +
btree_node_u64s_with_format(m->nr, &m->format, &new_f); btree_node_u64s_with_format(m->nr, &m->format, &new_f);
if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) { if (trace_btree_node_merge_attempt_enabled()) {
sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c); CLASS(printbuf, buf)();
sib_u64s /= 2; guard(printbuf_indent)(&buf);
sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
bch2_btree_pos_to_text(&buf, c, prev);
prt_printf(&buf, "live u64s %u (%zu%% full)\n",
prev->nr.live_u64s,
prev->nr.live_u64s * 100 / btree_max_u64s(c));
bch2_btree_pos_to_text(&buf, c, next);
prt_printf(&buf, "live u64s %u (%zu%% full)\n",
next->nr.live_u64s,
next->nr.live_u64s * 100 / btree_max_u64s(c));
prt_printf(&buf, "merged would have %zu threshold %u\n",
sib_u64s, c->btree_foreground_merge_threshold);
trace_btree_node_merge_attempt(c, buf.buf);
} }
count_event(c, btree_node_merge_attempt);
sib_u64s = min(sib_u64s, btree_max_u64s(c)); if (sib_u64s > c->btree_foreground_merge_threshold) {
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1); if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c))
b->sib_u64s[sib] = sib_u64s; sib_u64s -= (sib_u64s - BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) / 2;
if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) sib_u64s = min(sib_u64s, btree_max_u64s(c));
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
goto out; goto out;
}
parent = btree_node_parent(trans->paths + path, b); parent = btree_node_parent(trans->paths + path, b);
as = bch2_btree_update_start(trans, trans->paths + path, level, false, as = bch2_btree_update_start(trans, trans->paths + path, level, false,

View File

@ -235,27 +235,30 @@ static int read_btree_nodes_worker(void *p)
goto err; goto err;
} }
u64 buckets_to_scan = 0;
for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++) for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
buckets_to_scan += c->sb.version_upgrade_complete < bcachefs_metadata_version_mi_btree_bitmap ||
bch2_dev_btree_bitmap_marked_sectors_any(ca, bucket_to_sector(ca, bucket), ca->mi.bucket_size);
u64 buckets_scanned = 0;
for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++) {
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
!bch2_dev_btree_bitmap_marked_sectors_any(ca, bucket_to_sector(ca, bucket), ca->mi.bucket_size))
continue;
for (unsigned bucket_offset = 0; for (unsigned bucket_offset = 0;
bucket_offset + btree_sectors(c) <= ca->mi.bucket_size; bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
bucket_offset += btree_sectors(c)) { bucket_offset += btree_sectors(c))
if (time_after(jiffies, last_print + HZ * 30)) { try_read_btree_node(w->f, ca, b, bio, bucket_to_sector(ca, bucket) + bucket_offset);
u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
bch_info(ca, "%s: %2u%% done", __func__, buckets_scanned++;
(unsigned) div64_u64(cur_sector * 100, end_sector));
last_print = jiffies;
}
u64 sector = bucket * ca->mi.bucket_size + bucket_offset; if (time_after(jiffies, last_print + HZ * 30)) {
bch_info(ca, "%s: %2u%% done", __func__,
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap && (unsigned) div64_u64(buckets_scanned * 100, buckets_to_scan));
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) last_print = jiffies;
continue;
try_read_btree_node(w->f, ca, b, bio, sector);
} }
}
err: err:
if (b) if (b)
__btree_node_data_free(b); __btree_node_data_free(b);

View File

@ -660,33 +660,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"bad magic: want %llx, got %llx", "bad magic: want %llx, got %llx",
bset_magic(c), le64_to_cpu(b->data->magic)); bset_magic(c), le64_to_cpu(b->data->magic));
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *bp =
&bkey_i_to_btree_ptr_v2(&b->key)->v;
bch2_bpos_to_text(&buf, b->data->min_key);
prt_str(&buf, "-");
bch2_bpos_to_text(&buf, b->data->max_key);
btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"got wrong btree node: got\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
} else {
btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"bad btree header: seq 0\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
}
while (b->written < (ptr_written ?: btree_sectors(c))) { while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors; unsigned sectors;
bool first = !b->written; bool first = !b->written;
@ -743,6 +716,33 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
goto fsck_err; goto fsck_err;
} }
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *bp =
&bkey_i_to_btree_ptr_v2(&b->key)->v;
bch2_bpos_to_text(&buf, b->data->min_key);
prt_str(&buf, "-");
bch2_bpos_to_text(&buf, b->data->max_key);
btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"got wrong btree node: got\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
} else {
btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"bad btree header: seq 0\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
}
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-BCH_ERR_btree_node_read_err_incompatible, -BCH_ERR_btree_node_read_err_incompatible,

View File

@ -202,15 +202,14 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
__bio_for_each_segment(bv, bio, *iter, *iter) { __bio_for_each_segment(bv, bio, *iter, *iter) {
void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; void *p = bvec_kmap_local(&bv);
bch2_checksum_update(&state, p, bv.bv_len); bch2_checksum_update(&state, p, bv.bv_len);
kunmap_local(p); kunmap_local(p);
} }
#else #else
__bio_for_each_bvec(bv, bio, *iter, *iter) __bio_for_each_bvec(bv, bio, *iter, *iter)
bch2_checksum_update(&state, page_address(bv.bv_page) + bv.bv_offset, bch2_checksum_update(&state, bvec_virt(&bv), bv.bv_len);
bv.bv_len);
#endif #endif
return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) }; return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
} }
@ -225,16 +224,14 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
__bio_for_each_segment(bv, bio, *iter, *iter) { __bio_for_each_segment(bv, bio, *iter, *iter) {
void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; void *p = bvec_kmap_local(&bv);
poly1305_update(&dctx, p, bv.bv_len); poly1305_update(&dctx, p, bv.bv_len);
kunmap_local(p); kunmap_local(p);
} }
#else #else
__bio_for_each_bvec(bv, bio, *iter, *iter) __bio_for_each_bvec(bv, bio, *iter, *iter)
poly1305_update(&dctx, poly1305_update(&dctx, bvec_virt(&bv), bv.bv_len);
page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
#endif #endif
poly1305_final(&dctx, digest); poly1305_final(&dctx, digest);

View File

@ -95,12 +95,12 @@ static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
void *expected_start = NULL; void *expected_start = NULL;
__bio_for_each_bvec(bv, bio, iter, start) { __bio_for_each_bvec(bv, bio, iter, start) {
if (expected_start && void *bv_addr = bvec_virt(&bv);
expected_start != page_address(bv.bv_page) + bv.bv_offset)
if (expected_start && expected_start != bv_addr)
return false; return false;
expected_start = page_address(bv.bv_page) + expected_start = bv_addr + bv.bv_len;
bv.bv_offset + bv.bv_len;
} }
return true; return true;
@ -109,27 +109,27 @@ static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio, static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
struct bvec_iter start, int rw) struct bvec_iter start, int rw)
{ {
struct bio_vec bv;
struct bvec_iter iter;
unsigned nr_pages = 0;
struct page *stack_pages[16];
struct page **pages = NULL;
void *data;
BUG_ON(start.bi_size > c->opts.encoded_extent_max); BUG_ON(start.bi_size > c->opts.encoded_extent_max);
if (!PageHighMem(bio_iter_page(bio, start)) && #ifndef CONFIG_HIGHMEM
bio_phys_contig(bio, start)) if (bio_phys_contig(bio, start))
return (struct bbuf) { return (struct bbuf) {
.c = c, .c = c,
.b = page_address(bio_iter_page(bio, start)) + .b = bvec_virt(&bio_iter_iovec(bio, start)),
bio_iter_offset(bio, start),
.type = BB_none, .type = BB_none,
.rw = rw .rw = rw
}; };
#endif
#ifdef __KERNEL__
/* check if we can map the pages contiguously: */ /* check if we can map the pages contiguously: */
struct bio_vec bv;
struct bvec_iter iter;
unsigned nr_pages = 0;
__bio_for_each_segment(bv, bio, iter, start) { __bio_for_each_segment(bv, bio, iter, start) {
BUG_ON(bv.bv_offset + bv.bv_len > PAGE_SIZE);
if (iter.bi_size != start.bi_size && if (iter.bi_size != start.bi_size &&
bv.bv_offset) bv.bv_offset)
return bio_bounce(c, bio, start, rw); return bio_bounce(c, bio, start, rw);
@ -143,7 +143,8 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages); BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
pages = nr_pages > ARRAY_SIZE(stack_pages) struct page *stack_pages[16];
struct page **pages = nr_pages > ARRAY_SIZE(stack_pages)
? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS) ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
: stack_pages; : stack_pages;
if (!pages) if (!pages)
@ -153,19 +154,20 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
__bio_for_each_segment(bv, bio, iter, start) __bio_for_each_segment(bv, bio, iter, start)
pages[nr_pages++] = bv.bv_page; pages[nr_pages++] = bv.bv_page;
data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); void *data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
if (pages != stack_pages) if (pages != stack_pages)
kfree(pages); kfree(pages);
if (!data) if (data)
return bio_bounce(c, bio, start, rw); return (struct bbuf) {
c,
data + bio_iter_offset(bio, start),
BB_vmap,
rw
};
#endif /* __KERNEL__ */
return (struct bbuf) { return bio_bounce(c, bio, start, rw);
c,
data + bio_iter_offset(bio, start),
BB_vmap,
rw
};
} }
static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw) static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)

View File

@ -28,6 +28,8 @@
#include "util/util.h" #include "util/util.h"
#ifdef CONFIG_BCACHEFS_DEBUG #ifdef CONFIG_BCACHEFS_DEBUG
#include <linux/module.h>
static int bch2_force_read_device = -1; static int bch2_force_read_device = -1;
module_param_named(force_read_device, bch2_force_read_device, int, 0644); module_param_named(force_read_device, bch2_force_read_device, int, 0644);

View File

@ -344,7 +344,7 @@ err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params)); bch_promote_params));
err: err:
bio_free_pages(&op->write.op.wbio.bio); bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
/* We may have added to the rhashtable and thus need rcu freeing: */ /* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu); kfree_rcu(op, rcu);
err_put: err_put:
@ -1253,7 +1253,7 @@ retry_pick:
&c->bio_read_split), &c->bio_read_split),
orig); orig);
bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9); bch2_bio_alloc_pages_pool(c, &rbio->bio, 512, sectors << 9);
rbio->bounce = true; rbio->bounce = true;
} else if (flags & BCH_READ_must_clone) { } else if (flags & BCH_READ_must_clone) {
/* /*
@ -1591,16 +1591,29 @@ void bch2_fs_io_read_exit(struct bch_fs *c)
rhashtable_destroy(&c->promote_table); rhashtable_destroy(&c->promote_table);
bioset_exit(&c->bio_read_split); bioset_exit(&c->bio_read_split);
bioset_exit(&c->bio_read); bioset_exit(&c->bio_read);
mempool_exit(&c->bio_bounce_pages); mempool_exit(&c->bio_bounce_bufs);
}
static void *bio_bounce_buf_alloc_fn(gfp_t gfp, void *pool_data)
{
return (void *) __get_free_pages(gfp, PAGE_ALLOC_COSTLY_ORDER);
}
static void bio_bounce_buf_free_fn(void *p, void *pool_data)
{
free_pages((unsigned long) p, PAGE_ALLOC_COSTLY_ORDER);
} }
int bch2_fs_io_read_init(struct bch_fs *c) int bch2_fs_io_read_init(struct bch_fs *c)
{ {
if (mempool_init_page_pool(&c->bio_bounce_pages, if (mempool_init(&c->bio_bounce_bufs,
max_t(unsigned, max_t(unsigned,
c->opts.btree_node_size, c->opts.btree_node_size,
c->opts.encoded_extent_max) / c->opts.encoded_extent_max) /
PAGE_SIZE, 0)) BIO_BOUNCE_BUF_POOL_LEN,
bio_bounce_buf_alloc_fn,
bio_bounce_buf_free_fn,
NULL))
return bch_err_throw(c, ENOMEM_bio_bounce_pages_init); return bch_err_throw(c, ENOMEM_bio_bounce_pages_init);
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),

View File

@ -7,6 +7,8 @@
#include "extents_types.h" #include "extents_types.h"
#include "data/reflink.h" #include "data/reflink.h"
#define BIO_BOUNCE_BUF_POOL_LEN (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *); void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *);
#endif #endif

View File

@ -113,42 +113,41 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio) void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{ {
struct bvec_iter_all iter; for (struct bio_vec *bv = bio->bi_io_vec;
struct bio_vec *bv; bv < bio->bi_io_vec + bio->bi_vcnt;
bv++) {
void *p = bvec_virt(bv);
bio_for_each_segment_all(bv, bio, iter) if (bv->bv_len == BIO_BOUNCE_BUF_POOL_LEN)
mempool_free(bv->bv_page, &c->bio_bounce_pages); mempool_free(p, &c->bio_bounce_bufs);
else
free_pages((unsigned long) p, get_order(bv->bv_len));
}
bio->bi_vcnt = 0; bio->bi_vcnt = 0;
} }
static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool) static void __bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
unsigned bs, size_t size)
{ {
if (likely(!*using_mempool)) { mutex_lock(&c->bio_bounce_pages_lock);
struct page *page = alloc_page(GFP_NOFS);
if (likely(page))
return page;
mutex_lock(&c->bio_bounce_pages_lock); while (bio->bi_iter.bi_size < size)
*using_mempool = true; bio_add_virt_nofail(bio,
} mempool_alloc(&c->bio_bounce_bufs, GFP_NOFS),
return mempool_alloc(&c->bio_bounce_pages, GFP_NOFS); BIO_BOUNCE_BUF_POOL_LEN);
bio->bi_iter.bi_size = min(bio->bi_iter.bi_size, size);
mutex_unlock(&c->bio_bounce_pages_lock);
} }
void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
size_t size) unsigned bs, size_t size)
{ {
bool using_mempool = false; bch2_bio_alloc_pages(bio, c->opts.block_size, size, GFP_NOFS);
while (size) { if (bio->bi_iter.bi_size < size)
struct page *page = __bio_alloc_page_pool(c, &using_mempool); __bch2_bio_alloc_pages_pool(c, bio, bs, size);
unsigned len = min_t(size_t, PAGE_SIZE, size);
BUG_ON(!bio_add_page(bio, page, len, 0));
size -= len;
}
if (using_mempool)
mutex_unlock(&c->bio_bounce_pages_lock);
} }
/* Extent update path: */ /* Extent update path: */
@ -837,23 +836,22 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
return bio; return bio;
} }
wbio->bounce = true; wbio->bounce = true;
/* /*
* We can't use mempool for more than c->sb.encoded_extent_max * We can't use mempool for more than c->sb.encoded_extent_max
* worth of pages, but we'd like to allocate more if we can: * worth of pages, but we'd like to allocate more if we can:
*/ */
bch2_bio_alloc_pages_pool(c, bio, bch2_bio_alloc_pages(bio,
min_t(unsigned, output_available, c->opts.block_size,
c->opts.encoded_extent_max)); output_available,
GFP_NOFS);
if (bio->bi_iter.bi_size < output_available) unsigned required = min(output_available, c->opts.encoded_extent_max);
*page_alloc_failed =
bch2_bio_alloc_pages(bio, if (unlikely(bio->bi_iter.bi_size < required))
c->opts.block_size, __bch2_bio_alloc_pages_pool(c, bio, c->opts.block_size, required);
output_available -
bio->bi_iter.bi_size,
GFP_NOFS) != 0;
return bio; return bio;
} }

View File

@ -9,7 +9,7 @@
container_of((_bio), struct bch_write_bio, bio) container_of((_bio), struct bch_write_bio, bio)
void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *); void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t); void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, unsigned, size_t);
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *, void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool); enum bch_data_type, const struct bkey_i *, bool);

View File

@ -556,6 +556,11 @@ DEFINE_EVENT(fs_str, btree_node_rewrite,
TP_ARGS(c, str) TP_ARGS(c, str)
); );
DEFINE_EVENT(fs_str, btree_node_merge_attempt,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);
DEFINE_EVENT(fs_str, btree_node_merge, DEFINE_EVENT(fs_str, btree_node_merge,
TP_PROTO(struct bch_fs *c, const char *str), TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str) TP_ARGS(c, str)

View File

@ -331,6 +331,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]); bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]);
ca->mi = bch2_mi_to_cpu(member); ca->mi = bch2_mi_to_cpu(member);
ca->btree_allocated_bitmap_gc = le64_to_cpu(member->btree_allocated_bitmap);
for (i = 0; i < ARRAY_SIZE(member->errors); i++) for (i = 0; i < ARRAY_SIZE(member->errors); i++)
atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i])); atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));

View File

@ -264,11 +264,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned clean_passes = 0; unsigned clean_passes = 0;
u64 seq = 0; u64 seq = 0;
bch2_maybe_schedule_btree_bitmap_gc_stop(c);
bch2_fs_ec_stop(c); bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true); bch2_open_buckets_stop(c, NULL, true);
bch2_reconcile_stop(c); bch2_reconcile_stop(c);
bch2_copygc_stop(c); bch2_copygc_stop(c);
bch2_fs_ec_flush(c); bch2_fs_ec_flush(c);
cancel_delayed_work_sync(&c->maybe_schedule_btree_bitmap_gc);
bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu", bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal)); journal_cur_seq(&c->journal));
@ -524,6 +526,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_do_invalidates(c); bch2_do_invalidates(c);
bch2_do_stripe_deletes(c); bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c); bch2_do_pending_node_rewrites(c);
bch2_maybe_schedule_btree_bitmap_gc(c);
return 0; return 0;
} }

View File

@ -176,31 +176,43 @@ void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *c,
} }
} }
static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass) static bool bch2_recovery_pass_entry_get_locked(struct bch_fs *c, enum bch_recovery_pass pass,
struct recovery_pass_entry *e)
{ {
enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
bool ret = false;
lockdep_assert_held(&c->sb_lock); lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_recovery_passes *r = struct bch_sb_field_recovery_passes *r =
bch2_sb_field_get(c->disk_sb.sb, recovery_passes); bch2_sb_field_get(c->disk_sb.sb, recovery_passes);
if (stable < recovery_passes_nr_entries(r)) { enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
struct recovery_pass_entry *i = r->start + stable; bool found = stable < recovery_passes_nr_entries(r);
if (found)
*e = r->start[stable];
/* return found;
* Ratelimit if the last runtime was more than 1% of the time }
* since we last ran
*/
ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
ktime_get_real_seconds() - le64_to_cpu(i->last_run);
if (BCH_RECOVERY_PASS_NO_RATELIMIT(i)) static bool bch2_recovery_pass_want_ratelimit_locked(struct bch_fs *c, enum bch_recovery_pass pass,
ret = false; unsigned runtime_fraction)
} {
struct recovery_pass_entry e;
if (!bch2_recovery_pass_entry_get_locked(c, pass, &e))
return false;
return ret; /*
* Ratelimit if the last runtime was more than 1% of the time
* since we last ran
*/
return !BCH_RECOVERY_PASS_NO_RATELIMIT(&e) &&
(u64) le32_to_cpu(e.last_runtime) * runtime_fraction >
ktime_get_real_seconds() - le64_to_cpu(e.last_run);
}
bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass,
unsigned runtime_fraction)
{
guard(mutex)(&c->sb_lock);
return bch2_recovery_pass_want_ratelimit_locked(c, pass, runtime_fraction);
} }
const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = { const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = {
@ -311,7 +323,7 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
*flags |= RUN_RECOVERY_PASS_nopersistent; *flags |= RUN_RECOVERY_PASS_nopersistent;
if ((*flags & RUN_RECOVERY_PASS_ratelimit) && if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
!bch2_recovery_pass_want_ratelimit(c, pass)) !bch2_recovery_pass_want_ratelimit_locked(c, pass, 100))
*flags &= ~RUN_RECOVERY_PASS_ratelimit; *flags &= ~RUN_RECOVERY_PASS_ratelimit;
/* /*
@ -451,7 +463,7 @@ int bch2_require_recovery_pass(struct bch_fs *c,
guard(mutex)(&c->sb_lock); guard(mutex)(&c->sb_lock);
if (bch2_recovery_pass_want_ratelimit(c, pass)) if (bch2_recovery_pass_want_ratelimit_locked(c, pass, 100))
return 0; return 0;
enum bch_run_recovery_pass_flags flags = 0; enum bch_run_recovery_pass_flags flags = 0;

View File

@ -46,6 +46,8 @@ static inline int bch2_recovery_cancelled(struct bch_fs *c)
return 0; return 0;
} }
bool bch2_recovery_pass_want_ratelimit(struct bch_fs *, enum bch_recovery_pass, unsigned);
int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,

View File

@ -66,6 +66,7 @@
x(delete_dead_inodes, 32, PASS_ALWAYS) \ x(delete_dead_inodes, 32, PASS_ALWAYS) \
x(fix_reflink_p, 33, 0) \ x(fix_reflink_p, 33, 0) \
x(set_fs_needs_reconcile, 34, 0) \ x(set_fs_needs_reconcile, 34, 0) \
x(btree_bitmap_gc, 46, PASS_ONLINE) \
x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT) x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT)
/* We normally enumerate recovery passes in the order we run them: */ /* We normally enumerate recovery passes in the order we run them: */

View File

@ -63,10 +63,12 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
int bch2_jset_validate(struct bch_fs *, struct bch_dev *, struct jset *, int bch2_jset_validate(struct bch_fs *, struct bch_dev *, struct jset *,
u64, enum bch_validate_flags); u64, enum bch_validate_flags);
struct u64_range { typedef struct u64_range {
u64 start; u64 start;
u64 end; u64 end;
}; } u64_range;
DEFINE_DARRAY(u64_range);
struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64); struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64);

View File

@ -726,7 +726,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
* we're holding the reclaim lock: * we're holding the reclaim lock:
*/ */
lockdep_assert_held(&j->reclaim_lock); lockdep_assert_held(&j->reclaim_lock);
flags = memalloc_noreclaim_save(); flags = memalloc_nofs_save();
do { do {
if (kthread && kthread_should_stop()) if (kthread && kthread_should_stop())
@ -780,7 +780,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
wake_up(&j->reclaim_wait); wake_up(&j->reclaim_wait);
} while ((min_nr || min_key_cache) && nr_flushed && !direct); } while ((min_nr || min_key_cache) && nr_flushed && !direct);
memalloc_noreclaim_restore(flags); memalloc_flags_restore(flags);
return ret; return ret;
} }

View File

@ -2,6 +2,7 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "journal/read.h"
#include "journal/sb.h" #include "journal/sb.h"
#include "util/darray.h" #include "util/darray.h"
@ -28,35 +29,33 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f,
if (!nr) if (!nr)
return 0; return 0;
u64 *b __free(kvfree) = kvmalloc_array(nr, sizeof(u64), GFP_KERNEL); CLASS(darray_u64, b)();
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_validate;
for (unsigned i = 0; i < nr; i++) for (unsigned i = 0; i < nr; i++)
b[i] = le64_to_cpu(journal->buckets[i]); try(darray_push(&b, le64_to_cpu(journal->buckets[i])));
sort(b, nr, sizeof(u64), u64_cmp, NULL); darray_sort(b, u64_cmp);
if (!b[0]) { if (!darray_first(b)) {
prt_printf(err, "journal bucket at sector 0"); prt_printf(err, "journal bucket at sector 0");
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
if (b[0] < le16_to_cpu(m.first_bucket)) { if (darray_first(b) < le16_to_cpu(m.first_bucket)) {
prt_printf(err, "journal bucket %llu before first bucket %u", prt_printf(err, "journal bucket %llu before first bucket %u",
b[0], le16_to_cpu(m.first_bucket)); darray_first(b), le16_to_cpu(m.first_bucket));
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) { if (darray_last(b) >= le64_to_cpu(m.nbuckets)) {
prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1], le64_to_cpu(m.nbuckets)); darray_last(b), le64_to_cpu(m.nbuckets));
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
for (unsigned i = 0; i + 1 < nr; i++) darray_for_each(b, i)
if (b[i] == b[i + 1]) { if (i != &darray_last(b) && i[0] == i[1]) {
prt_printf(err, "duplicate journal buckets %llu", b[i]); prt_printf(err, "duplicate journal buckets %llu", *i);
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
@ -80,11 +79,6 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal = {
.to_text = bch2_sb_journal_to_text, .to_text = bch2_sb_journal_to_text,
}; };
struct u64_range {
u64 start;
u64 end;
};
static int u64_range_cmp(const void *_l, const void *_r) static int u64_range_cmp(const void *_l, const void *_r)
{ {
const struct u64_range *l = _l; const struct u64_range *l = _l;
@ -104,15 +98,16 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
if (!nr) if (!nr)
return 0; return 0;
struct u64_range *b __free(kvfree) = kvmalloc_array(nr, sizeof(*b), GFP_KERNEL); CLASS(darray_u64_range, b)();
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_v2_validate;
for (unsigned i = 0; i < nr; i++) { for (unsigned i = 0; i < nr; i++) {
b[i].start = le64_to_cpu(journal->d[i].start); struct u64_range r = {
b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); .start = le64_to_cpu(journal->d[i].start),
.end = le64_to_cpu(journal->d[i].start) +
le64_to_cpu(journal->d[i].nr),
};
if (b[i].end <= b[i].start) { if (r.end <= r.start) {
prt_printf(err, "journal buckets entry with bad nr: %llu+%llu", prt_printf(err, "journal buckets entry with bad nr: %llu+%llu",
le64_to_cpu(journal->d[i].start), le64_to_cpu(journal->d[i].start),
le64_to_cpu(journal->d[i].nr)); le64_to_cpu(journal->d[i].nr));
@ -120,34 +115,34 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
} }
sum += le64_to_cpu(journal->d[i].nr); sum += le64_to_cpu(journal->d[i].nr);
try(darray_push(&b, r));
} }
sort(b, nr, sizeof(*b), u64_range_cmp, NULL); darray_sort(b, u64_range_cmp);
if (!b[0].start) { if (!darray_first(b).start) {
prt_printf(err, "journal bucket at sector 0"); prt_printf(err, "journal bucket at sector 0");
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
if (b[0].start < le16_to_cpu(m.first_bucket)) { if (darray_first(b).start < le16_to_cpu(m.first_bucket)) {
prt_printf(err, "journal bucket %llu before first bucket %u", prt_printf(err, "journal bucket %llu before first bucket %u",
b[0].start, le16_to_cpu(m.first_bucket)); darray_first(b).start, le16_to_cpu(m.first_bucket));
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) { if (darray_last(b).end > le64_to_cpu(m.nbuckets)) {
prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1].end - 1, le64_to_cpu(m.nbuckets)); darray_last(b).end - 1, le64_to_cpu(m.nbuckets));
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
for (unsigned i = 0; i + 1 < nr; i++) { darray_for_each(b, i)
if (b[i].end > b[i + 1].start) { if (i != &darray_last(b) && i[0].end > i[1].start) {
prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu", prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end); i[0].start, i[0].end, i[1].start, i[1].end);
return -BCH_ERR_invalid_sb_journal; return -BCH_ERR_invalid_sb_journal;
} }
}
if (sum > UINT_MAX) { if (sum > UINT_MAX) {
prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX); prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX);
@ -179,11 +174,9 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca, int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
u64 *buckets, unsigned nr) u64 *buckets, unsigned nr)
{ {
struct bch_sb_field_journal_v2 *j; unsigned dst = 0, nr_compacted = 1;
unsigned i, dst = 0, nr_compacted = 1;
if (c) lockdep_assert_held(&c->sb_lock);
lockdep_assert_held(&c->sb_lock);
if (!nr) { if (!nr) {
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal); bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
@ -191,11 +184,12 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
return 0; return 0;
} }
for (i = 0; i + 1 < nr; i++) for (unsigned i = 0; i + 1 < nr; i++)
if (buckets[i] + 1 != buckets[i + 1]) if (buckets[i] + 1 != buckets[i + 1])
nr_compacted++; nr_compacted++;
j = bch2_sb_field_resize(&ca->disk_sb, journal_v2, struct bch_sb_field_journal_v2 *j =
bch2_sb_field_resize(&ca->disk_sb, journal_v2,
(sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64)); (sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
if (!j) if (!j)
return bch_err_throw(c, ENOSPC_sb_journal); return bch_err_throw(c, ENOSPC_sb_journal);
@ -205,7 +199,7 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
j->d[dst].start = cpu_to_le64(buckets[0]); j->d[dst].start = cpu_to_le64(buckets[0]);
j->d[dst].nr = cpu_to_le64(1); j->d[dst].nr = cpu_to_le64(1);
for (i = 1; i < nr; i++) { for (unsigned i = 1; i < nr; i++) {
if (buckets[i] == buckets[i - 1] + 1) { if (buckets[i] == buckets[i - 1] + 1) {
le64_add_cpu(&j->d[dst].nr, 1); le64_add_cpu(&j->d[dst].nr, 1);
} else { } else {

View File

@ -55,6 +55,7 @@ enum counters_flags {
x(btree_node_read, 14, TYPE_COUNTER) \ x(btree_node_read, 14, TYPE_COUNTER) \
x(btree_node_compact, 15, TYPE_COUNTER) \ x(btree_node_compact, 15, TYPE_COUNTER) \
x(btree_node_merge, 16, TYPE_COUNTER) \ x(btree_node_merge, 16, TYPE_COUNTER) \
x(btree_node_merge_attempt, 101, TYPE_COUNTER) \
x(btree_node_split, 17, TYPE_COUNTER) \ x(btree_node_split, 17, TYPE_COUNTER) \
x(btree_node_rewrite, 18, TYPE_COUNTER) \ x(btree_node_rewrite, 18, TYPE_COUNTER) \
x(btree_node_alloc, 19, TYPE_COUNTER) \ x(btree_node_alloc, 19, TYPE_COUNTER) \

View File

@ -5,6 +5,8 @@
#include "sb/errors.h" #include "sb/errors.h"
#include "sb/io.h" #include "sb/io.h"
#include "util/darray.h"
const char * const bch2_sb_error_strs[] = { const char * const bch2_sb_error_strs[] = {
#define x(t, n, ...) [n] = #t, #define x(t, n, ...) [n] = #t,
BCH_SB_ERRS() BCH_SB_ERRS()
@ -63,25 +65,25 @@ static int error_entry_cmp(const void *_l, const void *_r)
return -cmp_int(l->last_error_time, r->last_error_time); return -cmp_int(l->last_error_time, r->last_error_time);
} }
DEFINE_DARRAY(bch_sb_field_error_entry);
static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb, static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f) struct bch_sb_field *f)
{ {
struct bch_sb_field_errors *e = field_to_type(f, errors); struct bch_sb_field_errors *e = field_to_type(f, errors);
unsigned nr = bch2_sb_field_errors_nr_entries(e); unsigned nr = bch2_sb_field_errors_nr_entries(e);
struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL);
if (sorted) {
memcpy(sorted, e->entries, nr * sizeof(e->entries[0]));
sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL);
} else {
sorted = e->entries;
}
if (out->nr_tabstops <= 1) if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16);
for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) { CLASS(darray_bch_sb_field_error_entry, sorted)();
for (struct bch_sb_field_error_entry *i = e->entries; i < e->entries + nr; i++)
darray_push(&sorted, *i);
darray_sort(sorted, error_entry_cmp);
darray_for_each(sorted, i) {
bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i)); bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i));
prt_tab(out); prt_tab(out);
prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i)); prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i));
@ -89,9 +91,6 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
bch2_prt_datetime(out, le64_to_cpu(i->last_error_time)); bch2_prt_datetime(out, le64_to_cpu(i->last_error_time));
prt_newline(out); prt_newline(out);
} }
if (sorted != e->entries)
kvfree(sorted);
} }
const struct bch_sb_field_ops bch_sb_field_ops_errors = { const struct bch_sb_field_ops bch_sb_field_ops_errors = {

View File

@ -360,12 +360,14 @@ enum bch_sb_error_id {
#undef x #undef x
}; };
/*
 * Element type of the entries[] array in struct bch_sb_field_errors.
 * Both members are stored as little-endian 64-bit words.
 */
typedef struct bch_sb_field_error_entry {
/* packed word; BCH_SB_ERROR_ENTRY_ID is declared over it via LE64_BITMASK(..., v, 0, 16) */
__le64 v;
/* converted with le64_to_cpu() and printed through bch2_prt_datetime() by the to_text code */
__le64 last_error_time;
} bch_sb_field_error_entry;
struct bch_sb_field_errors { struct bch_sb_field_errors {
struct bch_sb_field field; struct bch_sb_field field;
struct bch_sb_field_error_entry { bch_sb_field_error_entry entries[];
__le64 v;
__le64 last_error_time;
} entries[];
}; };
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);

View File

@ -2,16 +2,19 @@
#include "bcachefs.h" #include "bcachefs.h"
#include "alloc/buckets.h"
#include "alloc/disk_groups.h" #include "alloc/disk_groups.h"
#include "alloc/replicas.h" #include "alloc/replicas.h"
#include "btree/cache.h" #include "btree/cache.h"
#include "btree/iter.h"
#include "sb/members.h" #include "sb/members.h"
#include "sb/io.h" #include "sb/io.h"
#include "init/error.h" #include "init/error.h"
#include "init/passes.h" #include "init/passes.h"
#include "init/progress.h"
int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
{ {
@ -512,35 +515,54 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
* have to scan full devices: * have to scan full devices:
*/ */
bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) static bool __bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k, bool with_gc)
{ {
guard(rcu)(); guard(rcu)();
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (ca && if (ca &&
!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) !__bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c), with_gc))
return false; return false;
} }
return true; return true;
} }
static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
u64 start, unsigned sectors)
{ {
struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); return __bch2_dev_btree_bitmap_marked(c, k, true);
u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); }
/*
 * Same check as bch2_dev_btree_bitmap_marked(), but the shared helper is
 * called with with_gc == false.
 */
bool bch2_dev_btree_bitmap_marked_nogc(struct bch_fs *c, struct bkey_s_c k)
{
	const bool with_gc = false;

	return __bch2_dev_btree_bitmap_marked(c, k, with_gc);
}
static void __bch2_dev_btree_bitmap_mark(struct bch_dev *ca,
struct bch_sb_field_members_v2 *mi,
u64 start, unsigned sectors, bool *write_sb)
{
struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
u64 end = start + sectors; u64 end = start + sectors;
int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
if (resize > 0) { if (resize > 0) {
u64 old_bitmap = le64_to_cpu(m->btree_allocated_bitmap);
u64 new_bitmap = 0; u64 new_bitmap = 0;
u64 new_gc_bitmap = 0;
for (unsigned i = 0; i < 64; i++) for (unsigned i = 0; i < 64; i++) {
if (bitmap & BIT_ULL(i)) if (old_bitmap & BIT_ULL(i))
new_bitmap |= BIT_ULL(i >> resize); new_bitmap |= BIT_ULL(i >> resize);
bitmap = new_bitmap; if (ca->btree_allocated_bitmap_gc & BIT_ULL(i))
new_gc_bitmap |= BIT_ULL(i >> resize);
}
m->btree_allocated_bitmap = cpu_to_le64(new_bitmap);
m->btree_bitmap_shift += resize; m->btree_bitmap_shift += resize;
*write_sb = true;
ca->btree_allocated_bitmap_gc = new_gc_bitmap;
} }
BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX); BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
@ -548,25 +570,164 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
for (unsigned bit = start >> m->btree_bitmap_shift; for (unsigned bit = start >> m->btree_bitmap_shift;
(u64) bit << m->btree_bitmap_shift < end; (u64) bit << m->btree_bitmap_shift < end;
bit++) bit++) {
bitmap |= BIT_ULL(bit); __le64 b = cpu_to_le64(BIT_ULL(bit));
m->btree_allocated_bitmap = cpu_to_le64(bitmap); if (!(m->btree_allocated_bitmap & b)) {
m->btree_allocated_bitmap |= b;
*write_sb = true;
}
ca->btree_allocated_bitmap_gc |= BIT_ULL(bit);
}
} }
void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) void bch2_dev_btree_bitmap_mark_locked(struct bch_fs *c, struct bkey_s_c k, bool *write_sb)
{ {
lockdep_assert_held(&c->sb_lock); lockdep_assert_held(&c->sb_lock);
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
guard(rcu)();
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
if (!bch2_member_exists(c->disk_sb.sb, ptr->dev)) struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca)
continue; continue;
__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); __bch2_dev_btree_bitmap_mark(ca, mi, ptr->offset, btree_sectors(c), write_sb);
} }
} }
void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
{
guard(mutex)(&c->sb_lock);
bool write_sb = false;
bch2_dev_btree_bitmap_mark_locked(c, k, &write_sb);
if (write_sb)
bch2_write_super(c);
}
/*
 * Walk the keys of @btree at @level, starting from POS_MIN, and call
 * bch2_dev_btree_bitmap_mark() for every key that
 * bch2_dev_btree_bitmap_marked() reports as not marked. @progress is updated
 * after each key.
 *
 * Returns 0 when the walk completes; try() presumably returns early with the
 * error from the iteration macro - confirm against its definition.
 */
static int btree_bitmap_gc_btree_level(struct btree_trans *trans,
struct progress_indicator *progress,
enum btree_id btree, unsigned level)
{
struct bch_fs *c = trans->c;
CLASS(btree_node_iter, iter)(trans, btree, POS_MIN, 0, level, BTREE_ITER_prefetch);
try(for_each_btree_key_continue(trans, iter, 0, k, ({
if (!bch2_dev_btree_bitmap_marked(c, k))
bch2_dev_btree_bitmap_mark(c, k);
/*
 * Last expression of the ({ }) block, so its value is the value of the
 * whole per-key body (presumably used as the per-key return code).
 */
bch2_progress_update_iter(trans, progress, &iter, "btree_bitmap_gc");
})));
return 0;
}
/*
 * Rebuild the per-device btree allocation bitmaps from the btree nodes that
 * are currently reachable.
 *
 * Steps, as implemented below:
 *  1. under sb_lock, zero every member device's in-memory
 *     ->btree_allocated_bitmap_gc;
 *  2. for every btree, walk levels 1..BTREE_MAX_DEPTH-1 and then look at the
 *     root node's key, marking whatever bch2_dev_btree_bitmap_marked()
 *     reports as unmarked;
 *  3. under sb_lock, copy each device's gc bitmap into its superblock member
 *     entry and write the superblock;
 *  4. log how many bytes were covered by the bitmap before and after.
 *
 * Returns 0 on success; the try() calls presumably return early with the
 * error of the failing step - confirm against the macro definition.
 */
int bch2_btree_bitmap_gc(struct bch_fs *c)
{
	struct progress_indicator progress;
	bch2_progress_init_inner(&progress, c, 0, ~0ULL);

	scoped_guard(mutex, &c->sb_lock) {
		guard(rcu)();
		for_each_member_device_rcu(c, ca, NULL)
			ca->btree_allocated_bitmap_gc = 0;
	}

	{
		CLASS(btree_trans, trans)(c);

		for (unsigned btree = 0; btree < btree_id_nr_alive(c); btree++) {
			for (unsigned level = 1; level < BTREE_MAX_DEPTH; level++)
				try(btree_bitmap_gc_btree_level(trans, &progress, btree, level));

			/* the root node's own key is checked separately from the level walks */
			CLASS(btree_node_iter, iter)(trans, btree, POS_MIN, 0,
						     bch2_btree_id_root(c, btree)->b->c.level, 0);

			struct btree *b;
			try(lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter))));

			if (!bch2_dev_btree_bitmap_marked(c, bkey_i_to_s_c(&b->key)))
				bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(&b->key));
		}
	}

	u64 sectors_marked_old = 0, sectors_marked_new = 0;

	scoped_guard(mutex, &c->sb_lock) {
		struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

		scoped_guard(rcu)
			for_each_member_device_rcu(c, ca, NULL) {
				/*
				 * Widen the popcount to u64 before shifting: the
				 * result type of hweight64() may be narrower than
				 * 64 bits, and shifting it by btree_bitmap_shift
				 * would then overflow.
				 */
				sectors_marked_old += (u64) hweight64(ca->mi.btree_allocated_bitmap)
					<< ca->mi.btree_bitmap_shift;
				sectors_marked_new += (u64) hweight64(ca->btree_allocated_bitmap_gc)
					<< ca->mi.btree_bitmap_shift;

				struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
				m->btree_allocated_bitmap = cpu_to_le64(ca->btree_allocated_bitmap_gc);
			}

		bch2_write_super(c);
	}

	/* sectors << 9 converts 512-byte sectors to bytes for the human-readable output */
	CLASS(printbuf, buf)();
	prt_str(&buf, "mi_btree_bitmap sectors ");
	prt_human_readable_u64(&buf, sectors_marked_old << 9);
	prt_str(&buf, " -> ");
	prt_human_readable_u64(&buf, sectors_marked_new << 9);
	bch_info(c, "%s", buf.buf);

	return 0;
}
static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, maybe_schedule_btree_bitmap_gc.work);
if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000))
return;
CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
bool want_schedule = false;
for_each_member_device(c, ca) {
struct bch_dev_usage u;
bch2_dev_usage_read_fast(ca, &u);
u64 btree_sectors = bucket_to_sector(ca, u.buckets[BCH_DATA_btree]);
u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift;
if (btree_sectors * 4 < bitmap_sectors) {
prt_printf(&buf, "%s has ", ca->name);
prt_human_readable_u64(&buf, btree_sectors << 9);
prt_printf(&buf, " btree buckets and ");
prt_human_readable_u64(&buf, bitmap_sectors << 9);
prt_printf(&buf, " marked in bitmap\n");
want_schedule = true;
}
}
if (want_schedule) {
bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_btree_bitmap_gc,
RUN_RECOVERY_PASS_ratelimit);
bch2_print_str(c, KERN_NOTICE, buf.buf);
}
queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24);
}
/*
 * Stop the periodic btree-bitmap check: cancels the
 * maybe_schedule_btree_bitmap_gc delayed work via cancel_delayed_work_sync().
 */
void bch2_maybe_schedule_btree_bitmap_gc_stop(struct bch_fs *c)
{
cancel_delayed_work_sync(&c->maybe_schedule_btree_bitmap_gc);
}
/*
 * Set up the periodic btree-bitmap check for @c: initialises the delayed work
 * item and then invokes the work function once directly, in the caller's
 * context, rather than queueing it.
 */
void bch2_maybe_schedule_btree_bitmap_gc(struct bch_fs *c)
{
INIT_DELAYED_WORK(&c->maybe_schedule_btree_bitmap_gc,
bch2_maybe_schedule_btree_bitmap_gc_work);
/* first run happens synchronously; the handler gets the embedded work_struct */
bch2_maybe_schedule_btree_bitmap_gc_work(&c->maybe_schedule_btree_bitmap_gc.work);
}
unsigned bch2_sb_nr_devices(const struct bch_sb *sb) unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
{ {
unsigned nr = 0; unsigned nr = 0;

View File

@ -389,7 +389,8 @@ void bch2_sb_members_to_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *); void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *); void bch2_dev_errors_reset(struct bch_dev *);
static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors) static inline bool __bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start,
unsigned sectors, bool with_gc)
{ {
u64 end = start + sectors; u64 end = start + sectors;
@ -399,14 +400,46 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64
for (unsigned bit = start >> ca->mi.btree_bitmap_shift; for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
(u64) bit << ca->mi.btree_bitmap_shift < end; (u64) bit << ca->mi.btree_bitmap_shift < end;
bit++) bit++)
if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) if (!(BIT_ULL(bit) &
ca->mi.btree_allocated_bitmap &
(with_gc
? ca->btree_allocated_bitmap_gc
: ~0ULL)))
return false; return false;
return true; return true;
} }
/*
 * Check the sector range [@start, @start + @sectors) of @ca against the btree
 * bitmap. Calls the shared helper with with_gc == false, so
 * ca->btree_allocated_bitmap_gc is not part of the test.
 */
static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start,
unsigned sectors)
{
return __bch2_dev_btree_bitmap_marked_sectors(ca, start, sectors, false);
}
/*
 * Return true if at least one bitmap bit covering the sector range
 * [@start, @start + @sectors) is set in ca->mi.btree_allocated_bitmap.
 *
 * The bitmap has 64 bits, each covering (1 << btree_bitmap_shift) sectors.
 * Sectors beyond the area the bitmap covers have no bit and never count as
 * marked.
 */
static inline bool bch2_dev_btree_bitmap_marked_sectors_any(struct bch_dev *ca, u64 start, unsigned sectors)
{
	u64 bitmap_end = 64ULL << ca->mi.btree_bitmap_shift;
	u64 end = start + sectors;

	if (start >= bitmap_end)
		return false;

	/*
	 * Clamp the range to the area the bitmap covers. Without this, a range
	 * that starts inside the bitmap but ends past it would push @bit to 64
	 * or more, and BIT_ULL(bit) would be an undefined shift.
	 */
	if (end > bitmap_end)
		end = bitmap_end;

	for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
	     (u64) bit << ca->mi.btree_bitmap_shift < end;
	     bit++)
		if (ca->mi.btree_allocated_bitmap & BIT_ULL(bit))
			return true;

	return false;
}
bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
bool bch2_dev_btree_bitmap_marked_nogc(struct bch_fs *, struct bkey_s_c);
void bch2_dev_btree_bitmap_mark_locked(struct bch_fs *, struct bkey_s_c, bool *);
void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
int bch2_btree_bitmap_gc(struct bch_fs *);
void bch2_maybe_schedule_btree_bitmap_gc_stop(struct bch_fs *);
void bch2_maybe_schedule_btree_bitmap_gc(struct bch_fs *);
int bch2_sb_member_alloc(struct bch_fs *); int bch2_sb_member_alloc(struct bch_fs *);
void bch2_sb_members_clean_deleted(struct bch_fs *); void bch2_sb_members_clean_deleted(struct bch_fs *);

View File

@ -125,6 +125,9 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);
#define darray_for_each_reverse(_d, _i) \ #define darray_for_each_reverse(_d, _i) \
for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i)
/*
 * Sort the elements of darray @_d in place with comparator @_cmp: expands to
 * sort() over (_d).data, (_d).nr elements of sizeof((_d).data[0]) each, with
 * NULL as sort()'s last argument. @_d is evaluated more than once.
 */
#define darray_sort(_d, _cmp) \
sort((_d).data, (_d).nr, sizeof((_d).data[0]), _cmp, NULL)
/* Init/exit */ /* Init/exit */
#define darray_init(_d) \ #define darray_init(_d) \

View File

@ -606,49 +606,32 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)
int bch2_bio_alloc_pages(struct bio *bio, unsigned bs, size_t size, gfp_t gfp_mask) int bch2_bio_alloc_pages(struct bio *bio, unsigned bs, size_t size, gfp_t gfp_mask)
{ {
BUG_ON(!is_power_of_2(bs));
BUG_ON(size & (bs - 1)); BUG_ON(size & (bs - 1));
unsigned bs_pages = DIV_ROUND_UP(bs, PAGE_SIZE);
/* unsigned max_alloc = max(bs, PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
* XXX: we could do this by allocating higher order pages, but
*
* - the page allocator gets slower at a certain order (5?) - we'd have
* to check for this
*
* - bch2_bio_free_pages_pool() probably does not handle compound pages
* yet
*/
DARRAY_PREALLOCATED(struct page *, 16) pages;
darray_init(&pages);
darray_make_room_gfp(&pages, bs_pages, gfp_mask|__GFP_NOFAIL);
int ret = 0; while (bio->bi_iter.bi_size < size) {
while (size) { unsigned b = min(size - bio->bi_iter.bi_size, max_alloc);
while (pages.nr < bs_pages) {
struct page *page = alloc_pages(gfp_mask, 0);
if (!page) {
ret = -ENOMEM;
goto out;
}
BUG_ON(darray_push(&pages, page)); BUG_ON(b & (bs - 1));
}
while (pages.nr) { #ifdef __KERNEL__
BUG_ON(!size); /*
* we don't know the device dma alignment, so in kernel make
* sure allocations are page aligned
*/
void *p = (void *) __get_free_pages(gfp_mask, get_order(b));
#else
void *p = kmalloc(b, gfp_mask);
#endif
if (!p)
return -ENOMEM;
unsigned len = min(PAGE_SIZE, size); bio_add_virt_nofail(bio, p, b);
size -= len;
struct page *page = darray_pop(&pages);
BUG_ON(!bio_add_page(bio, page, len, 0));
}
} }
out:
darray_for_each(pages, i) return 0;
__free_page(*i);
darray_exit(&pages);
return ret;
} }
u64 bch2_get_random_u64_below(u64 ceil) u64 bch2_get_random_u64_below(u64 ceil)
@ -678,9 +661,8 @@ void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
struct bvec_iter iter; struct bvec_iter iter;
__bio_for_each_segment(bv, dst, iter, dst_iter) { __bio_for_each_segment(bv, dst, iter, dst_iter) {
void *dstp = kmap_local_page(bv.bv_page); void *dstp = bvec_kmap_local(&bv);
memcpy(dstp, src, bv.bv_len);
memcpy(dstp + bv.bv_offset, src, bv.bv_len);
kunmap_local(dstp); kunmap_local(dstp);
src += bv.bv_len; src += bv.bv_len;
@ -693,9 +675,8 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
struct bvec_iter iter; struct bvec_iter iter;
__bio_for_each_segment(bv, src, iter, src_iter) { __bio_for_each_segment(bv, src, iter, src_iter) {
void *srcp = kmap_local_page(bv.bv_page); void *srcp = bvec_kmap_local(&bv);
memcpy(dst, srcp, bv.bv_len);
memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
kunmap_local(srcp); kunmap_local(srcp);
dst += bv.bv_len; dst += bv.bv_len;

View File

@ -422,14 +422,6 @@ retry:
} }
} }
#define memalloc_flags_do(_flags, _do) \
({ \
unsigned _saved_flags = memalloc_flags_save(_flags); \
typeof(_do) _ret = _do; \
memalloc_noreclaim_restore(_saved_flags); \
_ret; \
})
static struct inode *bch2_alloc_inode(struct super_block *sb) static struct inode *bch2_alloc_inode(struct super_block *sb)
{ {
BUG(); BUG();

View File

@ -64,27 +64,13 @@ const char *blk_status_to_str(blk_status_t status)
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter) struct bio *src, struct bvec_iter *src_iter)
{ {
struct bio_vec src_bv, dst_bv;
void *src_p, *dst_p;
unsigned bytes;
while (src_iter->bi_size && dst_iter->bi_size) { while (src_iter->bi_size && dst_iter->bi_size) {
src_bv = bio_iter_iovec(src, *src_iter); struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
dst_bv = bio_iter_iovec(dst, *dst_iter); struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
bytes = min(src_bv.bv_len, dst_bv.bv_len); unsigned bytes = min(src_bv.bv_len, dst_bv.bv_len);
src_p = kmap_atomic(src_bv.bv_page); memcpy(dst_bv.bv_addr, src_bv.bv_addr, bytes);
dst_p = kmap_atomic(dst_bv.bv_page);
memcpy(dst_p + dst_bv.bv_offset,
src_p + src_bv.bv_offset,
bytes);
kunmap_atomic(dst_p);
kunmap_atomic(src_p);
flush_dcache_page(dst_bv.bv_page);
bio_advance_iter(src, src_iter, bytes); bio_advance_iter(src, src_iter, bytes);
bio_advance_iter(dst, dst_iter, bytes); bio_advance_iter(dst, dst_iter, bytes);
@ -109,15 +95,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{ {
unsigned long flags;
struct bio_vec bv; struct bio_vec bv;
struct bvec_iter iter; struct bvec_iter iter;
__bio_for_each_segment(bv, bio, iter, start) { __bio_for_each_segment(bv, bio, iter, start)
char *data = bvec_kmap_irq(&bv, &flags); memset(bv.bv_addr, 0, bv.bv_len);
memset(data, 0, bv.bv_len);
bvec_kunmap_irq(data, &flags);
}
} }
static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
@ -165,15 +147,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
return split; return split;
} }
void bio_free_pages(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bvec;
bio_for_each_segment_all(bvec, bio, iter)
__free_page(bvec->bv_page);
}
void bio_advance(struct bio *bio, unsigned bytes) void bio_advance(struct bio *bio, unsigned bytes)
{ {
bio_advance_iter(bio, &bio->bi_iter, bytes); bio_advance_iter(bio, &bio->bi_iter, bytes);
@ -208,26 +181,18 @@ void bio_put(struct bio *bio)
} }
} }
int bio_add_page(struct bio *bio, struct page *page, void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
unsigned int len, unsigned int off)
{ {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt]; struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs); WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);
bv->bv_page = page; bv->bv_addr = vaddr;
bv->bv_offset = off;
bv->bv_len = len; bv->bv_len = len;
bio->bi_iter.bi_size += len; bio->bi_iter.bi_size += len;
bio->bi_vcnt++; bio->bi_vcnt++;
return len;
}
void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
{
bio_add_page(bio, virt_to_page(vaddr), len, offset_in_page(vaddr));
} }
static inline bool bio_remaining_done(struct bio *bio) static inline bool bio_remaining_done(struct bio *bio)

View File

@ -59,18 +59,15 @@ void generic_make_request(struct bio *bio)
i = 0; i = 0;
bio_for_each_segment(bv, bio, iter) { bio_for_each_segment(bv, bio, iter) {
void *start = page_address(bv.bv_page) + bv.bv_offset;
size_t len = bv.bv_len;
iov[i++] = (struct iovec) { iov[i++] = (struct iovec) {
.iov_base = start, .iov_base = bv.bv_addr,
.iov_len = len, .iov_len = bv.bv_len,
}; };
#ifdef CONFIG_VALGRIND #ifdef CONFIG_VALGRIND
/* To be pedantic it should only be on IO completion. */ /* To be pedantic it should only be on IO completion. */
if (bio_op(bio) == REQ_OP_READ) if (bio_op(bio) == REQ_OP_READ)
VALGRIND_MAKE_MEM_DEFINED(start, len); VALGRIND_MAKE_MEM_DEFINED(bv.bv_addr, bv.bv_len);
#endif #endif
} }