mirror of https://github.com/koverstreet/bcachefs-tools.git (synced 2025-12-08 00:00:12 +03:00)
Update bcachefs sources to ba3f652e4cdc bcachefs: Decrypt before checking if we read the right btree node
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent 969305f122
commit 5c811f012b
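As an illustrative aside: the headline change reorders the btree node read path so the node is decrypted before its header sequence number is compared against the pointer's expected seq (see the bch2_btree_node_read_done hunks below, where the identity check moves from before the decryption step to after it). The following standalone C sketch only demonstrates that ordering problem; every name in it is hypothetical and none of it is bcachefs code.

/*
 * Illustrative only -- hypothetical names, not bcachefs code: if the
 * identity check runs on the still-encrypted header, a perfectly good
 * node looks like "the wrong btree node"; decrypting first fixes it.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct node_hdr { uint64_t seq; };

/* stand-in "encryption": XOR with a per-device key */
static void decrypt_hdr(struct node_hdr *h, uint64_t key)
{
	h->seq ^= key;
}

static bool seq_matches(const struct node_hdr *h, uint64_t want)
{
	return h->seq == want;
}

int main(void)
{
	uint64_t key = 0xdeadbeefULL, want_seq = 42;
	struct node_hdr on_disk = { .seq = want_seq ^ key };	/* as read from disk */

	/* old order: compare before decrypting -> spurious mismatch */
	printf("check before decrypt: %s\n",
	       seq_matches(&on_disk, want_seq) ? "match" : "mismatch");

	/* new order: decrypt, then compare -> match */
	decrypt_hdr(&on_disk, key);
	printf("check after decrypt:  %s\n",
	       seq_matches(&on_disk, want_seq) ? "match" : "mismatch");
	return 0;
}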
@@ -1 +1 @@
c53ba9651da768e74b787eb40bc05fd56e9ca5ef
ba3f652e4cdc86313cb13380efd59f1e6e6f484f

@@ -34,15 +34,9 @@
#define bio_iter_iovec(bio, iter) \
bvec_iter_bvec((bio)->bi_io_vec, (iter))

#define bio_iter_page(bio, iter) \
bvec_iter_page((bio)->bi_io_vec, (iter))
#define bio_iter_len(bio, iter) \
bvec_iter_len((bio)->bi_io_vec, (iter))
#define bio_iter_offset(bio, iter) \
bvec_iter_offset((bio)->bi_io_vec, (iter))

#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter)
#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter)
#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter)

#define bio_multiple_segments(bio) \
@@ -99,20 +93,6 @@ static inline unsigned int bio_cur_bytes(struct bio *bio)
return bio->bi_iter.bi_size;
}

static inline void *bio_data(struct bio *bio)
{
if (bio_has_data(bio))
return page_address(bio_page(bio)) + bio_offset(bio);

return NULL;
}

#define __bio_kmap_atomic(bio, iter) \
(kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \
bio_iter_iovec((bio), (iter)).bv_offset)

#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)

static inline struct bio_vec *bio_next_segment(const struct bio *bio,
struct bvec_iter_all *iter)
{
@@ -238,7 +218,6 @@ struct bio *bio_alloc_bioset(struct block_device *, unsigned,

extern void bio_put(struct bio *);

int bio_add_page(struct bio *, struct page *, unsigned, unsigned);
void bio_add_virt_nofail(struct bio *, void *, unsigned);

static inline void bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned len)
@@ -265,8 +244,6 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter);
extern void bio_copy_data(struct bio *dst, struct bio *src);

void bio_free_pages(struct bio *bio);

void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);

static inline void zero_fill_bio(struct bio *bio)
@@ -284,30 +261,13 @@ do { \
(dst)->bi_bdev = (src)->bi_bdev; \
} while (0)

static inline void *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
{
return page_address(bvec->bv_page) + bvec->bv_offset;
}

static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
{
*flags = 0;
}

static inline void *bvec_kmap_local(struct bio_vec *bvec)
{
return page_address(bvec->bv_page) + bvec->bv_offset;
return bvec_virt(bvec);
}

static inline void bvec_kunmap_local(char *buffer) {}

static inline void *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
unsigned long *flags)
{
return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
}
#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags)

#define bio_kmap_irq(bio, flags) \
__bio_kmap_irq((bio), (bio)->bi_iter, (flags))
#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
@@ -27,9 +27,8 @@
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
struct bio_vec {
struct page *bv_page;
void *bv_addr;
unsigned int bv_len;
unsigned int bv_offset;
};

struct bvec_iter {
@@ -53,21 +52,22 @@ struct bvec_iter_all {
*/
#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx])

#define bvec_iter_page(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_page)
static inline void *bvec_virt(struct bio_vec *bv)
{
return bv->bv_addr;
}

#define bvec_iter_addr(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_addr + (iter).bi_bvec_done)

#define bvec_iter_len(bvec, iter) \
min((iter).bi_size, \
__bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)

#define bvec_iter_offset(bvec, iter) \
(__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)

#define bvec_iter_bvec(bvec, iter) \
((struct bio_vec) { \
.bv_page = bvec_iter_page((bvec), (iter)), \
.bv_addr = bvec_iter_addr((bvec), (iter)), \
.bv_len = bvec_iter_len((bvec), (iter)), \
.bv_offset = bvec_iter_offset((bvec), (iter)), \
})

static inline void bvec_iter_advance(const struct bio_vec *bv,

@@ -593,6 +593,7 @@ struct bch_dev {
* Committed by bch2_write_super() -> bch_fs_mi_update()
*/
struct bch_member_cpu mi;
u64 btree_allocated_bitmap_gc;
atomic64_t errors[BCH_MEMBER_ERROR_NR];
unsigned long write_errors_start;

@@ -865,6 +866,8 @@ struct bch_fs {
struct closure sb_write;
struct mutex sb_lock;

struct delayed_work maybe_schedule_btree_bitmap_gc;

/* snapshot.c: */
struct snapshot_table __rcu *snapshots;
struct mutex snapshot_table_lock;
@@ -1037,7 +1040,7 @@ struct bch_fs {
struct bio_set bio_write;
struct bio_set replica_set;
struct mutex bio_bounce_pages_lock;
mempool_t bio_bounce_pages;
mempool_t bio_bounce_bufs;
struct bucket_nocow_lock_table
nocow_locks;
struct rhashtable promote_table;
@@ -661,16 +661,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
atomic64_set(&c->key_version, k.k->bversion.lo);
}

if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked_nogc(c, k),
trans, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
guard(mutex)(&c->sb_lock);
buf.buf)))
bch2_dev_btree_bitmap_mark(c, k);
bch2_write_super(c);
}

/*
* We require a commit before key_trigger() because

@@ -639,10 +639,12 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as)
struct bch_fs *c = as->c;

guard(mutex)(&c->sb_lock);
bool write_sb = false;
darray_for_each(as->new_nodes, i)
bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(&i->key));
bch2_dev_btree_bitmap_mark_locked(c, bkey_i_to_s_c(&i->key), &write_sb);

bch2_write_super(c);
if (write_sb)
bch2_write_super(c);
}

static void bkey_strip_reconcile(const struct bch_fs *c, struct bkey_s k)
@@ -2133,18 +2135,35 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
sib_u64s = btree_node_u64s_with_format(b->nr, &b->format, &new_f) +
btree_node_u64s_with_format(m->nr, &m->format, &new_f);

if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) {
sib_u64s -= BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
sib_u64s /= 2;
sib_u64s += BTREE_FOREGROUND_MERGE_HYSTERESIS(c);
if (trace_btree_node_merge_attempt_enabled()) {
CLASS(printbuf, buf)();
guard(printbuf_indent)(&buf);

bch2_btree_pos_to_text(&buf, c, prev);
prt_printf(&buf, "live u64s %u (%zu%% full)\n",
prev->nr.live_u64s,
prev->nr.live_u64s * 100 / btree_max_u64s(c));

bch2_btree_pos_to_text(&buf, c, next);
prt_printf(&buf, "live u64s %u (%zu%% full)\n",
next->nr.live_u64s,
next->nr.live_u64s * 100 / btree_max_u64s(c));

prt_printf(&buf, "merged would have %zu threshold %u\n",
sib_u64s, c->btree_foreground_merge_threshold);
trace_btree_node_merge_attempt(c, buf.buf);
}
count_event(c, btree_node_merge_attempt);

sib_u64s = min(sib_u64s, btree_max_u64s(c));
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
if (sib_u64s > c->btree_foreground_merge_threshold) {
if (sib_u64s > BTREE_FOREGROUND_MERGE_HYSTERESIS(c))
sib_u64s -= (sib_u64s - BTREE_FOREGROUND_MERGE_HYSTERESIS(c)) / 2;

if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
sib_u64s = min(sib_u64s, btree_max_u64s(c));
sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
goto out;
}

parent = btree_node_parent(trans->paths + path, b);
as = bch2_btree_update_start(trans, trans->paths + path, level, false,
@@ -235,27 +235,30 @@ static int read_btree_nodes_worker(void *p)
goto err;
}

u64 buckets_to_scan = 0;
for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
buckets_to_scan += c->sb.version_upgrade_complete < bcachefs_metadata_version_mi_btree_bitmap ||
bch2_dev_btree_bitmap_marked_sectors_any(ca, bucket_to_sector(ca, bucket), ca->mi.bucket_size);

u64 buckets_scanned = 0;
for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++) {
if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
!bch2_dev_btree_bitmap_marked_sectors_any(ca, bucket_to_sector(ca, bucket), ca->mi.bucket_size))
continue;

for (unsigned bucket_offset = 0;
bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
bucket_offset += btree_sectors(c)) {
if (time_after(jiffies, last_print + HZ * 30)) {
u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
bucket_offset += btree_sectors(c))
try_read_btree_node(w->f, ca, b, bio, bucket_to_sector(ca, bucket) + bucket_offset);

bch_info(ca, "%s: %2u%% done", __func__,
(unsigned) div64_u64(cur_sector * 100, end_sector));
last_print = jiffies;
}
buckets_scanned++;

u64 sector = bucket * ca->mi.bucket_size + bucket_offset;

if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
continue;

try_read_btree_node(w->f, ca, b, bio, sector);
if (time_after(jiffies, last_print + HZ * 30)) {
bch_info(ca, "%s: %2u%% done", __func__,
(unsigned) div64_u64(buckets_scanned * 100, buckets_to_scan));
last_print = jiffies;
}
}
err:
if (b)
__btree_node_data_free(b);

@@ -660,33 +660,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
"bad magic: want %llx, got %llx",
bset_magic(c), le64_to_cpu(b->data->magic));

if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *bp =
&bkey_i_to_btree_ptr_v2(&b->key)->v;

bch2_bpos_to_text(&buf, b->data->min_key);
prt_str(&buf, "-");
bch2_bpos_to_text(&buf, b->data->max_key);

btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"got wrong btree node: got\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
} else {
btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"bad btree header: seq 0\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
}

while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors;
bool first = !b->written;
@@ -743,6 +716,33 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
goto fsck_err;
}

if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bch_btree_ptr_v2 *bp =
&bkey_i_to_btree_ptr_v2(&b->key)->v;

bch2_bpos_to_text(&buf, b->data->min_key);
prt_str(&buf, "-");
bch2_bpos_to_text(&buf, b->data->max_key);

btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"got wrong btree node: got\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
} else {
btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, NULL,
btree_node_bad_seq,
"bad btree header: seq 0\n%s",
(printbuf_reset(&buf),
bch2_btree_node_header_to_text(&buf, b->data),
buf.buf));
}

btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-BCH_ERR_btree_node_read_err_incompatible,
@@ -202,15 +202,14 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,

#ifdef CONFIG_HIGHMEM
__bio_for_each_segment(bv, bio, *iter, *iter) {
void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
void *p = bvec_kmap_local(&bv);

bch2_checksum_update(&state, p, bv.bv_len);
kunmap_local(p);
}
#else
__bio_for_each_bvec(bv, bio, *iter, *iter)
bch2_checksum_update(&state, page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
bch2_checksum_update(&state, bvec_virt(&bv), bv.bv_len);
#endif
return (struct bch_csum) { .lo = cpu_to_le64(bch2_checksum_final(&state)) };
}
@@ -225,16 +224,14 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,

#ifdef CONFIG_HIGHMEM
__bio_for_each_segment(bv, bio, *iter, *iter) {
void *p = kmap_local_page(bv.bv_page) + bv.bv_offset;
void *p = bvec_kmap_local(&bv);

poly1305_update(&dctx, p, bv.bv_len);
kunmap_local(p);
}
#else
__bio_for_each_bvec(bv, bio, *iter, *iter)
poly1305_update(&dctx,
page_address(bv.bv_page) + bv.bv_offset,
bv.bv_len);
poly1305_update(&dctx, bvec_virt(&bv), bv.bv_len);
#endif
poly1305_final(&dctx, digest);

@@ -95,12 +95,12 @@ static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
void *expected_start = NULL;

__bio_for_each_bvec(bv, bio, iter, start) {
if (expected_start &&
expected_start != page_address(bv.bv_page) + bv.bv_offset)
void *bv_addr = bvec_virt(&bv);

if (expected_start && expected_start != bv_addr)
return false;

expected_start = page_address(bv.bv_page) +
bv.bv_offset + bv.bv_len;
expected_start = bv_addr + bv.bv_len;
}

return true;
@@ -109,27 +109,27 @@ static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
struct bvec_iter start, int rw)
{
struct bio_vec bv;
struct bvec_iter iter;
unsigned nr_pages = 0;
struct page *stack_pages[16];
struct page **pages = NULL;
void *data;

BUG_ON(start.bi_size > c->opts.encoded_extent_max);

if (!PageHighMem(bio_iter_page(bio, start)) &&
bio_phys_contig(bio, start))
#ifndef CONFIG_HIGHMEM
if (bio_phys_contig(bio, start))
return (struct bbuf) {
.c = c,
.b = page_address(bio_iter_page(bio, start)) +
bio_iter_offset(bio, start),
.b = bvec_virt(&bio_iter_iovec(bio, start)),
.type = BB_none,
.rw = rw
};
#endif

#ifdef __KERNEL__
/* check if we can map the pages contiguously: */
struct bio_vec bv;
struct bvec_iter iter;
unsigned nr_pages = 0;

__bio_for_each_segment(bv, bio, iter, start) {
BUG_ON(bv.bv_offset + bv.bv_len > PAGE_SIZE);

if (iter.bi_size != start.bi_size &&
bv.bv_offset)
return bio_bounce(c, bio, start, rw);
@@ -143,7 +143,8 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,

BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

pages = nr_pages > ARRAY_SIZE(stack_pages)
struct page *stack_pages[16];
struct page **pages = nr_pages > ARRAY_SIZE(stack_pages)
? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
: stack_pages;
if (!pages)
@@ -153,19 +154,20 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
__bio_for_each_segment(bv, bio, iter, start)
pages[nr_pages++] = bv.bv_page;

data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
void *data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
if (pages != stack_pages)
kfree(pages);

if (!data)
return bio_bounce(c, bio, start, rw);
if (data)
return (struct bbuf) {
c,
data + bio_iter_offset(bio, start),
BB_vmap,
rw
};
#endif /* __KERNEL__ */

return (struct bbuf) {
c,
data + bio_iter_offset(bio, start),
BB_vmap,
rw
};
return bio_bounce(c, bio, start, rw);
}

static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
@@ -28,6 +28,8 @@
#include "util/util.h"

#ifdef CONFIG_BCACHEFS_DEBUG
#include <linux/module.h>

static int bch2_force_read_device = -1;

module_param_named(force_read_device, bch2_force_read_device, int, 0644);

@@ -344,7 +344,7 @@ err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params));
err:
bio_free_pages(&op->write.op.wbio.bio);
bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
/* We may have added to the rhashtable and thus need rcu freeing: */
kfree_rcu(op, rcu);
err_put:
@@ -1253,7 +1253,7 @@ retry_pick:
&c->bio_read_split),
orig);

bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
bch2_bio_alloc_pages_pool(c, &rbio->bio, 512, sectors << 9);
rbio->bounce = true;
} else if (flags & BCH_READ_must_clone) {
/*
@@ -1591,16 +1591,29 @@ void bch2_fs_io_read_exit(struct bch_fs *c)
rhashtable_destroy(&c->promote_table);
bioset_exit(&c->bio_read_split);
bioset_exit(&c->bio_read);
mempool_exit(&c->bio_bounce_pages);
mempool_exit(&c->bio_bounce_bufs);
}

static void *bio_bounce_buf_alloc_fn(gfp_t gfp, void *pool_data)
{
return (void *) __get_free_pages(gfp, PAGE_ALLOC_COSTLY_ORDER);
}

static void bio_bounce_buf_free_fn(void *p, void *pool_data)
{
free_pages((unsigned long) p, PAGE_ALLOC_COSTLY_ORDER);
}

int bch2_fs_io_read_init(struct bch_fs *c)
{
if (mempool_init_page_pool(&c->bio_bounce_pages,
max_t(unsigned,
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
PAGE_SIZE, 0))
if (mempool_init(&c->bio_bounce_bufs,
max_t(unsigned,
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
BIO_BOUNCE_BUF_POOL_LEN,
bio_bounce_buf_alloc_fn,
bio_bounce_buf_free_fn,
NULL))
return bch_err_throw(c, ENOMEM_bio_bounce_pages_init);

if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),

@@ -7,6 +7,8 @@
#include "extents_types.h"
#include "data/reflink.h"

#define BIO_BOUNCE_BUF_POOL_LEN (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *);
#endif

@@ -113,42 +113,41 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)

void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
for (struct bio_vec *bv = bio->bi_io_vec;
bv < bio->bi_io_vec + bio->bi_vcnt;
bv++) {
void *p = bvec_virt(bv);

bio_for_each_segment_all(bv, bio, iter)
mempool_free(bv->bv_page, &c->bio_bounce_pages);
if (bv->bv_len == BIO_BOUNCE_BUF_POOL_LEN)
mempool_free(p, &c->bio_bounce_bufs);
else
free_pages((unsigned long) p, get_order(bv->bv_len));
}
bio->bi_vcnt = 0;
}

static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
static void __bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
unsigned bs, size_t size)
{
if (likely(!*using_mempool)) {
struct page *page = alloc_page(GFP_NOFS);
if (likely(page))
return page;
mutex_lock(&c->bio_bounce_pages_lock);

mutex_lock(&c->bio_bounce_pages_lock);
*using_mempool = true;
}
return mempool_alloc(&c->bio_bounce_pages, GFP_NOFS);
while (bio->bi_iter.bi_size < size)
bio_add_virt_nofail(bio,
mempool_alloc(&c->bio_bounce_bufs, GFP_NOFS),
BIO_BOUNCE_BUF_POOL_LEN);

bio->bi_iter.bi_size = min(bio->bi_iter.bi_size, size);

mutex_unlock(&c->bio_bounce_pages_lock);
}

void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
size_t size)
unsigned bs, size_t size)
{
bool using_mempool = false;
bch2_bio_alloc_pages(bio, c->opts.block_size, size, GFP_NOFS);

while (size) {
struct page *page = __bio_alloc_page_pool(c, &using_mempool);
unsigned len = min_t(size_t, PAGE_SIZE, size);

BUG_ON(!bio_add_page(bio, page, len, 0));
size -= len;
}

if (using_mempool)
mutex_unlock(&c->bio_bounce_pages_lock);
if (bio->bi_iter.bi_size < size)
__bch2_bio_alloc_pages_pool(c, bio, bs, size);
}

/* Extent update path: */
@@ -837,23 +836,22 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
return bio;
}

wbio->bounce = true;
wbio->bounce = true;

/*
* We can't use mempool for more than c->sb.encoded_extent_max
* worth of pages, but we'd like to allocate more if we can:
*/
bch2_bio_alloc_pages_pool(c, bio,
min_t(unsigned, output_available,
c->opts.encoded_extent_max));
bch2_bio_alloc_pages(bio,
c->opts.block_size,
output_available,
GFP_NOFS);

if (bio->bi_iter.bi_size < output_available)
*page_alloc_failed =
bch2_bio_alloc_pages(bio,
c->opts.block_size,
output_available -
bio->bi_iter.bi_size,
GFP_NOFS) != 0;
unsigned required = min(output_available, c->opts.encoded_extent_max);

if (unlikely(bio->bi_iter.bi_size < required))
__bch2_bio_alloc_pages_pool(c, bio, c->opts.block_size, required);

return bio;
}

@@ -9,7 +9,7 @@
container_of((_bio), struct bch_write_bio, bio)

void bch2_bio_free_pages_pool(struct bch_fs *, struct bio *);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, size_t);
void bch2_bio_alloc_pages_pool(struct bch_fs *, struct bio *, unsigned, size_t);

void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
@@ -556,6 +556,11 @@ DEFINE_EVENT(fs_str, btree_node_rewrite,
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, btree_node_merge_attempt,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)
);

DEFINE_EVENT(fs_str, btree_node_merge,
TP_PROTO(struct bch_fs *c, const char *str),
TP_ARGS(c, str)

@@ -331,6 +331,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]);

ca->mi = bch2_mi_to_cpu(member);
ca->btree_allocated_bitmap_gc = le64_to_cpu(member->btree_allocated_bitmap);

for (i = 0; i < ARRAY_SIZE(member->errors); i++)
atomic64_set(&ca->errors[i], le64_to_cpu(member->errors[i]));

@@ -264,11 +264,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
unsigned clean_passes = 0;
u64 seq = 0;

bch2_maybe_schedule_btree_bitmap_gc_stop(c);
bch2_fs_ec_stop(c);
bch2_open_buckets_stop(c, NULL, true);
bch2_reconcile_stop(c);
bch2_copygc_stop(c);
bch2_fs_ec_flush(c);
cancel_delayed_work_sync(&c->maybe_schedule_btree_bitmap_gc);

bch_verbose(c, "flushing journal and stopping allocators, journal seq %llu",
journal_cur_seq(&c->journal));
@@ -524,6 +526,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_do_invalidates(c);
bch2_do_stripe_deletes(c);
bch2_do_pending_node_rewrites(c);
bch2_maybe_schedule_btree_bitmap_gc(c);
return 0;
}

@@ -176,31 +176,43 @@ void bch2_recovery_pass_set_no_ratelimit(struct bch_fs *c,
}
}

static bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass)
static bool bch2_recovery_pass_entry_get_locked(struct bch_fs *c, enum bch_recovery_pass pass,
struct recovery_pass_entry *e)
{
enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
bool ret = false;

lockdep_assert_held(&c->sb_lock);

struct bch_sb_field_recovery_passes *r =
bch2_sb_field_get(c->disk_sb.sb, recovery_passes);

if (stable < recovery_passes_nr_entries(r)) {
struct recovery_pass_entry *i = r->start + stable;
enum bch_recovery_pass_stable stable = bch2_recovery_pass_to_stable(pass);
bool found = stable < recovery_passes_nr_entries(r);
if (found)
*e = r->start[stable];

/*
* Ratelimit if the last runtime was more than 1% of the time
* since we last ran
*/
ret = (u64) le32_to_cpu(i->last_runtime) * 100 >
ktime_get_real_seconds() - le64_to_cpu(i->last_run);
return found;
}

if (BCH_RECOVERY_PASS_NO_RATELIMIT(i))
ret = false;
}
static bool bch2_recovery_pass_want_ratelimit_locked(struct bch_fs *c, enum bch_recovery_pass pass,
unsigned runtime_fraction)
{
struct recovery_pass_entry e;
if (!bch2_recovery_pass_entry_get_locked(c, pass, &e))
return false;

return ret;
/*
* Ratelimit if the last runtime was more than 1% of the time
* since we last ran
*/
return !BCH_RECOVERY_PASS_NO_RATELIMIT(&e) &&
(u64) le32_to_cpu(e.last_runtime) * runtime_fraction >
ktime_get_real_seconds() - le64_to_cpu(e.last_run);
}

bool bch2_recovery_pass_want_ratelimit(struct bch_fs *c, enum bch_recovery_pass pass,
unsigned runtime_fraction)
{
guard(mutex)(&c->sb_lock);
return bch2_recovery_pass_want_ratelimit_locked(c, pass, runtime_fraction);
}

const struct bch_sb_field_ops bch_sb_field_ops_recovery_passes = {
@@ -311,7 +323,7 @@ static bool recovery_pass_needs_set(struct bch_fs *c,
*flags |= RUN_RECOVERY_PASS_nopersistent;

if ((*flags & RUN_RECOVERY_PASS_ratelimit) &&
!bch2_recovery_pass_want_ratelimit(c, pass))
!bch2_recovery_pass_want_ratelimit_locked(c, pass, 100))
*flags &= ~RUN_RECOVERY_PASS_ratelimit;

/*
@@ -451,7 +463,7 @@ int bch2_require_recovery_pass(struct bch_fs *c,

guard(mutex)(&c->sb_lock);

if (bch2_recovery_pass_want_ratelimit(c, pass))
if (bch2_recovery_pass_want_ratelimit_locked(c, pass, 100))
return 0;

enum bch_run_recovery_pass_flags flags = 0;

@@ -46,6 +46,8 @@ static inline int bch2_recovery_cancelled(struct bch_fs *c)
return 0;
}

bool bch2_recovery_pass_want_ratelimit(struct bch_fs *, enum bch_recovery_pass, unsigned);

int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);

int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *,
@@ -66,6 +66,7 @@
x(delete_dead_inodes, 32, PASS_ALWAYS) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_reconcile, 34, 0) \
x(btree_bitmap_gc, 46, PASS_ONLINE) \
x(lookup_root_inode, 42, PASS_ALWAYS|PASS_SILENT)

/* We normally enumerate recovery passes in the order we run them: */

@@ -63,10 +63,12 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
int bch2_jset_validate(struct bch_fs *, struct bch_dev *, struct jset *,
u64, enum bch_validate_flags);

struct u64_range {
typedef struct u64_range {
u64 start;
u64 end;
};
} u64_range;

DEFINE_DARRAY(u64_range);

struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64);

@@ -726,7 +726,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
* we're holding the reclaim lock:
*/
lockdep_assert_held(&j->reclaim_lock);
flags = memalloc_noreclaim_save();
flags = memalloc_nofs_save();

do {
if (kthread && kthread_should_stop())
@@ -780,7 +780,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
wake_up(&j->reclaim_wait);
} while ((min_nr || min_key_cache) && nr_flushed && !direct);

memalloc_noreclaim_restore(flags);
memalloc_flags_restore(flags);

return ret;
}

@@ -2,6 +2,7 @@

#include "bcachefs.h"

#include "journal/read.h"
#include "journal/sb.h"

#include "util/darray.h"
@@ -28,35 +29,33 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct bch_sb_field *f,
if (!nr)
return 0;

u64 *b __free(kvfree) = kvmalloc_array(nr, sizeof(u64), GFP_KERNEL);
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_validate;
CLASS(darray_u64, b)();

for (unsigned i = 0; i < nr; i++)
b[i] = le64_to_cpu(journal->buckets[i]);
try(darray_push(&b, le64_to_cpu(journal->buckets[i])));

sort(b, nr, sizeof(u64), u64_cmp, NULL);
darray_sort(b, u64_cmp);

if (!b[0]) {
if (!darray_first(b)) {
prt_printf(err, "journal bucket at sector 0");
return -BCH_ERR_invalid_sb_journal;
}

if (b[0] < le16_to_cpu(m.first_bucket)) {
if (darray_first(b) < le16_to_cpu(m.first_bucket)) {
prt_printf(err, "journal bucket %llu before first bucket %u",
b[0], le16_to_cpu(m.first_bucket));
darray_first(b), le16_to_cpu(m.first_bucket));
return -BCH_ERR_invalid_sb_journal;
}

if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) {
if (darray_last(b) >= le64_to_cpu(m.nbuckets)) {
prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1], le64_to_cpu(m.nbuckets));
darray_last(b), le64_to_cpu(m.nbuckets));
return -BCH_ERR_invalid_sb_journal;
}

for (unsigned i = 0; i + 1 < nr; i++)
if (b[i] == b[i + 1]) {
prt_printf(err, "duplicate journal buckets %llu", b[i]);
darray_for_each(b, i)
if (i != &darray_last(b) && i[0] == i[1]) {
prt_printf(err, "duplicate journal buckets %llu", *i);
return -BCH_ERR_invalid_sb_journal;
}

@@ -80,11 +79,6 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal = {
.to_text = bch2_sb_journal_to_text,
};

struct u64_range {
u64 start;
u64 end;
};

static int u64_range_cmp(const void *_l, const void *_r)
{
const struct u64_range *l = _l;
@@ -104,15 +98,16 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
if (!nr)
return 0;

struct u64_range *b __free(kvfree) = kvmalloc_array(nr, sizeof(*b), GFP_KERNEL);
if (!b)
return -BCH_ERR_ENOMEM_sb_journal_v2_validate;
CLASS(darray_u64_range, b)();

for (unsigned i = 0; i < nr; i++) {
b[i].start = le64_to_cpu(journal->d[i].start);
b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
struct u64_range r = {
.start = le64_to_cpu(journal->d[i].start),
.end = le64_to_cpu(journal->d[i].start) +
le64_to_cpu(journal->d[i].nr),
};

if (b[i].end <= b[i].start) {
if (r.end <= r.start) {
prt_printf(err, "journal buckets entry with bad nr: %llu+%llu",
le64_to_cpu(journal->d[i].start),
le64_to_cpu(journal->d[i].nr));
@@ -120,34 +115,34 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
}

sum += le64_to_cpu(journal->d[i].nr);
try(darray_push(&b, r));
}

sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
darray_sort(b, u64_range_cmp);

if (!b[0].start) {
if (!darray_first(b).start) {
prt_printf(err, "journal bucket at sector 0");
return -BCH_ERR_invalid_sb_journal;
}

if (b[0].start < le16_to_cpu(m.first_bucket)) {
if (darray_first(b).start < le16_to_cpu(m.first_bucket)) {
prt_printf(err, "journal bucket %llu before first bucket %u",
b[0].start, le16_to_cpu(m.first_bucket));
darray_first(b).start, le16_to_cpu(m.first_bucket));
return -BCH_ERR_invalid_sb_journal;
}

if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) {
if (darray_last(b).end > le64_to_cpu(m.nbuckets)) {
prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)",
b[nr - 1].end - 1, le64_to_cpu(m.nbuckets));
darray_last(b).end - 1, le64_to_cpu(m.nbuckets));
return -BCH_ERR_invalid_sb_journal;
}

for (unsigned i = 0; i + 1 < nr; i++) {
if (b[i].end > b[i + 1].start) {
darray_for_each(b, i)
if (i != &darray_last(b) && i[0].end > i[1].start) {
prt_printf(err, "duplicate journal buckets in ranges %llu-%llu, %llu-%llu",
b[i].start, b[i].end, b[i + 1].start, b[i + 1].end);
i[0].start, i[0].end, i[1].start, i[1].end);
return -BCH_ERR_invalid_sb_journal;
}
}

if (sum > UINT_MAX) {
prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX);
@@ -179,11 +174,9 @@ const struct bch_sb_field_ops bch_sb_field_ops_journal_v2 = {
int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
u64 *buckets, unsigned nr)
{
struct bch_sb_field_journal_v2 *j;
unsigned i, dst = 0, nr_compacted = 1;
unsigned dst = 0, nr_compacted = 1;

if (c)
lockdep_assert_held(&c->sb_lock);
lockdep_assert_held(&c->sb_lock);

if (!nr) {
bch2_sb_field_delete(&ca->disk_sb, BCH_SB_FIELD_journal);
@@ -191,11 +184,12 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
return 0;
}

for (i = 0; i + 1 < nr; i++)
for (unsigned i = 0; i + 1 < nr; i++)
if (buckets[i] + 1 != buckets[i + 1])
nr_compacted++;

j = bch2_sb_field_resize(&ca->disk_sb, journal_v2,
struct bch_sb_field_journal_v2 *j =
bch2_sb_field_resize(&ca->disk_sb, journal_v2,
(sizeof(*j) + sizeof(j->d[0]) * nr_compacted) / sizeof(u64));
if (!j)
return bch_err_throw(c, ENOSPC_sb_journal);
@@ -205,7 +199,7 @@ int bch2_journal_buckets_to_sb(struct bch_fs *c, struct bch_dev *ca,
j->d[dst].start = cpu_to_le64(buckets[0]);
j->d[dst].nr = cpu_to_le64(1);

for (i = 1; i < nr; i++) {
for (unsigned i = 1; i < nr; i++) {
if (buckets[i] == buckets[i - 1] + 1) {
le64_add_cpu(&j->d[dst].nr, 1);
} else {
@@ -55,6 +55,7 @@ enum counters_flags {
x(btree_node_read, 14, TYPE_COUNTER) \
x(btree_node_compact, 15, TYPE_COUNTER) \
x(btree_node_merge, 16, TYPE_COUNTER) \
x(btree_node_merge_attempt, 101, TYPE_COUNTER) \
x(btree_node_split, 17, TYPE_COUNTER) \
x(btree_node_rewrite, 18, TYPE_COUNTER) \
x(btree_node_alloc, 19, TYPE_COUNTER) \

@@ -5,6 +5,8 @@
#include "sb/errors.h"
#include "sb/io.h"

#include "util/darray.h"

const char * const bch2_sb_error_strs[] = {
#define x(t, n, ...) [n] = #t,
BCH_SB_ERRS()
@@ -63,25 +65,25 @@ static int error_entry_cmp(const void *_l, const void *_r)
return -cmp_int(l->last_error_time, r->last_error_time);
}

DEFINE_DARRAY(bch_sb_field_error_entry);

static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_errors *e = field_to_type(f, errors);
unsigned nr = bch2_sb_field_errors_nr_entries(e);

struct bch_sb_field_error_entry *sorted = kvmalloc_array(nr, sizeof(*sorted), GFP_KERNEL);

if (sorted) {
memcpy(sorted, e->entries, nr * sizeof(e->entries[0]));
sort(sorted, nr, sizeof(*sorted), error_entry_cmp, NULL);
} else {
sorted = e->entries;
}

if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);

for (struct bch_sb_field_error_entry *i = sorted; i < sorted + nr; i++) {
CLASS(darray_bch_sb_field_error_entry, sorted)();

for (struct bch_sb_field_error_entry *i = e->entries; i < e->entries + nr; i++)
darray_push(&sorted, *i);

darray_sort(sorted, error_entry_cmp);

darray_for_each(sorted, i) {
bch2_sb_error_id_to_text(out, BCH_SB_ERROR_ENTRY_ID(i));
prt_tab(out);
prt_u64(out, BCH_SB_ERROR_ENTRY_NR(i));
@@ -89,9 +91,6 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
bch2_prt_datetime(out, le64_to_cpu(i->last_error_time));
prt_newline(out);
}

if (sorted != e->entries)
kvfree(sorted);
}

const struct bch_sb_field_ops bch_sb_field_ops_errors = {

@@ -360,12 +360,14 @@ enum bch_sb_error_id {
#undef x
};

typedef struct bch_sb_field_error_entry {
__le64 v;
__le64 last_error_time;
} bch_sb_field_error_entry;

struct bch_sb_field_errors {
struct bch_sb_field field;
struct bch_sb_field_error_entry {
__le64 v;
__le64 last_error_time;
} entries[];
bch_sb_field_error_entry entries[];
};

LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);

@@ -2,16 +2,19 @@

#include "bcachefs.h"

#include "alloc/buckets.h"
#include "alloc/disk_groups.h"
#include "alloc/replicas.h"

#include "btree/cache.h"
#include "btree/iter.h"

#include "sb/members.h"
#include "sb/io.h"

#include "init/error.h"
#include "init/passes.h"
#include "init/progress.h"

int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev)
{
@@ -512,35 +515,54 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
* have to scan full devices:
*/

bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
static bool __bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k, bool with_gc)
{
guard(rcu)();
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (ca &&
!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c)))
!__bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c), with_gc))
return false;
}
return true;
}

static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
u64 start, unsigned sectors)
bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
{
struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
return __bch2_dev_btree_bitmap_marked(c, k, true);
}

bool bch2_dev_btree_bitmap_marked_nogc(struct bch_fs *c, struct bkey_s_c k)
{
return __bch2_dev_btree_bitmap_marked(c, k, false);
}

static void __bch2_dev_btree_bitmap_mark(struct bch_dev *ca,
struct bch_sb_field_members_v2 *mi,
u64 start, unsigned sectors, bool *write_sb)
{
struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);

u64 end = start + sectors;

int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
if (resize > 0) {
u64 old_bitmap = le64_to_cpu(m->btree_allocated_bitmap);
u64 new_bitmap = 0;
u64 new_gc_bitmap = 0;

for (unsigned i = 0; i < 64; i++)
if (bitmap & BIT_ULL(i))
for (unsigned i = 0; i < 64; i++) {
if (old_bitmap & BIT_ULL(i))
new_bitmap |= BIT_ULL(i >> resize);
bitmap = new_bitmap;
if (ca->btree_allocated_bitmap_gc & BIT_ULL(i))
new_gc_bitmap |= BIT_ULL(i >> resize);
}

m->btree_allocated_bitmap = cpu_to_le64(new_bitmap);
m->btree_bitmap_shift += resize;
*write_sb = true;

ca->btree_allocated_bitmap_gc = new_gc_bitmap;
}

BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
@@ -548,25 +570,164 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns

for (unsigned bit = start >> m->btree_bitmap_shift;
(u64) bit << m->btree_bitmap_shift < end;
bit++)
bitmap |= BIT_ULL(bit);
bit++) {
__le64 b = cpu_to_le64(BIT_ULL(bit));

m->btree_allocated_bitmap = cpu_to_le64(bitmap);
if (!(m->btree_allocated_bitmap & b)) {
m->btree_allocated_bitmap |= b;
*write_sb = true;
}

ca->btree_allocated_bitmap_gc |= BIT_ULL(bit);
}
}

void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
void bch2_dev_btree_bitmap_mark_locked(struct bch_fs *c, struct bkey_s_c k, bool *write_sb)
{
lockdep_assert_held(&c->sb_lock);

struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

guard(rcu)();
bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
if (!bch2_member_exists(c->disk_sb.sb, ptr->dev))
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
if (!ca)
continue;

__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
__bch2_dev_btree_bitmap_mark(ca, mi, ptr->offset, btree_sectors(c), write_sb);
}
}

void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
{
guard(mutex)(&c->sb_lock);
bool write_sb = false;
bch2_dev_btree_bitmap_mark_locked(c, k, &write_sb);
if (write_sb)
bch2_write_super(c);
}

static int btree_bitmap_gc_btree_level(struct btree_trans *trans,
struct progress_indicator *progress,
enum btree_id btree, unsigned level)
{
struct bch_fs *c = trans->c;
CLASS(btree_node_iter, iter)(trans, btree, POS_MIN, 0, level, BTREE_ITER_prefetch);

try(for_each_btree_key_continue(trans, iter, 0, k, ({
if (!bch2_dev_btree_bitmap_marked(c, k))
bch2_dev_btree_bitmap_mark(c, k);

bch2_progress_update_iter(trans, progress, &iter, "btree_bitmap_gc");
})));

return 0;
}

int bch2_btree_bitmap_gc(struct bch_fs *c)
{
struct progress_indicator progress;
bch2_progress_init_inner(&progress, c, 0, ~0ULL);

scoped_guard(mutex, &c->sb_lock) {
guard(rcu)();
for_each_member_device_rcu(c, ca, NULL)
ca->btree_allocated_bitmap_gc = 0;
}

{
CLASS(btree_trans, trans)(c);

for (unsigned btree = 0; btree < btree_id_nr_alive(c); btree++) {
for (unsigned level = 1; level < BTREE_MAX_DEPTH; level++)
try(btree_bitmap_gc_btree_level(trans, &progress, btree, level));

CLASS(btree_node_iter, iter)(trans, btree, POS_MIN, 0,
bch2_btree_id_root(c, btree)->b->c.level, 0);
struct btree *b;
try(lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter))));

if (!bch2_dev_btree_bitmap_marked(c, bkey_i_to_s_c(&b->key)))
bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(&b->key));
}
}

u64 sectors_marked_old = 0, sectors_marked_new = 0;

scoped_guard(mutex, &c->sb_lock) {
struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

scoped_guard(rcu)
for_each_member_device_rcu(c, ca, NULL) {
sectors_marked_old += hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift;
sectors_marked_new += hweight64(ca->btree_allocated_bitmap_gc) << ca->mi.btree_bitmap_shift;

struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
m->btree_allocated_bitmap = cpu_to_le64(ca->btree_allocated_bitmap_gc);
}
bch2_write_super(c);
}

CLASS(printbuf, buf)();
prt_str(&buf, "mi_btree_bitmap sectors ");
prt_human_readable_u64(&buf, sectors_marked_old << 9);
prt_str(&buf, " -> ");
prt_human_readable_u64(&buf, sectors_marked_new << 9);
bch_info(c, "%s", buf.buf);

return 0;
}

static void bch2_maybe_schedule_btree_bitmap_gc_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, maybe_schedule_btree_bitmap_gc.work);

if (bch2_recovery_pass_want_ratelimit(c, BCH_RECOVERY_PASS_btree_bitmap_gc, 1000))
return;

CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);

bool want_schedule = false;
for_each_member_device(c, ca) {
struct bch_dev_usage u;
bch2_dev_usage_read_fast(ca, &u);

u64 btree_sectors = bucket_to_sector(ca, u.buckets[BCH_DATA_btree]);
u64 bitmap_sectors = hweight64(ca->mi.btree_allocated_bitmap) << ca->mi.btree_bitmap_shift;

if (btree_sectors * 4 < bitmap_sectors) {
prt_printf(&buf, "%s has ", ca->name);
prt_human_readable_u64(&buf, btree_sectors << 9);
prt_printf(&buf, " btree buckets and ");
prt_human_readable_u64(&buf, bitmap_sectors << 9);
prt_printf(&buf, " marked in bitmap\n");
want_schedule = true;
}
}

if (want_schedule) {
bch2_run_explicit_recovery_pass(c, &buf,
BCH_RECOVERY_PASS_btree_bitmap_gc,
RUN_RECOVERY_PASS_ratelimit);
bch2_print_str(c, KERN_NOTICE, buf.buf);
}

queue_delayed_work(system_long_wq, &c->maybe_schedule_btree_bitmap_gc, HZ * 60 * 60 * 24);
}

void bch2_maybe_schedule_btree_bitmap_gc_stop(struct bch_fs *c)
{
cancel_delayed_work_sync(&c->maybe_schedule_btree_bitmap_gc);
}

void bch2_maybe_schedule_btree_bitmap_gc(struct bch_fs *c)
{
INIT_DELAYED_WORK(&c->maybe_schedule_btree_bitmap_gc,
bch2_maybe_schedule_btree_bitmap_gc_work);
bch2_maybe_schedule_btree_bitmap_gc_work(&c->maybe_schedule_btree_bitmap_gc.work);
}

unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
{
unsigned nr = 0;

@@ -389,7 +389,8 @@ void bch2_sb_members_to_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);

static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
static inline bool __bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start,
unsigned sectors, bool with_gc)
{
u64 end = start + sectors;

@@ -399,14 +400,46 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64
for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
(u64) bit << ca->mi.btree_bitmap_shift < end;
bit++)
if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
if (!(BIT_ULL(bit) &
ca->mi.btree_allocated_bitmap &
(with_gc
? ca->btree_allocated_bitmap_gc
: ~0ULL)))
return false;
return true;
}

static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start,
unsigned sectors)
{
return __bch2_dev_btree_bitmap_marked_sectors(ca, start, sectors, false);
}

static inline bool bch2_dev_btree_bitmap_marked_sectors_any(struct bch_dev *ca, u64 start, unsigned sectors)
{
u64 end = start + sectors;

if (start >= 64ULL << ca->mi.btree_bitmap_shift)
return false;

for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
(u64) bit << ca->mi.btree_bitmap_shift < end;
bit++)
if (ca->mi.btree_allocated_bitmap & BIT_ULL(bit))
return true;
return false;
}

bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
bool bch2_dev_btree_bitmap_marked_nogc(struct bch_fs *, struct bkey_s_c);

void bch2_dev_btree_bitmap_mark_locked(struct bch_fs *, struct bkey_s_c, bool *);
void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);

int bch2_btree_bitmap_gc(struct bch_fs *);
void bch2_maybe_schedule_btree_bitmap_gc_stop(struct bch_fs *);
void bch2_maybe_schedule_btree_bitmap_gc(struct bch_fs *);

int bch2_sb_member_alloc(struct bch_fs *);
void bch2_sb_members_clean_deleted(struct bch_fs *);

@@ -125,6 +125,9 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t, bool);
#define darray_for_each_reverse(_d, _i) \
for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i)

#define darray_sort(_d, _cmp) \
sort((_d).data, (_d).nr, sizeof((_d).data[0]), _cmp, NULL)

/* Init/exit */

#define darray_init(_d) \
@@ -606,49 +606,32 @@ void bch2_bio_map(struct bio *bio, void *base, size_t size)

int bch2_bio_alloc_pages(struct bio *bio, unsigned bs, size_t size, gfp_t gfp_mask)
{
BUG_ON(!is_power_of_2(bs));
BUG_ON(size & (bs - 1));
unsigned bs_pages = DIV_ROUND_UP(bs, PAGE_SIZE);

/*
* XXX: we could do this by allocating higher order pages, but
*
* - the page allocator gets slower at a certain order (5?) - we'd have
* to check for this
*
* - bch2_bio_free_pages_pool() probably does not handle compound pages
* yet
*/
DARRAY_PREALLOCATED(struct page *, 16) pages;
darray_init(&pages);
darray_make_room_gfp(&pages, bs_pages, gfp_mask|__GFP_NOFAIL);
unsigned max_alloc = max(bs, PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);

int ret = 0;
while (size) {
while (pages.nr < bs_pages) {
struct page *page = alloc_pages(gfp_mask, 0);
if (!page) {
ret = -ENOMEM;
goto out;
}
while (bio->bi_iter.bi_size < size) {
unsigned b = min(size - bio->bi_iter.bi_size, max_alloc);

BUG_ON(darray_push(&pages, page));
}
BUG_ON(b & (bs - 1));

while (pages.nr) {
BUG_ON(!size);
#ifdef __KERNEL__
/*
* we don't know the device dma alignment, so in kernel make
* sure allocations are page aligned
*/
void *p = (void *) __get_free_pages(gfp_mask, get_order(b));
#else
void *p = kmalloc(b, gfp_mask);
#endif
if (!p)
return -ENOMEM;

unsigned len = min(PAGE_SIZE, size);
size -= len;

struct page *page = darray_pop(&pages);
BUG_ON(!bio_add_page(bio, page, len, 0));
}
bio_add_virt_nofail(bio, p, b);
}
out:
darray_for_each(pages, i)
__free_page(*i);
darray_exit(&pages);
return ret;

return 0;
}

u64 bch2_get_random_u64_below(u64 ceil)
@@ -678,9 +661,8 @@ void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
struct bvec_iter iter;

__bio_for_each_segment(bv, dst, iter, dst_iter) {
void *dstp = kmap_local_page(bv.bv_page);

memcpy(dstp + bv.bv_offset, src, bv.bv_len);
void *dstp = bvec_kmap_local(&bv);
memcpy(dstp, src, bv.bv_len);
kunmap_local(dstp);

src += bv.bv_len;
@@ -693,9 +675,8 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
struct bvec_iter iter;

__bio_for_each_segment(bv, src, iter, src_iter) {
void *srcp = kmap_local_page(bv.bv_page);

memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
void *srcp = bvec_kmap_local(&bv);
memcpy(dst, srcp, bv.bv_len);
kunmap_local(srcp);

dst += bv.bv_len;

@@ -422,14 +422,6 @@ retry:
}
}

#define memalloc_flags_do(_flags, _do) \
({ \
unsigned _saved_flags = memalloc_flags_save(_flags); \
typeof(_do) _ret = _do; \
memalloc_noreclaim_restore(_saved_flags); \
_ret; \
})

static struct inode *bch2_alloc_inode(struct super_block *sb)
{
BUG();
linux/bio.c
@@ -64,27 +64,13 @@ const char *blk_status_to_str(blk_status_t status)
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter)
{
struct bio_vec src_bv, dst_bv;
void *src_p, *dst_p;
unsigned bytes;

while (src_iter->bi_size && dst_iter->bi_size) {
src_bv = bio_iter_iovec(src, *src_iter);
dst_bv = bio_iter_iovec(dst, *dst_iter);
struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);

bytes = min(src_bv.bv_len, dst_bv.bv_len);
unsigned bytes = min(src_bv.bv_len, dst_bv.bv_len);

src_p = kmap_atomic(src_bv.bv_page);
dst_p = kmap_atomic(dst_bv.bv_page);

memcpy(dst_p + dst_bv.bv_offset,
src_p + src_bv.bv_offset,
bytes);

kunmap_atomic(dst_p);
kunmap_atomic(src_p);

flush_dcache_page(dst_bv.bv_page);
memcpy(dst_bv.bv_addr, src_bv.bv_addr, bytes);

bio_advance_iter(src, src_iter, bytes);
bio_advance_iter(dst, dst_iter, bytes);
@@ -109,15 +95,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)

void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;

__bio_for_each_segment(bv, bio, iter, start) {
char *data = bvec_kmap_irq(&bv, &flags);
memset(data, 0, bv.bv_len);
bvec_kunmap_irq(data, &flags);
}
__bio_for_each_segment(bv, bio, iter, start)
memset(bv.bv_addr, 0, bv.bv_len);
}

static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
@@ -165,15 +147,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
return split;
}

void bio_free_pages(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bvec;

bio_for_each_segment_all(bvec, bio, iter)
__free_page(bvec->bv_page);
}

void bio_advance(struct bio *bio, unsigned bytes)
{
bio_advance_iter(bio, &bio->bi_iter, bytes);
@@ -208,26 +181,18 @@ void bio_put(struct bio *bio)
}
}

int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off)
void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];

WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs);

bv->bv_page = page;
bv->bv_offset = off;
bv->bv_addr = vaddr;
bv->bv_len = len;

bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
return len;
}

void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
{
bio_add_page(bio, virt_to_page(vaddr), len, offset_in_page(vaddr));
}

static inline bool bio_remaining_done(struct bio *bio)

@@ -59,18 +59,15 @@ void generic_make_request(struct bio *bio)

i = 0;
bio_for_each_segment(bv, bio, iter) {
void *start = page_address(bv.bv_page) + bv.bv_offset;
size_t len = bv.bv_len;

iov[i++] = (struct iovec) {
.iov_base = start,
.iov_len = len,
.iov_base = bv.bv_addr,
.iov_len = bv.bv_len,
};

#ifdef CONFIG_VALGRIND
/* To be pedantic it should only be on IO completion. */
if (bio_op(bio) == REQ_OP_READ)
VALGRIND_MAKE_MEM_DEFINED(start, len);
VALGRIND_MAKE_MEM_DEFINED(bv.bv_addr, bv.bv_len);
#endif
}