mirror of https://github.com/koverstreet/bcachefs-tools.git

Update bcachefs sources to 1b14994029 bcachefs: Fragmentation LRU

commit 46a6b9210c (parent c1677df62e)
@@ -1 +1 @@
-8dbfede1d9e6483c682956c7c8a4900a65f98dde
+1b149940290c0ef39070b4afaadab84a65bba034
@@ -723,8 +723,8 @@ TRACE_EVENT(move_data,
 TRACE_EVENT(evacuate_bucket,
 	TP_PROTO(struct bch_fs *c, struct bpos *bucket,
 		 unsigned sectors, unsigned bucket_size,
-		 int ret),
-	TP_ARGS(c, bucket, sectors, bucket_size, ret),
+		 u64 fragmentation, int ret),
+	TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret),
 
 	TP_STRUCT__entry(
 		__field(dev_t,		dev		)
@@ -732,6 +732,7 @@ TRACE_EVENT(evacuate_bucket,
 		__field(u64,		bucket		)
 		__field(u32,		sectors		)
 		__field(u32,		bucket_size	)
+		__field(u64,		fragmentation	)
 		__field(int,		ret		)
 	),
 
@@ -741,14 +742,15 @@ TRACE_EVENT(evacuate_bucket,
 		__entry->bucket		= bucket->offset;
 		__entry->sectors	= sectors;
 		__entry->bucket_size	= bucket_size;
+		__entry->fragmentation	= fragmentation;
 		__entry->ret		= ret;
 	),
 
-	TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
+	TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->member, __entry->bucket,
 		  __entry->sectors, __entry->bucket_size,
-		  __entry->ret)
+		  __entry->fragmentation, __entry->ret)
 );
 
 TRACE_EVENT(copygc,
@@ -9,6 +9,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_gc.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "buckets_waiting_for_journal.h"
 #include "clock.h"
@@ -414,6 +415,8 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	prt_newline(out);
 	prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
 	prt_newline(out);
+	prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
+	prt_newline(out);
 	prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
 	prt_newline(out);
 
@@ -909,8 +912,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 	    !new_a->io_time[READ])
 		new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
 
-	old_lru = alloc_lru_idx(*old_a);
-	new_lru = alloc_lru_idx(*new_a);
+	old_lru = alloc_lru_idx_read(*old_a);
+	new_lru = alloc_lru_idx_read(*new_a);
 
 	if (old_lru != new_lru) {
 		ret = bch2_lru_change(trans, new->k.p.inode,
@@ -920,6 +923,18 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 			return ret;
 	}
 
+	new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
+					bch_dev_bkey_exists(c, new->k.p.inode));
+
+	if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
+		ret = bch2_lru_change(trans,
+				      BCH_LRU_FRAGMENTATION_START,
+				      bucket_to_u64(new->k.p),
+				      old_a->fragmentation_lru, new_a->fragmentation_lru);
+		if (ret)
+			return ret;
+	}
+
 	if (old_a->gen != new_a->gen) {
 		ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen);
 		if (ret)
@@ -1775,15 +1790,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	if (ret)
 		goto out;
 
-	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) {
-		prt_str(&buf, "alloc key does not point back to lru entry when invalidating bucket:");
-		goto err;
-	}
+	/* We expect harmless races here due to the btree write buffer: */
+	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v))
+		goto out;
 
-	if (a->v.data_type != BCH_DATA_cached) {
-		prt_str(&buf, "lru entry points to non cached bucket:");
-		goto err;
-	}
+	BUG_ON(a->v.data_type != BCH_DATA_cached);
 
 	if (!a->v.cached_sectors)
 		bch_err(c, "invalidating empty bucket, confused");
@@ -1845,6 +1856,10 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 
 	bch2_trans_init(&trans, c, 0, 0);
 
+	ret = bch2_btree_write_buffer_flush(&trans);
+	if (ret)
+		goto err;
+
 	for_each_member_device(ca, c, i) {
 		s64 nr_to_invalidate =
 			should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
@@ -1860,7 +1875,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 			break;
 		}
 	}
-
+err:
 	bch2_trans_exit(&trans);
 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
 }
@@ -64,11 +64,24 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
 				 a.stripe, a, data_type);
 }
 
-static inline u64 alloc_lru_idx(struct bch_alloc_v4 a)
+static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
 {
 	return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
 }
 
+static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
+					      struct bch_dev *ca)
+{
+	if (a.data_type != BCH_DATA_btree &&
+	    a.data_type != BCH_DATA_user)
+		return 0;
+
+	if (a.dirty_sectors >= ca->mi.bucket_size)
+		return 0;
+
+	return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
+}
+
 static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
 {
 	return ((u64) alloc_gc_gen(a) >> 4) << 56;
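The fragmentation index introduced above maps a bucket's dirty-sector count to a 31-bit fixed-point fraction of the bucket size, so less-full buckets get smaller indexes and sort earlier in the fragmentation LRU; full (and non-data) buckets get 0, which the rest of the patch treats as "not a copygc candidate". A minimal standalone C sketch of the same arithmetic (hypothetical helper name, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* mirrors the math in alloc_lru_idx_fragmentation() for plain integers */
	static uint64_t fragmentation_idx(uint64_t dirty_sectors, uint64_t bucket_size)
	{
		if (dirty_sectors >= bucket_size)
			return 0;	/* full bucket: nothing to gain from copygc */
		return dirty_sectors * (1ULL << 31) / bucket_size;
	}

	int main(void)
	{
		/* a half-full 512-sector bucket maps to 2^30 = 1073741824 */
		printf("%llu\n", (unsigned long long) fragmentation_idx(256, 512));
		/* an empty bucket maps to 0, i.e. it is not put on the LRU */
		printf("%llu\n", (unsigned long long) fragmentation_idx(0, 512));
		return 0;
	}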
@@ -738,7 +738,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
 
 	si_meminfo(&i);
 	mem_bytes = i.totalram * i.mem_unit;
-	return (mem_bytes >> 1) / btree_bytes(c);
+	return div_u64(mem_bytes >> 1, btree_bytes(c));
 }
 
 int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
@@ -927,7 +927,6 @@ struct bch_fs {
 
 	/* COPYGC */
 	struct task_struct	*copygc_thread;
-	copygc_heap		copygc_heap;
 	struct write_point	copygc_write_point;
 	s64			copygc_wait;
 	bool			copygc_running;
@@ -988,6 +988,7 @@ struct bch_alloc_v4 {
 	__u64			io_time[2];
 	__u32			stripe;
 	__u32			nr_external_backpointers;
+	__u64			fragmentation_lru;
 } __packed __aligned(8);
 
 #define BCH_ALLOC_V4_U64s_V0	6
@@ -1559,7 +1560,8 @@ struct bch_sb_field_journal_seq_blacklist {
 	x(inode_v3,		23)	\
 	x(unwritten_extents,	24)	\
 	x(bucket_gens,		25)	\
-	x(lru_v2,		26)
+	x(lru_v2,		26)	\
+	x(fragmentation_lru,	27)
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
@@ -1174,17 +1174,10 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
 
 	path->uptodate = BTREE_ITER_UPTODATE;
 out:
-	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) {
-		struct printbuf buf = PRINTBUF;
-
-		prt_printf(&buf, "ret %s (%i) trans->restarted %s (%i)\n",
-			   bch2_err_str(ret), ret,
-			   bch2_err_str(trans->restarted), trans->restarted);
-#ifdef CONFIG_BCACHEFS_DEBUG
-		bch2_prt_backtrace(&buf, &trans->last_restarted);
-#endif
-		panic("%s", buf.buf);
-	}
+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
+		panic("ret %s (%i) trans->restarted %s (%i)\n",
+		      bch2_err_str(ret), ret,
+		      bch2_err_str(trans->restarted), trans->restarted);
 	bch2_btree_path_verify(trans, path);
 	return ret;
 }
@@ -1367,14 +1360,14 @@ void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
 {
 	panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
 	      trans->restart_count, restart_count,
-	      (void *) trans->last_begin_ip);
+	      (void *) trans->last_restarted_ip);
 }
 
 void bch2_trans_in_restart_error(struct btree_trans *trans)
 {
 	panic("in transaction restart: %s, last restarted by %pS\n",
 	      bch2_err_str(trans->restarted),
-	      (void *) trans->last_begin_ip);
+	      (void *) trans->last_restarted_ip);
 }
 
 noinline __cold
@@ -2872,7 +2865,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
 	if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
 		bch2_trans_reset_srcu_lock(trans);
 
-	trans->last_begin_ip = _RET_IP_;
+	trans->last_restarted_ip = _RET_IP_;
 	if (trans->restarted) {
 		bch2_btree_path_traverse_all(trans);
 		trans->notrace_relock_fail = false;
@@ -3053,10 +3046,6 @@ void bch2_trans_exit(struct btree_trans *trans)
 	if (trans->paths)
 		mempool_free(trans->paths, &c->btree_paths_pool);
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-	darray_exit(&trans->last_restarted);
-#endif
-
 	trans->mem = (void *) 0x1;
 	trans->paths = (void *) 0x1;
 }
@@ -251,10 +251,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er
 	BUG_ON(err <= 0);
 	BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-	bch2_save_backtrace(&trans->last_restarted, current);
-#endif
-
 	trans->restarted = err;
 	return -err;
 }
@@ -442,10 +442,7 @@ struct btree_trans {
 	bool			notrace_relock_fail:1;
 	enum bch_errcode	restarted:16;
 	u32			restart_count;
-#ifdef CONFIG_BCACHEFS_DEBUG
-	bch_stacktrace		last_restarted;
-#endif
-	unsigned long		last_begin_ip;
+	unsigned long		last_restarted_ip;
 	unsigned long		srcu_lock_time;
 
 	/*
@@ -58,6 +58,9 @@ int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
 				unsigned, unsigned);
 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
 
+int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
+				struct bkey_i *, enum btree_update_flags);
+
 int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *,
 			enum btree_update_flags);
 int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
@@ -56,9 +56,10 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert
 		k = bkey_i_to_s_c(j_k);
 	}
 
-	i->old_k.needs_whiteout = k.k->needs_whiteout;
+	u = *k.k;
+	u.needs_whiteout = i->old_k.needs_whiteout;
 
-	BUG_ON(memcmp(&i->old_k, k.k, sizeof(struct bkey)));
+	BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey)));
 	BUG_ON(i->old_v != k.v);
 #endif
 }
@@ -1306,12 +1307,45 @@ static noinline int extent_back_merge(struct btree_trans *trans,
 	return 0;
 }
 
+/*
+ * When deleting, check if we need to emit a whiteout (because we're overwriting
+ * something in an ancestor snapshot)
+ */
+static int need_whiteout_for_snapshot(struct btree_trans *trans,
+				      enum btree_id btree_id, struct bpos pos)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	u32 snapshot = pos.snapshot;
+	int ret;
+
+	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
+		return 0;
+
+	pos.snapshot++;
+
+	for_each_btree_key_norestart(trans, iter, btree_id, pos,
+				     BTREE_ITER_ALL_SNAPSHOTS|
+				     BTREE_ITER_NOPRESERVE, k, ret) {
+		if (!bkey_eq(k.k->p, pos))
+			break;
+
+		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
+					      k.k->p.snapshot)) {
+			ret = !bkey_whiteout(k.k);
+			break;
+		}
+	}
+	bch2_trans_iter_exit(trans, &iter);
+
+	return ret;
+}
+
 int bch2_trans_update_extent(struct btree_trans *trans,
 			     struct btree_iter *orig_iter,
 			     struct bkey_i *insert,
 			     enum btree_update_flags flags)
 {
-	struct btree_iter iter, update_iter;
+	struct btree_iter iter;
 	struct bpos start = bkey_start_pos(&insert->k);
 	struct bkey_i *update;
 	struct bkey_s_c k;
@@ -1359,16 +1393,8 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 
 			bch2_cut_back(start, update);
 
-			bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
-					     BTREE_ITER_NOT_EXTENTS|
-					     BTREE_ITER_ALL_SNAPSHOTS|
-					     BTREE_ITER_INTENT);
-			ret   = bch2_btree_iter_traverse(&update_iter) ?:
-				bch2_trans_update(trans, &update_iter, update,
-						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-						  flags);
-			bch2_trans_iter_exit(trans, &update_iter);
-
+			ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
 			if (ret)
 				goto err;
 		}
@@ -1382,15 +1408,8 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 			bch2_cut_front(start, update);
 			bch2_cut_back(insert->k.p, update);
 
-			bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
-					     BTREE_ITER_NOT_EXTENTS|
-					     BTREE_ITER_ALL_SNAPSHOTS|
-					     BTREE_ITER_INTENT);
-			ret   = bch2_btree_iter_traverse(&update_iter) ?:
-				bch2_trans_update(trans, &update_iter, update,
-						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-						  flags);
-			bch2_trans_iter_exit(trans, &update_iter);
+			ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
 			if (ret)
 				goto err;
 		}
@@ -1402,21 +1421,15 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 
 		bkey_init(&update->k);
 		update->k.p = k.k->p;
+		update->k.p.snapshot = insert->k.p.snapshot;
 
-		if (insert->k.p.snapshot != k.k->p.snapshot) {
-			update->k.p.snapshot = insert->k.p.snapshot;
+		if (insert->k.p.snapshot != k.k->p.snapshot ||
+		    (btree_type_has_snapshots(btree_id) &&
+		     need_whiteout_for_snapshot(trans, btree_id, update->k.p)))
 			update->k.type = KEY_TYPE_whiteout;
-		}
 
-		bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
-				     BTREE_ITER_NOT_EXTENTS|
-				     BTREE_ITER_INTENT);
-		ret   = bch2_btree_iter_traverse(&update_iter) ?:
-			bch2_trans_update(trans, &update_iter, update,
-					  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
-					  flags);
-		bch2_trans_iter_exit(trans, &update_iter);
-
+		ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
 		if (ret)
 			goto err;
 	}
@@ -1468,40 +1481,6 @@ err:
 	return ret;
 }
 
-/*
- * When deleting, check if we need to emit a whiteout (because we're overwriting
- * something in an ancestor snapshot)
- */
-static int need_whiteout_for_snapshot(struct btree_trans *trans,
-				      enum btree_id btree_id, struct bpos pos)
-{
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	u32 snapshot = pos.snapshot;
-	int ret;
-
-	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
-		return 0;
-
-	pos.snapshot++;
-
-	for_each_btree_key_norestart(trans, iter, btree_id, pos,
-				     BTREE_ITER_ALL_SNAPSHOTS|
-				     BTREE_ITER_NOPRESERVE, k, ret) {
-		if (!bkey_eq(k.k->p, pos))
-			break;
-
-		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
-					      k.k->p.snapshot)) {
-			ret = !bkey_whiteout(k.k);
-			break;
-		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
-
-	return ret;
-}
-
 static int __must_check
 bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path,
 				struct bkey_i *k, enum btree_update_flags flags,
@@ -1747,8 +1726,23 @@ void bch2_trans_commit_hook(struct btree_trans *trans,
 	trans->hooks = h;
 }
 
-int __bch2_btree_insert(struct btree_trans *trans,
-			enum btree_id id,
+int bch2_btree_insert_nonextent(struct btree_trans *trans,
+				enum btree_id btree, struct bkey_i *k,
+				enum btree_update_flags flags)
+{
+	struct btree_iter iter;
+	int ret;
+
+	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
+			     BTREE_ITER_NOT_EXTENTS|
+			     BTREE_ITER_INTENT);
+	ret   = bch2_btree_iter_traverse(&iter) ?:
+		bch2_trans_update(trans, &iter, k, flags);
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id,
 			struct bkey_i *k, enum btree_update_flags flags)
 {
 	struct btree_iter iter;
@@ -88,6 +88,8 @@ static union btree_write_buffer_state btree_write_buffer_switch(struct btree_wri
 	while (old.idx == 0 ? wb->state.ref0 : wb->state.ref1)
 		cpu_relax();
 
+	smp_mb();
+
 	return old;
 }
 
@@ -89,15 +89,4 @@ struct disk_reservation {
 	unsigned		nr_replicas;
 };
 
-struct copygc_heap_entry {
-	u8			dev;
-	u8			gen;
-	u8			replicas;
-	u32			fragmentation;
-	u32			sectors;
-	u64			bucket;
-};
-
-typedef HEAP(struct copygc_heap_entry) copygc_heap;
-
 #endif /* _BUCKETS_TYPES_H */
@@ -22,9 +22,10 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
 				     struct bpos new_pos)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter, update_iter;
-	struct bkey_s_c k;
+	struct btree_iter iter, iter2;
+	struct bkey_s_c k, k2;
 	snapshot_id_list s;
+	struct bkey_i *update;
 	int ret;
 
 	if (!btree_type_has_snapshots(id))
@@ -32,10 +33,7 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
 
 	darray_init(&s);
 
-	if (bkey_eq(old_pos, new_pos))
-		return 0;
-
-	if (!snapshot_t(c, old_pos.snapshot)->children[0])
+	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
 		return 0;
 
 	bch2_trans_iter_init(trans, &iter, id, old_pos,
@@ -47,33 +45,39 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans,
 		if (ret)
 			break;
 
+		if (!k.k)
+			break;
+
 		if (!bkey_eq(old_pos, k.k->p))
 			break;
 
-		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
-			struct bkey_i *update;
-
-			if (snapshot_list_has_ancestor(c, &s, k.k->p.snapshot))
-				continue;
-
-			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-
-			ret = PTR_ERR_OR_ZERO(update);
-			if (ret)
-				break;
-
-			bkey_init(&update->k);
-			update->k.p = new_pos;
-			update->k.p.snapshot = k.k->p.snapshot;
-
-			bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
-					     BTREE_ITER_NOT_EXTENTS|
-					     BTREE_ITER_ALL_SNAPSHOTS|
-					     BTREE_ITER_INTENT);
-			ret   = bch2_btree_iter_traverse(&update_iter) ?:
-				bch2_trans_update(trans, &update_iter, update,
-						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-			bch2_trans_iter_exit(trans, &update_iter);
+		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) &&
+		    !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) {
+			struct bpos whiteout_pos = new_pos;
+
+			whiteout_pos.snapshot = k.k->p.snapshot;
+
+			bch2_trans_iter_init(trans, &iter2, id, whiteout_pos,
+					     BTREE_ITER_NOT_EXTENTS|
+					     BTREE_ITER_INTENT);
+			k2 = bch2_btree_iter_peek_slot(&iter2);
+			ret = bkey_err(k2);
+
+			if (!ret && k2.k->type == KEY_TYPE_deleted) {
+				update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+				ret = PTR_ERR_OR_ZERO(update);
+				if (ret)
+					break;
+
+				bkey_init(&update->k);
+				update->k.p = whiteout_pos;
+				update->k.type = KEY_TYPE_whiteout;
+
+				ret = bch2_trans_update(trans, &iter2, update,
+							BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+			}
+			bch2_trans_iter_exit(trans, &iter2);
 
 			if (ret)
 				break;
 
|
|||||||
|
|
||||||
next_pos = insert->k.p;
|
next_pos = insert->k.p;
|
||||||
|
|
||||||
ret = insert_snapshot_whiteouts(trans, m->btree_id,
|
if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
|
||||||
k.k->p, insert->k.p) ?:
|
ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p,
|
||||||
bch2_trans_update(trans, &iter, insert,
|
bkey_start_pos(&insert->k));
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!bkey_eq(insert->k.p, k.k->p)) {
|
||||||
|
ret = insert_snapshot_whiteouts(trans, m->btree_id,
|
||||||
|
k.k->p, insert->k.p);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = bch2_trans_update(trans, &iter, insert,
|
||||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
|
||||||
bch2_trans_commit(trans, &op->res,
|
bch2_trans_commit(trans, &op->res,
|
||||||
NULL,
|
NULL,
|
||||||
|
@@ -98,7 +98,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
 
 	INIT_LIST_HEAD(&s->list);
 	s->fmt = fmt;
-	s->buf = PRINTBUF;
 	list_add(&s->list, &c->fsck_errors);
 	return s;
 }
@@ -111,9 +110,23 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
 	struct printbuf buf = PRINTBUF, *out = &buf;
 	int ret = -BCH_ERR_fsck_ignore;
 
+	va_start(args, fmt);
+	prt_vprintf(out, fmt, args);
+	va_end(args);
+
 	mutex_lock(&c->fsck_error_lock);
 	s = fsck_err_get(c, fmt);
 	if (s) {
+		if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
+			ret = s->ret;
+			mutex_unlock(&c->fsck_error_lock);
+			printbuf_exit(&buf);
+			return ret;
+		}
+
+		kfree(s->last_msg);
+		s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
+
 		if (c->opts.ratelimit_errors &&
 		    !(flags & FSCK_NO_RATELIMIT) &&
 		    s->nr >= FSCK_ERR_RATELIMIT_NR) {
@@ -123,8 +136,6 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
 			print = false;
 		}
 
-		printbuf_reset(&s->buf);
-		out = &s->buf;
 		s->nr++;
 	}
 
@@ -133,10 +144,6 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
 	prt_printf(out, bch2_log_msg(c, ""));
 #endif
 
-	va_start(args, fmt);
-	prt_vprintf(out, fmt, args);
-	va_end(args);
-
 	if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
 		if (c->opts.errors != BCH_ON_ERROR_continue ||
 		    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
@@ -190,6 +197,9 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
 	else if (suppressing)
 		bch_err(c, "Ratelimiting new instances of previous error");
 
+	if (s)
+		s->ret = ret;
+
 	mutex_unlock(&c->fsck_error_lock);
 
 	printbuf_exit(&buf);
@@ -214,11 +224,11 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
 	mutex_lock(&c->fsck_error_lock);
 
 	list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
-		if (s->ratelimited)
-			bch_err(c, "Saw %llu errors like:\n  %s", s->nr, s->buf.buf);
+		if (s->ratelimited && s->last_msg)
+			bch_err(c, "Saw %llu errors like:\n  %s", s->nr, s->last_msg);
 
 		list_del(&s->list);
-		printbuf_exit(&s->buf);
+		kfree(s->last_msg);
 		kfree(s);
 	}
 
@@ -103,7 +103,8 @@ struct fsck_err_state {
 	const char		*fmt;
 	u64			nr;
 	bool			ratelimited;
-	struct printbuf		buf;
+	int			ret;
+	char			*last_msg;
 };
 
 #define FSCK_CAN_FIX		(1 << 0)
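The error.c/error.h changes above replace the per-error printbuf with the last formatted message plus the previously returned action, so an exact repeat of the same fsck error is answered without printing again. A rough userspace model of that deduplication (hypothetical names, standard C only):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct err_state {
		char	*last_msg;
		int	ret;
	};

	static int report(struct err_state *s, const char *msg, int action)
	{
		if (s->last_msg && !strcmp(msg, s->last_msg))
			return s->ret;	/* exact repeat: reuse the previous answer silently */

		free(s->last_msg);
		s->last_msg = strdup(msg);
		s->ret = action;
		printf("%s\n", msg);
		return action;
	}

	int main(void)
	{
		struct err_state s = { 0 };

		report(&s, "bucket 3: incorrect lru entry", -1);
		report(&s, "bucket 3: incorrect lru entry", -1);	/* suppressed */
		report(&s, "bucket 4: incorrect lru entry", -1);	/* new message: printed */
		free(s.last_msg);
		return 0;
	}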
@@ -605,6 +605,17 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
 		: bch2_snapshot_is_ancestor(c, src, dst);
 }
 
+static int ref_visible2(struct bch_fs *c,
+			u32 src, struct snapshots_seen *src_seen,
+			u32 dst, struct snapshots_seen *dst_seen)
+{
+	if (dst > src) {
+		swap(dst, src);
+		swap(dst_seen, src_seen);
+	}
+	return key_visible_in_snapshot(c, src_seen, dst, src);
+}
+
 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i)			\
 	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
 	     (_i)->snapshot <= (_snapshot); _i++)				\
@@ -1158,10 +1169,102 @@ fsck_err:
 	return ret;
 }
 
+struct extent_end {
+	u32			snapshot;
+	u64			offset;
+	struct snapshots_seen	seen;
+};
+
+typedef DARRAY(struct extent_end) extent_ends;
+
+static int check_overlapping_extents(struct btree_trans *trans,
+				     struct snapshots_seen *seen,
+				     extent_ends *extent_ends,
+				     struct bkey_s_c k,
+				     struct btree_iter *iter)
+{
+	struct bch_fs *c = trans->c;
+	struct extent_end *i;
+	struct printbuf buf = PRINTBUF;
+	int ret = 0;
+
+	darray_for_each(*extent_ends, i) {
+		/* duplicate, due to transaction restart: */
+		if (i->offset == k.k->p.offset &&
+		    i->snapshot == k.k->p.snapshot)
+			continue;
+
+		if (!ref_visible2(c,
+				  k.k->p.snapshot, seen,
+				  i->snapshot, &i->seen))
+			continue;
+
+		if (fsck_err_on(i->offset > bkey_start_offset(k.k), c,
+				"overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
+				i->snapshot,
+				i->offset,
+				(printbuf_reset(&buf),
+				 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+			struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+			if ((ret = PTR_ERR_OR_ZERO(update)))
+				goto err;
+			bkey_reassemble(update, k);
+			ret = bch2_trans_update_extent(trans, iter, update, 0);
+			if (!ret)
+				goto err;
+		}
+	}
+err:
+fsck_err:
+	printbuf_exit(&buf);
+	return ret;
+}
+
+static int extent_ends_at(extent_ends *extent_ends,
+			  struct snapshots_seen *seen,
+			  struct bkey_s_c k)
+{
+	struct extent_end *i, n = (struct extent_end) {
+		.snapshot	= k.k->p.snapshot,
+		.offset		= k.k->p.offset,
+		.seen		= *seen,
+	};
+
+	n.seen.ids.data = kmemdup(seen->ids.data,
+				  sizeof(seen->ids.data[0]) * seen->ids.size,
+				  GFP_KERNEL);
+	if (!n.seen.ids.data)
+		return -ENOMEM;
+
+	darray_for_each(*extent_ends, i) {
+		if (i->snapshot == k.k->p.snapshot) {
+			snapshots_seen_exit(&i->seen);
+			*i = n;
+			return 0;
+		}
+
+		if (i->snapshot >= k.k->p.snapshot)
+			break;
+	}
+
+	return darray_insert_item(extent_ends, i - extent_ends->data, n);
+}
+
+static void extent_ends_reset(extent_ends *extent_ends)
+{
+	struct extent_end *i;
+
+	darray_for_each(*extent_ends, i)
+		snapshots_seen_exit(&i->seen);
+
+	extent_ends->nr = 0;
+}
+
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 			struct bkey_s_c k,
 			struct inode_walker *inode,
-			struct snapshots_seen *s)
+			struct snapshots_seen *s,
+			extent_ends *extent_ends)
 {
 	struct bch_fs *c = trans->c;
 	struct inode_walker_entry *i;
@@ -1189,24 +1292,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 		ret = check_i_sectors(trans, inode);
 		if (ret)
 			goto err;
+
+		extent_ends_reset(extent_ends);
 	}
 
 	BUG_ON(!iter->path->should_be_locked);
-#if 0
-	if (bkey_gt(prev.k->k.p, bkey_start_pos(k.k))) {
-		char buf1[200];
-		char buf2[200];
-
-		bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
-		bch2_bkey_val_to_text(&PBUF(buf2), c, k);
-
-		if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
-			ret = fix_overlapping_extent(trans, k, prev.k->k.p)
-				?: -BCH_ERR_transaction_restart_nested;
-			goto out;
-		}
-	}
-#endif
+
+	ret = check_overlapping_extents(trans, s, extent_ends, k, iter);
+	if (ret)
+		goto err;
+
+	ret = extent_ends_at(extent_ends, s, k);
+	if (ret)
+		goto err;
+
 	ret = __walk_inode(trans, inode, equiv);
 	if (ret < 0)
 		goto err;
@@ -1304,13 +1403,9 @@ static int check_extents(struct bch_fs *c)
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
+	extent_ends extent_ends = { 0 };
 	int ret = 0;
 
-#if 0
-	struct bkey_buf prev;
-	bch2_bkey_buf_init(&prev);
-	prev.k->k = KEY(0, 0, 0);
-#endif
 	snapshots_seen_init(&s);
 	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
@@ -1321,10 +1416,10 @@ static int check_extents(struct bch_fs *c)
 			BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
 			NULL, NULL,
 			BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-		check_extent(&trans, &iter, k, &w, &s));
-#if 0
-	bch2_bkey_buf_exit(&prev, c);
-#endif
+		check_extent(&trans, &iter, k, &w, &s, &extent_ends));
+
+	extent_ends_reset(&extent_ends);
+	darray_exit(&extent_ends);
 	inode_walker_exit(&w);
 	bch2_trans_exit(&trans);
 	snapshots_seen_exit(&s);
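The new check works by remembering, per snapshot, where earlier extents ended; a later extent in a visibly-related snapshot overlaps when that recorded end is strictly greater than its start offset (the i->offset > bkey_start_offset(k.k) test above). A tiny model of just that comparison:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* prev_end: recorded end of an earlier extent; start: the new extent's start */
	static bool overlaps(uint64_t prev_end, uint64_t start)
	{
		return prev_end > start;
	}

	int main(void)
	{
		printf("%d\n", overlaps(100, 90));	/* 1: [.., 100) overlaps [90, ..) */
		printf("%d\n", overlaps(100, 100));	/* 0: extents that merely touch are fine */
		return 0;
	}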
@@ -49,7 +49,6 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
 static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
 			  u64 dev_bucket, u64 time, unsigned key_type)
 {
-	struct btree_iter iter;
 	struct bkey_i *k;
 	int ret = 0;
 
@@ -69,13 +68,7 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
 	EBUG_ON(lru_pos_time(k->k.p) != time);
 	EBUG_ON(k->k.p.offset != dev_bucket);
 
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
-			     k->k.p, BTREE_ITER_INTENT);
-
-	ret   = bch2_btree_iter_traverse(&iter) ?:
-		bch2_trans_update(trans, &iter, k, 0);
-	bch2_trans_iter_exit(trans, &iter);
-	return ret;
+	return bch2_trans_update_buffered(trans, BTREE_ID_lru, k);
 }
 
 int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
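__bch2_lru_set() now queues its update through the btree write buffer instead of updating the LRU btree directly, which is why other paths in this commit flush the write buffer before walking the LRU and tolerate stale entries. A toy model of the visibility rule (an assumption-level sketch, not the bcachefs implementation):

	#include <stdio.h>

	static int committed[4];			/* the "btree" */
	static struct { int idx, val; } pending[8];	/* the "write buffer" */
	static int npending;

	static void update_buffered(int idx, int val)
	{
		pending[npending].idx = idx;
		pending[npending].val = val;
		npending++;
	}

	static void write_buffer_flush(void)
	{
		for (int i = 0; i < npending; i++)
			committed[pending[i].idx] = pending[i].val;
		npending = 0;
	}

	int main(void)
	{
		update_buffered(1, 42);
		printf("before flush: %d\n", committed[1]);	/* 0: not yet visible */
		write_buffer_flush();
		printf("after flush:  %d\n", committed[1]);	/* 42 */
		return 0;
	}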
@@ -99,6 +92,13 @@ int bch2_lru_change(struct btree_trans *trans,
 		bch2_lru_set(trans, lru_id, dev_bucket, new_time);
 }
 
+static const char * const bch2_lru_types[] = {
+#define x(n) #n,
+	BCH_LRU_TYPES()
+#undef x
+	NULL
+};
+
 static int bch2_check_lru_key(struct btree_trans *trans,
 			      struct btree_iter *lru_iter,
 			      struct bkey_s_c lru_k)
@@ -110,7 +110,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	const struct bch_alloc_v4 *a;
 	struct printbuf buf1 = PRINTBUF;
 	struct printbuf buf2 = PRINTBUF;
+	enum bch_lru_type type = lru_type(lru_k);
 	struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
+	u64 idx;
 	int ret;
 
 	if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
@@ -126,11 +128,21 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 
 	a = bch2_alloc_to_v4(k, &a_convert);
 
+	switch (type) {
+	case BCH_LRU_read:
+		idx = alloc_lru_idx_read(*a);
+		break;
+	case BCH_LRU_fragmentation:
+		idx = a->fragmentation_lru;
+		break;
+	}
+
 	if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
-			a->data_type != BCH_DATA_cached ||
-			a->io_time[READ] != lru_pos_time(lru_k.k->p), c,
-			"incorrect lru entry (time %llu) %s\n"
+			lru_pos_time(lru_k.k->p) != idx, c,
+			"incorrect lru entry: lru %s time %llu\n"
+			"  %s\n"
 			"  for %s",
+			bch2_lru_types[type],
 			lru_pos_time(lru_k.k->p),
 			(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
 			(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
@@ -22,6 +22,27 @@ static inline u64 lru_pos_time(struct bpos pos)
 	return pos.inode & ~(~0ULL << LRU_TIME_BITS);
 }
 
+#define BCH_LRU_TYPES()		\
+	x(read)			\
+	x(fragmentation)
+
+enum bch_lru_type {
+#define x(n) BCH_LRU_##n,
+	BCH_LRU_TYPES()
+#undef x
+};
+
+#define BCH_LRU_FRAGMENTATION_START	((1U << 16) - 1)
+
+static inline enum bch_lru_type lru_type(struct bkey_s_c l)
+{
+	u16 lru_id = l.k->p.inode >> 48;
+
+	if (lru_id == BCH_LRU_FRAGMENTATION_START)
+		return BCH_LRU_fragmentation;
+	return BCH_LRU_read;
+}
+
 int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *);
 void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
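lru_type() above recovers the LRU type from the key position alone: the top 16 bits of the bpos inode field hold the lru_id, and the reserved id (1 << 16) - 1 selects the fragmentation LRU. A standalone sketch of that decoding (hypothetical macro name in place of BCH_LRU_FRAGMENTATION_START):

	#include <stdint.h>
	#include <stdio.h>

	#define FRAGMENTATION_ID ((1U << 16) - 1)

	static const char *lru_name(uint64_t pos_inode)
	{
		uint16_t lru_id = pos_inode >> 48;

		return lru_id == FRAGMENTATION_ID ? "fragmentation" : "read";
	}

	int main(void)
	{
		printf("%s\n", lru_name((uint64_t) FRAGMENTATION_ID << 48));	/* fragmentation */
		printf("%s\n", lru_name(0));					/* read */
		return 0;
	}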
@@ -653,13 +653,13 @@ failed_to_evacuate:
 	printbuf_exit(&buf);
 }
 
-int __bch2_evacuate_bucket(struct moving_context *ctxt,
+int __bch2_evacuate_bucket(struct btree_trans *trans,
+			   struct moving_context *ctxt,
 			   struct bpos bucket, int gen,
 			   struct data_update_opts _data_opts)
 {
 	struct bch_fs *c = ctxt->c;
 	struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
-	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_buf sk;
 	struct bch_backpointer bp;
@@ -668,17 +668,17 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 	struct bkey_s_c k;
 	struct data_update_opts data_opts;
 	unsigned dirty_sectors, bucket_size;
+	u64 fragmentation;
 	u64 bp_offset = 0, cur_inum = U64_MAX;
 	int ret = 0;
 
 	bch2_bkey_buf_init(&sk);
-	bch2_trans_init(&trans, c, 0, 0);
 
-	bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
 			     bucket, BTREE_ITER_CACHED);
-	ret = lockrestart_do(&trans,
+	ret = lockrestart_do(trans,
 			bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
-	bch2_trans_iter_exit(&trans, &iter);
+	bch2_trans_iter_exit(trans, &iter);
 
 	if (ret) {
 		bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret));
@@ -688,17 +688,18 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 	a = bch2_alloc_to_v4(k, &a_convert);
 	dirty_sectors = a->dirty_sectors;
 	bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+	fragmentation = a->fragmentation_lru;
 
-	ret = bch2_btree_write_buffer_flush(&trans);
+	ret = bch2_btree_write_buffer_flush(trans);
 	if (ret) {
 		bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret));
 		goto err;
 	}
 
-	while (!(ret = move_ratelimit(&trans, ctxt))) {
-		bch2_trans_begin(&trans);
+	while (!(ret = move_ratelimit(trans, ctxt))) {
+		bch2_trans_begin(trans);
 
-		ret = bch2_get_next_backpointer(&trans, bucket, gen,
+		ret = bch2_get_next_backpointer(trans, bucket, gen,
 						&bp_offset, &bp,
 						BTREE_ITER_CACHED);
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -713,7 +714,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			struct bkey_s_c k;
 			unsigned i = 0;
 
-			k = bch2_backpointer_get_key(&trans, &iter,
+			k = bch2_backpointer_get_key(trans, &iter,
 						     bucket, bp_offset, bp);
 			ret = bkey_err(k);
 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -726,9 +727,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			bch2_bkey_buf_reassemble(&sk, c, k);
 			k = bkey_i_to_s_c(sk.k);
 
-			ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
+			ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
 			if (ret) {
-				bch2_trans_iter_exit(&trans, &iter);
+				bch2_trans_iter_exit(trans, &iter);
 				continue;
 			}
 
@@ -742,15 +743,15 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 				i++;
 			}
 
-			ret = bch2_move_extent(&trans, &iter, ctxt, io_opts,
+			ret = bch2_move_extent(trans, &iter, ctxt, io_opts,
 					       bp.btree_id, k, data_opts);
-			bch2_trans_iter_exit(&trans, &iter);
+			bch2_trans_iter_exit(trans, &iter);
 
 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 				continue;
 			if (ret == -ENOMEM) {
 				/* memory allocation failure, wait for some IO to finish */
-				bch2_move_ctxt_wait_for_io(ctxt, &trans);
+				bch2_move_ctxt_wait_for_io(ctxt, trans);
 				continue;
 			}
 			if (ret)
@@ -762,7 +763,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 		} else {
 			struct btree *b;
 
-			b = bch2_backpointer_get_node(&trans, &iter,
+			b = bch2_backpointer_get_node(trans, &iter,
 						      bucket, bp_offset, bp);
 			ret = PTR_ERR_OR_ZERO(b);
 			if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
@@ -774,8 +775,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			if (!b)
 				goto next;
 
-			ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
-			bch2_trans_iter_exit(&trans, &iter);
+			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+			bch2_trans_iter_exit(trans, &iter);
 
 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 				continue;
@@ -792,17 +793,16 @@ next:
 		bp_offset++;
 	}
 
-	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret);
+	trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret);
 
 	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
-		bch2_trans_unlock(&trans);
+		bch2_trans_unlock(trans);
 		move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
 		closure_sync(&ctxt->cl);
 		if (!ctxt->write_error)
-			verify_bucket_evacuated(&trans, bucket, gen);
+			verify_bucket_evacuated(trans, bucket, gen);
 	}
 err:
-	bch2_trans_exit(&trans);
 	bch2_bkey_buf_exit(&sk, c);
 	return ret;
 }
@@ -815,12 +815,15 @@ int bch2_evacuate_bucket(struct bch_fs *c,
 			 struct write_point_specifier wp,
 			 bool wait_on_copygc)
 {
+	struct btree_trans trans;
 	struct moving_context ctxt;
 	int ret;
 
+	bch2_trans_init(&trans, c, 0, 0);
 	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
-	ret = __bch2_evacuate_bucket(&ctxt, bucket, gen, data_opts);
+	ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts);
 	bch2_moving_ctxt_exit(&ctxt);
+	bch2_trans_exit(&trans);
 
 	return ret;
 }
@@ -66,7 +66,8 @@ int bch2_move_data(struct bch_fs *,
 		   bool,
 		   move_pred_fn, void *);
 
-int __bch2_evacuate_bucket(struct moving_context *,
+int __bch2_evacuate_bucket(struct btree_trans *,
+			   struct moving_context *,
 			   struct bpos, int,
 			   struct data_update_opts);
 int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int,
@ -10,6 +10,7 @@
|
|||||||
#include "alloc_foreground.h"
|
#include "alloc_foreground.h"
|
||||||
#include "btree_iter.h"
|
#include "btree_iter.h"
|
||||||
#include "btree_update.h"
|
#include "btree_update.h"
|
||||||
|
#include "btree_write_buffer.h"
|
||||||
#include "buckets.h"
|
#include "buckets.h"
|
||||||
#include "clock.h"
|
#include "clock.h"
|
||||||
#include "disk_groups.h"
|
#include "disk_groups.h"
|
||||||
@ -19,6 +20,7 @@
|
|||||||
#include "eytzinger.h"
|
#include "eytzinger.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
|
#include "lru.h"
|
||||||
#include "move.h"
|
#include "move.h"
|
||||||
#include "movinggc.h"
|
#include "movinggc.h"
|
||||||
#include "super-io.h"
|
#include "super-io.h"
|
||||||
@ -31,138 +33,105 @@
|
|||||||
#include <linux/sort.h>
|
#include <linux/sort.h>
|
||||||
#include <linux/wait.h>
|
#include <linux/wait.h>
|
||||||
|
|
||||||
static inline int fragmentation_cmp(copygc_heap *heap,
|
static int bch2_bucket_is_movable(struct btree_trans *trans,
|
||||||
struct copygc_heap_entry l,
|
struct bpos bucket, u64 time, u8 *gen)
|
||||||
struct copygc_heap_entry r)
|
|
||||||
{
|
{
|
||||||
return cmp_int(l.fragmentation, r.fragmentation);
|
struct btree_iter iter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
struct bch_alloc_v4 _a;
|
||||||
|
const struct bch_alloc_v4 *a;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0);
|
||||||
|
k = bch2_btree_iter_peek_slot(&iter);
|
||||||
|
ret = bkey_err(k);
|
||||||
|
bch2_trans_iter_exit(trans, &iter);
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
a = bch2_alloc_to_v4(k, &_a);
|
||||||
|
*gen = a->gen;
|
||||||
|
ret = (a->data_type == BCH_DATA_btree ||
|
||||||
|
a->data_type == BCH_DATA_user) &&
|
||||||
|
a->fragmentation_lru &&
|
||||||
|
a->fragmentation_lru <= time;
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
struct printbuf buf = PRINTBUF;
|
||||||
|
|
||||||
|
bch2_bkey_val_to_text(&buf, trans->c, k);
|
||||||
|
pr_debug("%s", buf.buf);
|
||||||
|
printbuf_exit(&buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
-static int find_buckets_to_copygc(struct bch_fs *c)
-{
-        copygc_heap *h = &c->copygc_heap;
-        struct btree_trans trans;
-        struct btree_iter iter;
-        struct bkey_s_c k;
-        int ret;
-
-        bch2_trans_init(&trans, c, 0, 0);
-
-        /*
-         * Find buckets with lowest sector counts, skipping completely
-         * empty buckets, by building a maxheap sorted by sector count,
-         * and repeatedly replacing the maximum element until all
-         * buckets have been visited.
-         */
-        h->used = 0;
-
-        for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
-                           BTREE_ITER_PREFETCH, k, ret) {
-                struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
-                struct copygc_heap_entry e;
-                struct bch_alloc_v4 a_convert;
-                const struct bch_alloc_v4 *a;
-
-                a = bch2_alloc_to_v4(k, &a_convert);
-
-                if ((a->data_type != BCH_DATA_btree &&
-                     a->data_type != BCH_DATA_user) ||
-                    a->dirty_sectors >= ca->mi.bucket_size ||
-                    bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
-                        continue;
-
-                e = (struct copygc_heap_entry) {
-                        .dev            = iter.pos.inode,
-                        .gen            = a->gen,
-                        .replicas       = 1 + a->stripe_redundancy,
-                        .fragmentation  = div_u64((u64) a->dirty_sectors * (1ULL << 31),
-                                                  ca->mi.bucket_size),
-                        .sectors        = a->dirty_sectors,
-                        .bucket         = iter.pos.offset,
-                };
-                heap_add_or_replace(h, e, -fragmentation_cmp, NULL);
-
-        }
-        bch2_trans_iter_exit(&trans, &iter);
-
-        bch2_trans_exit(&trans);
-        return ret;
-}
+static int bch2_copygc_next_bucket(struct btree_trans *trans,
+                                   struct bpos *bucket, u8 *gen, struct bpos *pos)
+{
+        struct btree_iter iter;
+        struct bkey_s_c k;
+        int ret;
+
+        ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
+                        bpos_max(*pos, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0)),
+                        lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
+                        0, k, ({
+                *bucket = u64_to_bucket(k.k->p.offset);
+
+                bch2_bucket_is_movable(trans, *bucket, lru_pos_time(k.k->p), gen);
+        }));
+
+        *pos = iter.pos;
+        if (ret < 0)
+                return ret;
+        return ret ? 0 : -ENOENT;
+}
 
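The rewrite replaces the full alloc-btree scan plus in-memory maxheap with a single range walk of the new fragmentation LRU: keys under BCH_LRU_FRAGMENTATION_START evidently sort by the bucket's fragmentation stamp, so the walk visits the emptiest movable buckets first, and the dev:bucket encoded in the key's offset (decoded with u64_to_bucket()) says what to evacuate. The removed heap code shows the index as a 31-bit fixed-point fill fraction; a self-contained sketch of that computation (editorial, assuming fragmentation_lru keeps the same scaling, which this hunk does not itself show):

    #include <linux/math64.h>

    /* Fill fraction scaled to [0, 1 << 31]: 0 = empty bucket, 1 << 31 = full.
     * Lower values mean less live data to move, i.e. better copygc targets. */
    static inline u64 fragmentation_idx(u32 dirty_sectors, u32 bucket_size)
    {
            return div_u64((u64) dirty_sectors * (1ULL << 31), bucket_size);
    }
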
 static int bch2_copygc(struct bch_fs *c)
 {
-        copygc_heap *h = &c->copygc_heap;
-        struct copygc_heap_entry e;
         struct bch_move_stats move_stats;
-        struct bch_dev *ca;
-        unsigned dev_idx;
-        size_t heap_size = 0;
+        struct btree_trans trans;
         struct moving_context ctxt;
         struct data_update_opts data_opts = {
                 .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc,
         };
+        struct bpos bucket;
+        struct bpos pos;
+        u8 gen = 0;
+        unsigned nr_evacuated;
         int ret = 0;
 
         bch2_move_stats_init(&move_stats, "copygc");
 
-        for_each_rw_member(ca, c, dev_idx)
-                heap_size += ca->mi.nbuckets >> 7;
-
-        if (h->size < heap_size) {
-                free_heap(&c->copygc_heap);
-                if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
-                        bch_err(c, "error allocating copygc heap");
-                        return 0;
-                }
-        }
-
-        ret = find_buckets_to_copygc(c);
-        if (ret) {
-                bch2_fs_fatal_error(c, "error walking buckets to copygc!");
-                return ret;
-        }
-
-        if (!h->used) {
-                s64 wait = S64_MAX, dev_wait;
-                u64 dev_min_wait_fragmented = 0;
-                u64 dev_min_wait_allowed = 0;
-                int dev_min_wait = -1;
-
-                for_each_rw_member(ca, c, dev_idx) {
-                        struct bch_dev_usage usage = bch2_dev_usage_read(ca);
-                        s64 allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) *
-                                        ca->mi.bucket_size) >> 1);
-                        s64 fragmented = usage.d[BCH_DATA_user].fragmented;
-
-                        dev_wait = max(0LL, allowed - fragmented);
-
-                        if (dev_min_wait < 0 || dev_wait < wait) {
-                                dev_min_wait = dev_idx;
-                                dev_min_wait_fragmented = fragmented;
-                                dev_min_wait_allowed = allowed;
-                        }
-                }
-
-                bch_err_ratelimited(c, "copygc requested to run but found no buckets to move! dev %u fragmented %llu allowed %llu",
-                                    dev_min_wait, dev_min_wait_fragmented, dev_min_wait_allowed);
-                return 0;
-        }
-
-        heap_resort(h, fragmentation_cmp, NULL);
-
         bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
                               writepoint_ptr(&c->copygc_write_point),
                               false);
+        bch2_trans_init(&trans, c, 0, 0);
 
-        /* not correct w.r.t. device removal */
-        while (h->used && !ret) {
-                BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
-                ret = __bch2_evacuate_bucket(&ctxt, POS(e.dev, e.bucket), e.gen,
-                                             data_opts);
-        }
+        ret = bch2_btree_write_buffer_flush(&trans);
+        BUG_ON(ret);
+
+        for (nr_evacuated = 0, pos = POS_MIN;
+             nr_evacuated < 32 && !ret;
+             nr_evacuated++, pos = bpos_nosnap_successor(pos)) {
+                ret = bch2_copygc_next_bucket(&trans, &bucket, &gen, &pos) ?:
+                        __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts);
+                if (bkey_eq(pos, POS_MAX))
+                        break;
+        }
 
+        bch2_trans_exit(&trans);
         bch2_moving_ctxt_exit(&ctxt);
 
+        /* no entries in LRU btree found, or got to end: */
+        if (ret == -ENOENT)
+                ret = 0;
+
         if (ret < 0 && !bch2_err_matches(ret, EROFS))
                 bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
 
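Two details in the new loop are easy to miss. First, bch2_btree_write_buffer_flush() runs before the walk, evidently because LRU updates are now buffered through the btree write buffer, so pending entries must be flushed for the LRU btree to be current. Second, each pass evacuates at most 32 buckets before returning to the caller, rather than draining a whole heap. The chained call uses the GNU '?:' extension; a tiny standalone illustration (step_one/step_two are hypothetical names, not from the tree):

    #include <stdio.h>

    static int step_one(void) { return 0; }   /* hypothetical: succeeds */
    static int step_two(void) { return 7; }   /* hypothetical next step */

    int main(void)
    {
            /* 'a ?: b' yields a when a is nonzero, else evaluates b, so
             * step_two() only runs when step_one() returned 0 -- exactly like
             * bch2_copygc_next_bucket() ?: __bch2_evacuate_bucket() above. */
            int ret = step_one() ?: step_two();
            printf("%d\n", ret);   /* prints 7 */
            return 0;
    }
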
@ -1105,6 +1105,9 @@ int bch2_fs_recovery(struct bch_fs *c)
                 c->opts.version_upgrade = true;
                 c->opts.fsck            = true;
                 c->opts.fix_errors      = FSCK_OPT_YES;
+        } else if (c->sb.version < bcachefs_metadata_version_fragmentation_lru) {
+                bch_info(c, "version prior to backpointers, upgrade required");
+                c->opts.version_upgrade = true;
         }
 }
 
@ -512,8 +512,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
         n->v.pad = 0;
         SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
 
-        ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?:
-                bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
+        ret = bch2_trans_update(trans, &iter, &n->k_i, 0);
         if (ret)
                 goto err;
 
@ -8,15 +8,15 @@
 void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c,
                           unsigned, struct printbuf *);
+int bch2_mark_snapshot(struct btree_trans *, struct bkey_s_c,
+                       struct bkey_s_c, unsigned);
 
 #define bch2_bkey_ops_snapshot ((struct bkey_ops) {    \
         .key_invalid    = bch2_snapshot_invalid,       \
         .val_to_text    = bch2_snapshot_to_text,       \
+        .atomic_trigger = bch2_mark_snapshot,          \
 })
 
-int bch2_mark_snapshot(struct btree_trans *, struct bkey_s_c,
-                       struct bkey_s_c, unsigned);
-
 static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
 {
         return genradix_ptr(&c->snapshots, U32_MAX - id);
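Registering bch2_mark_snapshot as the key type's .atomic_trigger is what made the explicit chained call in bch2_snapshot_node_create() (removed a few hunks up) redundant: once the trigger is wired into bch2_bkey_ops_snapshot, the transaction commit path invokes it for every snapshot-key update. A simplified sketch of that dispatch (editorial, assumed shape only; the tree's actual trigger plumbing handles more cases):

    /* How a registered atomic trigger gets run on commit, in outline. */
    static int run_atomic_trigger(struct btree_trans *trans,
                                  const struct bkey_ops *ops,
                                  struct bkey_s_c old, struct bkey_s_c new,
                                  unsigned flags)
    {
            return ops->atomic_trigger
                    ? ops->atomic_trigger(trans, old, new, flags)
                    : 0;
    }
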
@ -68,6 +68,13 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
         return id == ancestor;
 }
 
+static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id)
+{
+        struct snapshot_t *t = snapshot_t(c, id);
+
+        return (t->children[0]|t->children[1]) != 0;
+}
+
 static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
 {
         u32 *i;
 
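The new helper relies on snapshot nodes being binary: children[0] and children[1] hold the two child snapshot ids, with 0 meaning no child, so OR-ing them tests for any child at all. A hypothetical caller (illustrative only, not from this commit) might use it to distinguish leaf snapshots from interior nodes:

    /* Hypothetical: only leaf snapshots may be deleted directly. */
    static int check_deletable(struct bch_fs *c, u32 snapshot_id)
    {
            if (bch2_snapshot_has_children(c, snapshot_id))
                    return -EBUSY;  /* interior node: children still reference it */
            return 0;
    }
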
@ -488,7 +488,6 @@ static void __bch2_fs_free(struct bch_fs *c)
         kfree(rcu_dereference_protected(c->disk_groups, 1));
         kfree(c->journal_seq_blacklist_table);
         kfree(c->unused_inode_hints);
-        free_heap(&c->copygc_heap);
 
         if (c->io_complete_wq)
                 destroy_workqueue(c->io_complete_wq);
 
@ -433,8 +433,8 @@ static const struct time_unit {
         { "us",         NSEC_PER_USEC    },
         { "ms",         NSEC_PER_MSEC    },
         { "s",          NSEC_PER_SEC     },
-        { "m",          NSEC_PER_SEC * 60},
-        { "h",          NSEC_PER_SEC * 3600},
+        { "m",          (u64) NSEC_PER_SEC * 60},
+        { "h",          (u64) NSEC_PER_SEC * 3600},
         { "eon",        U64_MAX          },
 };
 
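The (u64) casts fix a 32-bit overflow: NSEC_PER_SEC is defined as a long, so on 32-bit targets NSEC_PER_SEC * 60 (6e10) and NSEC_PER_SEC * 3600 (3.6e12) are computed in 32-bit arithmetic and wrap before being stored in the 64-bit table field; casting one operand promotes the whole multiplication to 64 bits. A standalone demonstration (plain C, with an unsigned 32-bit value standing in for NSEC_PER_SEC on a 32-bit build):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t nsec_per_sec = 1000000000;     /* NSEC_PER_SEC, 32-bit */
            uint64_t bad  = nsec_per_sec * 3600;            /* 32-bit multiply: wraps to 817405952 */
            uint64_t good = (uint64_t) nsec_per_sec * 3600; /* 64-bit multiply */
            printf("bad=%llu good=%llu\n",
                   (unsigned long long) bad, (unsigned long long) good);
            /* good prints 3600000000000, as intended. */
            return 0;
    }
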
linux/six.c (13 changed lines)
@ -833,19 +833,12 @@ struct six_lock_count six_lock_counts(struct six_lock *lock)
 {
         struct six_lock_count ret;
 
-        ret.n[SIX_LOCK_read]   = 0;
+        ret.n[SIX_LOCK_read]   = !lock->readers
+                ? lock->state.read_lock
+                : pcpu_read_count(lock);
         ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse;
         ret.n[SIX_LOCK_write]  = lock->state.seq & 1;
 
-        if (!lock->readers)
-                ret.n[SIX_LOCK_read] += lock->state.read_lock;
-        else {
-                int cpu;
-
-                for_each_possible_cpu(cpu)
-                        ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu);
-        }
-
         return ret;
 }
 EXPORT_SYMBOL_GPL(six_lock_counts);
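The rewrite folds the per-CPU summing into the read-count initializer. pcpu_read_count() itself is not shown in this hunk, but judging from the loop being removed it presumably performs the same per-CPU walk; a sketch of that assumed helper, reconstructed from the deleted lines:

    /* Assumed shape of pcpu_read_count(), based on the removed loop: sum the
     * per-CPU reader counts for locks using percpu readers. */
    static unsigned pcpu_read_count(struct six_lock *lock)
    {
            unsigned read_count = 0;
            int cpu;

            for_each_possible_cpu(cpu)
                    read_count += *per_cpu_ptr(lock->readers, cpu);
            return read_count;
    }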