mirror of https://github.com/koverstreet/bcachefs-tools.git
synced 2025-02-23 00:00:02 +03:00

Update bcachefs sources to 02ae70070a bcachefs: Allocate new btree roots lazily

commit 4de98a2712
parent f8cbede6d1
@@ -1 +1 @@
-0b8c5d0fb7b5de6fb99030565cd2d0411da37f2b
+02ae70070acc3bc4740d221efa5ff5425cf6fce5
cmd_debug.c | 16
@@ -80,9 +80,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd)
 int cmd_dump(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
 	struct bch_dev *ca;
-	const char *err;
 	char *out = NULL;
 	unsigned i, nr_devices = 0;
 	bool force = false;
@@ -112,9 +110,9 @@ int cmd_dump(int argc, char *argv[])
 	if (!out)
 		die("Please supply output filename");
 
-	err = bch2_fs_open(argv + optind, argc - optind, opts, &c);
-	if (err)
-		die("error opening %s: %s", argv[optind], err);
+	struct bch_fs *c = bch2_fs_open(argv + optind, argc - optind, opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", argv[optind], strerror(-PTR_ERR(c)));
 
 	down_read(&c->gc_lock);
 
@@ -258,10 +256,8 @@ static const char * const list_modes[] = {
 int cmd_list(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
 	enum btree_id btree_id = BTREE_ID_EXTENTS;
 	struct bpos start = POS_MIN, end = POS_MAX;
-	const char *err;
 	u64 inum;
 	int mode = 0, opt;
 
@@ -307,9 +303,9 @@ int cmd_list(int argc, char *argv[])
 	if (optind >= argc)
 		die("Please supply device(s) to check");
 
-	err = bch2_fs_open(argv + optind, argc - optind, opts, &c);
-	if (err)
-		die("error opening %s: %s", argv[optind], err);
+	struct bch_fs *c = bch2_fs_open(argv + optind, argc - optind, opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", argv[optind], strerror(-PTR_ERR(c)));
 
 	switch (mode) {
 	case 0:
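The cmd_dump() and cmd_list() hunks above, like the device, fsck and key commands further down, move from the old bch2_fs_open() that filled in a struct bch_fs ** and returned an error string to a variant that returns the filesystem pointer directly, with failures encoded via ERR_PTR. A minimal sketch of the new caller pattern, assuming the bcachefs-tools headers; the helper name open_fs_or_die() is illustrative and not part of this commit:

#include <errno.h>
#include <string.h>

/* Illustrative helper following the caller pattern this diff introduces: */
static struct bch_fs *open_fs_or_die(char **devs, unsigned nr_devs)
{
	struct bch_opts opts = bch2_opts_empty();

	/* The returned pointer itself carries the error. */
	struct bch_fs *c = bch2_fs_open(devs, nr_devs, opts);

	if (IS_ERR(c))
		die("error opening %s: %s", devs[0], strerror(-PTR_ERR(c)));

	return c;
}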
@@ -528,11 +528,9 @@ int cmd_device_resize(int argc, char *argv[])
 	} else {
 		printf("Doing offline resize of %s\n", dev);
 
-		struct bch_fs *c = NULL;
-		struct bch_opts opts = bch2_opts_empty();
-		const char *err = bch2_fs_open(&dev, 1, opts, &c);
-		if (err)
-			die("error opening %s: %s", dev, err);
+		struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+		if (IS_ERR(c))
+			die("error opening %s: %s", dev, strerror(-PTR_ERR(c)));
 
 		struct bch_dev *ca, *resize = NULL;
 		unsigned i;
@@ -328,11 +328,11 @@ int cmd_show_super(int argc, char *argv[])
 	if (argc)
 		die("too many arguments");
 
-	const char *err;
+	struct bch_opts opts = bch2_opts_empty();
 	struct bch_sb_handle sb;
-	err = bch2_read_super(dev, bch2_opts_empty(), &sb);
-	if (err)
-		die("Error opening %s: %s", dev, err);
+	int ret = bch2_read_super(dev, &opts, &sb);
+	if (ret)
+		die("Error opening %s: %s", dev, strerror(-ret));
 
 	bch2_sb_print(sb.sb, print_layout, fields, HUMAN_READABLE);
 	bch2_free_super(&sb);
@@ -23,8 +23,6 @@ static void usage(void)
 int cmd_fsck(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	const char *err;
 	int opt;
 
 	opt_set(opts, degraded, true);
@@ -56,9 +54,9 @@ int cmd_fsck(int argc, char *argv[])
 	if (optind >= argc)
 		die("Please supply device(s) to check");
 
-	err = bch2_fs_open(argv + optind, argc - optind, opts, &c);
-	if (err)
-		die("error opening %s: %s", argv[optind], err);
+	struct bch_fs *c = bch2_fs_open(argv + optind, argc - optind, opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", argv[optind], strerror(-PTR_ERR(c)));
 
 	bch2_fs_stop(c);
 	return 0;
cmd_key.c | 26
@@ -9,16 +9,16 @@
 
 int cmd_unlock(int argc, char *argv[])
 {
+	struct bch_opts opts = bch2_opts_empty();
 	struct bch_sb_handle sb;
-	const char *err;
 	char *passphrase;
 
 	if (argc != 2)
 		die("Please supply a single device");
 
-	err = bch2_read_super(argv[1], bch2_opts_empty(), &sb);
-	if (err)
-		die("Error opening %s: %s", argv[1], err);
+	int ret = bch2_read_super(argv[1], &opts, &sb);
+	if (ret)
+		die("Error opening %s: %s", argv[1], strerror(-ret));
 
 	passphrase = read_passphrase("Enter passphrase: ");
 
@@ -32,16 +32,15 @@ int cmd_unlock(int argc, char *argv[])
 int cmd_set_passphrase(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	const char *err;
+	struct bch_fs *c;
 
 	if (argc < 2)
 		die("Please supply one or more devices");
 
 	opt_set(opts, nostart, true);
-	err = bch2_fs_open(argv + 1, argc - 1, opts, &c);
-	if (err)
-		die("Error opening %s: %s", argv[1], err);
+	c = bch2_fs_open(argv + 1, argc - 1, opts);
+	if (IS_ERR(c))
+		die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
 
 	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb);
 	if (!crypt)
@@ -70,16 +69,15 @@ int cmd_set_passphrase(int argc, char *argv[])
 int cmd_remove_passphrase(int argc, char *argv[])
 {
 	struct bch_opts opts = bch2_opts_empty();
-	struct bch_fs *c = NULL;
-	const char *err;
+	struct bch_fs *c;
 
 	if (argc < 2)
 		die("Please supply one or more devices");
 
 	opt_set(opts, nostart, true);
-	err = bch2_fs_open(argv + 1, argc - 1, opts, &c);
-	if (err)
-		die("Error opening %s: %s", argv[1], err);
+	c = bch2_fs_open(argv + 1, argc - 1, opts);
+	if (IS_ERR(c))
+		die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
 
 	struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb);
 	if (!crypt)
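cmd_unlock() here, like cmd_show_super() earlier, makes the matching change to bch2_read_super(): it now takes a pointer to the options and returns a negative errno instead of an error string. A short sketch of the new call sequence, again assuming the tools' headers; read_sb_or_die() and the dev_path argument are illustrative only:

static void read_sb_or_die(const char *dev_path, struct bch_sb_handle *sb)
{
	struct bch_opts opts = bch2_opts_empty();

	/* New interface: 0 on success, negative errno on failure. */
	int ret = bch2_read_super(dev_path, &opts, sb);

	if (ret)
		die("Error opening %s: %s", dev_path, strerror(-ret));
}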
@@ -334,7 +334,8 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
 		die("error reserving space in new filesystem: %s",
 		    strerror(-ret));
 
-	bch2_check_mark_super(c, extent_i_to_s_c(e), false);
+	bch2_check_mark_super(c, BCH_DATA_USER,
+			      bch2_bkey_devs(extent_i_to_s_c(e).s_c));
 
 	ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i,
 				&res, NULL, NULL, 0);
@@ -734,19 +735,18 @@ int cmd_migrate(int argc, char *argv[])
 	struct bch_opts opts = bch2_opts_empty();
 	struct bch_fs *c = NULL;
 	char *path[1] = { dev.path };
-	const char *err;
 
 	opt_set(opts, sb, sb_offset);
 	opt_set(opts, nostart, true);
 	opt_set(opts, noexcl, true);
 
-	err = bch2_fs_open(path, 1, opts, &c);
-	if (err)
-		die("Error opening new filesystem: %s", err);
+	c = bch2_fs_open(path, 1, opts);
+	if (IS_ERR(c))
+		die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
 
 	mark_unreserved_space(c, extents);
 
-	err = bch2_fs_start(c);
+	const char *err = bch2_fs_start(c);
 	if (err)
 		die("Error starting new filesystem: %s", err);
 
@@ -758,9 +758,9 @@ int cmd_migrate(int argc, char *argv[])
 	opt_set(opts, nostart, false);
 	opt_set(opts, nochanges, true);
 
-	err = bch2_fs_open(path, 1, opts, &c);
-	if (err)
-		die("Error opening new filesystem: %s", err);
+	c = bch2_fs_open(path, 1, opts);
+	if (IS_ERR(c))
+		die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
 
 	bch2_fs_stop(c);
 	printf("fsck complete\n");
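The link_data() hunk above also shows the reworked bch2_check_mark_super(), which now takes a data type and a device list instead of the extent key itself. A hedged sketch of the new call shape, using only names that appear in this diff; the wrapper mark_extent_devs() is illustrative:

/*
 * Mark the superblock as containing BCH_DATA_USER replicas on every device
 * the extent e points to, before inserting it.
 */
static int mark_extent_devs(struct bch_fs *c, struct bkey_i_extent *e)
{
	return bch2_check_mark_super(c, BCH_DATA_USER,
				     bch2_bkey_devs(extent_i_to_s_c(e).s_c));
}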
@@ -99,11 +99,11 @@ struct genradix_iter {
 	size_t pos;
 };
 
-static inline void genradix_iter_init(struct genradix_iter *iter)
-{
-	iter->offset = 0;
-	iter->pos = 0;
-}
+#define genradix_iter_init(_radix, _idx)			\
+	((struct genradix_iter) {				\
+		.pos	= (_idx),				\
+		.offset	= __genradix_idx_to_offset((_radix), (_idx)),\
+	})
 
 void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
 
@@ -454,6 +454,11 @@ static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
 	}
 }
 
+static void bch2_sb_print_quota(struct bch_sb *sb, struct bch_sb_field *f,
+				enum units units)
+{
+}
+
 typedef void (*sb_field_print_fn)(struct bch_sb *, struct bch_sb_field *, enum units);
 
 struct bch_sb_field_ops {
@@ -55,6 +55,8 @@
 
 #include "bcachefs.h"
 #include "alloc.h"
+#include "btree_cache.h"
+#include "btree_io.h"
 #include "btree_update.h"
 #include "btree_gc.h"
 #include "buckets.h"
@@ -290,9 +292,6 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 	unsigned i;
 	int ret;
 
-	if (!c->btree_roots[BTREE_ID_ALLOC].b)
-		return 0;
-
 	for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS_MIN, 0, k) {
 		bch2_alloc_read_key(c, k);
 		bch2_btree_iter_cond_resched(&iter);
@@ -401,7 +400,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
 	return ret;
 }
 
-static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca, u64 *journal_seq)
+static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct btree_iter iter;
 	unsigned long bucket;
@@ -412,7 +411,7 @@ static int bch2_alloc_write(struct bch_fs *c, struct bch_dev *ca, u64 *journal_s
 
 	down_read(&ca->bucket_lock);
 	for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
-		ret = __bch2_alloc_write_key(c, ca, bucket, &iter, journal_seq);
+		ret = __bch2_alloc_write_key(c, ca, bucket, &iter, NULL);
 		if (ret)
 			break;
 
@@ -537,7 +536,8 @@ static void bch2_prio_timer_init(struct bch_fs *c, int rw)
 static void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
 				   size_t bucket)
 {
-	if (expensive_debug_checks(c)) {
+	if (expensive_debug_checks(c) &&
+	    test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
 		size_t iter;
 		long i;
 		unsigned j;
@@ -692,7 +692,7 @@ static inline int bucket_alloc_cmp(alloc_heap *h,
 	return (l.key > r.key) - (l.key < r.key);
 }
 
-static void invalidate_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets;
 	struct alloc_heap_entry e;
@@ -740,7 +740,7 @@ static void invalidate_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
 		bch2_invalidate_one_bucket(c, ca, e.bucket);
 }
 
-static void invalidate_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets = bucket_array(ca);
 	struct bucket_mark m;
@@ -762,7 +762,7 @@ static void invalidate_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
 	}
 }
 
-static void invalidate_buckets_random(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca)
 {
 	struct bucket_array *buckets = bucket_array(ca);
 	struct bucket_mark m;
@@ -782,21 +782,21 @@ static void invalidate_buckets_random(struct bch_fs *c, struct bch_dev *ca)
 	}
 }
 
-static void invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
+static void find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
 {
 	ca->inc_gen_needs_gc		= 0;
 	ca->inc_gen_really_needs_gc	= 0;
 
 	switch (ca->mi.replacement) {
-	case CACHE_REPLACEMENT_LRU:
-		invalidate_buckets_lru(c, ca);
-		break;
-	case CACHE_REPLACEMENT_FIFO:
-		invalidate_buckets_fifo(c, ca);
-		break;
-	case CACHE_REPLACEMENT_RANDOM:
-		invalidate_buckets_random(c, ca);
-		break;
+	case CACHE_REPLACEMENT_LRU:
+		find_reclaimable_buckets_lru(c, ca);
+		break;
+	case CACHE_REPLACEMENT_FIFO:
+		find_reclaimable_buckets_fifo(c, ca);
+		break;
+	case CACHE_REPLACEMENT_RANDOM:
+		find_reclaimable_buckets_random(c, ca);
+		break;
 	}
 }
|
||||
|
||||
@ -807,79 +807,119 @@ static int size_t_cmp(const void *_l, const void *_r)
|
||||
return (*l > *r) - (*l < *r);
|
||||
}
|
||||
|
||||
static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
BUG_ON(ca->free_inc.front);
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
sort(ca->free_inc.data,
|
||||
ca->free_inc.back,
|
||||
sizeof(ca->free_inc.data[0]),
|
||||
size_t_cmp, NULL);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
}
|
||||
|
||||
static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
|
||||
u64 *journal_seq)
|
||||
u64 *journal_seq, size_t nr)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
unsigned nr_invalidated = 0;
|
||||
size_t b, i;
|
||||
int ret = 0;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
|
||||
BTREE_ITER_INTENT);
|
||||
|
||||
fifo_for_each_entry(b, &ca->free_inc, i) {
|
||||
/*
|
||||
* XXX: if ca->nr_invalidated != 0, just return if we'd block doing the
|
||||
* btree update or journal_res_get
|
||||
*/
|
||||
while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
|
||||
size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);
|
||||
|
||||
ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
nr_invalidated++;
|
||||
ca->nr_invalidated++;
|
||||
}
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return nr_invalidated ?: ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an invalidated, ready to use bucket: issue a discard to it if enabled,
|
||||
* then add it to the freelist, waiting until there's room if necessary:
|
||||
*/
|
||||
static void discard_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
long bucket)
|
||||
static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
|
||||
{
|
||||
if (ca->mi.discard &&
|
||||
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca, bucket),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
unsigned i;
|
||||
|
||||
/*
|
||||
* Don't remove from free_inc until after it's added to
|
||||
* freelist, so gc can find it:
|
||||
*/
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
if (fifo_push(&ca->free[i], bucket)) {
|
||||
fifo_pop(&ca->free_inc, bucket);
|
||||
--ca->nr_invalidated;
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
return true;
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
bool pushed = false;
|
||||
unsigned i;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* Don't remove from free_inc until after it's added to
|
||||
* freelist, so gc can find it:
|
||||
*/
|
||||
spin_lock(&c->freelist_lock);
|
||||
for (i = 0; i < RESERVE_NR; i++)
|
||||
if (fifo_push(&ca->free[i], bucket)) {
|
||||
fifo_pop(&ca->free_inc, bucket);
|
||||
closure_wake_up(&c->freelist_wait);
|
||||
pushed = true;
|
||||
break;
|
||||
}
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
if (pushed)
|
||||
if (__push_invalidated_bucket(c, ca, bucket))
|
||||
break;
|
||||
|
||||
if (kthread_should_stop())
|
||||
if ((current->flags & PF_KTHREAD) &&
|
||||
kthread_should_stop()) {
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
schedule();
|
||||
try_to_freeze();
|
||||
}
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given an invalidated, ready to use bucket: issue a discard to it if enabled,
|
||||
* then add it to the freelist, waiting until there's room if necessary:
|
||||
*/
|
||||
static int discard_invalidated_buckets(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
while (ca->nr_invalidated) {
|
||||
size_t bucket = fifo_peek(&ca->free_inc);
|
||||
|
||||
BUG_ON(fifo_empty(&ca->free_inc) || !ca->nr_invalidated);
|
||||
|
||||
if (ca->mi.discard &&
|
||||
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca, bucket),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
|
||||
if (push_invalidated_bucket(c, ca, bucket))
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* bch_allocator_thread - move buckets from free_inc to reserves
|
||||
*
|
||||
* The free_inc FIFO is populated by invalidate_buckets(), and
|
||||
* The free_inc FIFO is populated by find_reclaimable_buckets(), and
|
||||
* the reserves are depleted by bucket allocation. When we run out
|
||||
* of free_inc, try to invalidate some buckets and write out
|
||||
* prios and gens.
|
||||
@ -889,43 +929,36 @@ static int bch2_allocator_thread(void *arg)
|
||||
struct bch_dev *ca = arg;
|
||||
struct bch_fs *c = ca->fs;
|
||||
u64 journal_seq;
|
||||
size_t bucket;
|
||||
int ret;
|
||||
|
||||
set_freezable();
|
||||
|
||||
while (1) {
|
||||
while (1) {
|
||||
while (ca->nr_invalidated) {
|
||||
BUG_ON(fifo_empty(&ca->free_inc));
|
||||
|
||||
bucket = fifo_peek(&ca->free_inc);
|
||||
discard_invalidated_bucket(c, ca, bucket);
|
||||
if (kthread_should_stop())
|
||||
return 0;
|
||||
--ca->nr_invalidated;
|
||||
}
|
||||
ret = discard_invalidated_buckets(c, ca);
|
||||
if (ret)
|
||||
return 0;
|
||||
|
||||
if (fifo_empty(&ca->free_inc))
|
||||
break;
|
||||
|
||||
journal_seq = 0;
|
||||
ret = bch2_invalidate_free_inc(c, ca, &journal_seq);
|
||||
if (ret < 0)
|
||||
ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
|
||||
if (ret)
|
||||
return 0;
|
||||
|
||||
ca->nr_invalidated = ret;
|
||||
|
||||
if (ca->nr_invalidated == fifo_used(&ca->free_inc)) {
|
||||
ca->alloc_thread_started = true;
|
||||
bch2_alloc_write(c, ca, &journal_seq);
|
||||
}
|
||||
|
||||
if (ca->allocator_invalidating_data)
|
||||
bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
else if (ca->allocator_journal_seq_flush)
|
||||
bch2_journal_flush_seq(&c->journal,
|
||||
ret = bch2_journal_flush_seq(&c->journal,
|
||||
ca->allocator_journal_seq_flush);
|
||||
|
||||
/*
|
||||
* journal error - buckets haven't actually been
|
||||
* invalidated, can't discard them:
|
||||
*/
|
||||
if (ret)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reset front/back so we can easily sort fifo entries later: */
|
||||
@ -947,7 +980,7 @@ static int bch2_allocator_thread(void *arg)
|
||||
* another cache tier
|
||||
*/
|
||||
|
||||
invalidate_buckets(c, ca);
|
||||
find_reclaimable_buckets(c, ca);
|
||||
trace_alloc_batch(ca, fifo_used(&ca->free_inc),
|
||||
ca->free_inc.size);
|
||||
|
||||
@ -970,14 +1003,7 @@ static int bch2_allocator_thread(void *arg)
|
||||
}
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
BUG_ON(ca->free_inc.front);
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
sort(ca->free_inc.data,
|
||||
ca->free_inc.back,
|
||||
sizeof(ca->free_inc.data[0]),
|
||||
size_t_cmp, NULL);
|
||||
spin_unlock(&c->freelist_lock);
|
||||
sort_free_inc(c, ca);
|
||||
|
||||
/*
|
||||
* free_inc is now full of newly-invalidated buckets: next,
|
||||
@ -1037,51 +1063,27 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
|
||||
return ob;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: allocation on startup is still sketchy. There is insufficient
|
||||
* synchronization for bch2_bucket_alloc_startup() to work correctly after
|
||||
* bch2_alloc_write() has been called, and we aren't currently doing anything
|
||||
* to guarantee that this won't happen.
|
||||
*
|
||||
* Even aside from that, it's really difficult to avoid situations where on
|
||||
* startup we write out a pointer to a freshly allocated bucket before the
|
||||
* corresponding gen - when we're still digging ourself out of the "i need to
|
||||
* allocate to write bucket gens, but i need to write bucket gens to allocate"
|
||||
* hole.
|
||||
*
|
||||
* Fortunately, bch2_btree_mark_key_initial() will detect and repair this
|
||||
* easily enough...
|
||||
*/
|
||||
static long bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
|
||||
/* _only_ for allocating the journal and btree roots on a brand new fs: */
|
||||
int bch2_bucket_alloc_startup(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
struct bucket_array *buckets;
|
||||
ssize_t b;
|
||||
|
||||
if (!down_read_trylock(&c->gc_lock))
|
||||
return -1;
|
||||
|
||||
if (test_bit(BCH_FS_GC_FAILURE, &c->flags)) {
|
||||
up_read(&c->gc_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
spin_unlock(&c->freelist_lock);
|
||||
|
||||
down_read(&ca->bucket_lock);
|
||||
rcu_read_lock();
|
||||
buckets = bucket_array(ca);
|
||||
|
||||
spin_lock(&c->freelist_lock);
|
||||
|
||||
for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
|
||||
if (is_startup_available_bucket(buckets->b[b].mark) &&
|
||||
bch2_mark_alloc_bucket_startup(c, ca, b)) {
|
||||
if (is_available_bucket(buckets->b[b].mark)) {
|
||||
bch2_mark_alloc_bucket(c, ca, b, true,
|
||||
gc_pos_alloc(c, NULL),
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
set_bit(b, ca->buckets_dirty);
|
||||
goto success;
|
||||
}
|
||||
b = -1;
|
||||
success:
|
||||
up_read(&ca->bucket_lock);
|
||||
up_read(&c->gc_lock);
|
||||
rcu_read_unlock();
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -1150,8 +1152,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
|
||||
break;
|
||||
}
|
||||
|
||||
if (unlikely(!ca->alloc_thread_started) &&
|
||||
(reserve == RESERVE_ALLOC) &&
|
||||
if (unlikely(test_bit(BCH_FS_BRAND_NEW_FS, &c->flags)) &&
|
||||
(bucket = bch2_bucket_alloc_startup(c, ca)) >= 0)
|
||||
goto out;
|
||||
|
||||
@ -1858,6 +1859,172 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __bch2_fs_allocator_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
size_t bu, i, devs_have_enough = 0;
|
||||
unsigned dev_iter;
|
||||
u64 journal_seq = 0;
|
||||
bool invalidating_data = false;
|
||||
int ret = 0;
|
||||
|
||||
if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
|
||||
return -1;
|
||||
|
||||
/* Scan for buckets that are already invalidated: */
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
struct btree_iter iter;
|
||||
struct bucket_mark m;
|
||||
struct bkey_s_c k;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0), 0, k) {
|
||||
if (k.k->type != BCH_ALLOC)
|
||||
continue;
|
||||
|
||||
bu = k.k->p.offset;
|
||||
m = READ_ONCE(bucket(ca, bu)->mark);
|
||||
|
||||
if (!is_available_bucket(m) || m.cached_sectors)
|
||||
continue;
|
||||
|
||||
bch2_mark_alloc_bucket(c, ca, bu, true,
|
||||
gc_pos_alloc(c, NULL),
|
||||
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
|
||||
BCH_BUCKET_MARK_GC_LOCK_HELD);
|
||||
|
||||
fifo_push(&ca->free_inc, bu);
|
||||
ca->nr_invalidated++;
|
||||
|
||||
if (fifo_full(&ca->free_inc))
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
/* did we find enough buckets? */
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
devs_have_enough += (fifo_used(&ca->free_inc) >=
|
||||
ca->free[RESERVE_BTREE].size);
|
||||
|
||||
if (devs_have_enough >= c->opts.metadata_replicas)
|
||||
return 0;
|
||||
|
||||
/* clear out free_inc - find_reclaimable_buckets() assumes it's empty */
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
discard_invalidated_buckets(c, ca);
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
BUG_ON(!fifo_empty(&ca->free_inc));
|
||||
ca->free_inc.front = ca->free_inc.back = 0;
|
||||
|
||||
find_reclaimable_buckets(c, ca);
|
||||
sort_free_inc(c, ca);
|
||||
|
||||
invalidating_data |= ca->allocator_invalidating_data;
|
||||
|
||||
fifo_for_each_entry(bu, &ca->free_inc, i)
|
||||
if (!fifo_push(&ca->free[RESERVE_BTREE], bu))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're moving buckets to freelists _before_ they've been marked as
|
||||
* invalidated on disk - we have to so that we can allocate new btree
|
||||
* nodes to mark them as invalidated on disk.
|
||||
*
|
||||
* However, we can't _write_ to any of these buckets yet - they might
|
||||
* have cached data in them, which is live until they're marked as
|
||||
* invalidated on disk:
|
||||
*/
|
||||
if (invalidating_data)
|
||||
set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
|
||||
/*
|
||||
* XXX: it's possible for this to deadlock waiting on journal reclaim,
|
||||
* since we're holding btree writes. What then?
|
||||
*/
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter) {
|
||||
ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
|
||||
ca->free[RESERVE_BTREE].size);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (invalidating_data) {
|
||||
ret = bch2_journal_flush_seq(&c->journal, journal_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, dev_iter)
|
||||
while (ca->nr_invalidated) {
|
||||
BUG_ON(!fifo_pop(&ca->free_inc, bu));
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca, bu),
|
||||
ca->mi.bucket_size, GFP_NOIO, 0);
|
||||
ca->nr_invalidated--;
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
|
||||
|
||||
/* now flush dirty btree nodes: */
|
||||
if (invalidating_data) {
|
||||
struct bucket_table *tbl;
|
||||
struct rhash_head *pos;
|
||||
struct btree *b;
|
||||
|
||||
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
|
||||
again:
|
||||
rcu_read_lock();
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
if (btree_node_dirty(b) && (!b->written || b->level)) {
|
||||
rcu_read_unlock();
|
||||
six_lock_read(&b->lock);
|
||||
bch2_btree_node_write(c, b, NULL, SIX_LOCK_read);
|
||||
six_unlock_read(&b->lock);
|
||||
goto again;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_fs_allocator_start(struct bch_fs *c)
|
||||
{
|
||||
struct bch_dev *ca;
|
||||
unsigned i;
|
||||
int ret;
|
||||
|
||||
down_read(&c->gc_lock);
|
||||
ret = __bch2_fs_allocator_start(c);
|
||||
up_read(&c->gc_lock);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
ret = bch2_dev_allocator_start(ca);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
for_each_rw_member(ca, c, i) {
|
||||
ret = bch2_alloc_write(c, ca);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bch2_fs_allocator_init(struct bch_fs *c)
|
||||
{
|
||||
struct open_bucket *ob;
|
||||
|
@ -118,6 +118,7 @@ static inline void writepoint_init(struct write_point *wp,
|
||||
wp->type = type;
|
||||
}
|
||||
|
||||
int bch2_fs_allocator_start(struct bch_fs *);
|
||||
void bch2_fs_allocator_init(struct bch_fs *);
|
||||
|
||||
extern const struct bkey_ops bch2_bkey_alloc_ops;
|
||||
|
@ -281,11 +281,9 @@ do { \
|
||||
#include "clock_types.h"
|
||||
#include "journal_types.h"
|
||||
#include "keylist_types.h"
|
||||
#include "quota_types.h"
|
||||
#include "super_types.h"
|
||||
|
||||
/* 256k, in sectors */
|
||||
#define BTREE_NODE_SIZE_MAX 512
|
||||
|
||||
/*
|
||||
* Number of nodes we might have to allocate in a worst case btree split
|
||||
* operation - we split all the way up to the root, then allocate a new root.
|
||||
@ -380,7 +378,6 @@ struct bch_dev {
|
||||
alloc_fifo free_inc;
|
||||
spinlock_t freelist_lock;
|
||||
unsigned nr_invalidated;
|
||||
bool alloc_thread_started;
|
||||
|
||||
u8 open_buckets_partial[OPEN_BUCKETS_COUNT];
|
||||
unsigned open_buckets_partial_nr;
|
||||
@ -423,18 +420,28 @@ struct bch_dev {
|
||||
* won't automatically reattach).
|
||||
*/
|
||||
enum {
|
||||
/* startup: */
|
||||
BCH_FS_BRAND_NEW_FS,
|
||||
BCH_FS_ALLOC_READ_DONE,
|
||||
BCH_FS_ALLOCATOR_STARTED,
|
||||
BCH_FS_INITIAL_GC_DONE,
|
||||
BCH_FS_FSCK_DONE,
|
||||
|
||||
/* shutdown: */
|
||||
BCH_FS_EMERGENCY_RO,
|
||||
BCH_FS_WRITE_DISABLE_COMPLETE,
|
||||
BCH_FS_GC_STOPPING,
|
||||
BCH_FS_GC_FAILURE,
|
||||
BCH_FS_BDEV_MOUNTED,
|
||||
|
||||
/* errors: */
|
||||
BCH_FS_ERROR,
|
||||
BCH_FS_GC_FAILURE,
|
||||
|
||||
/* misc: */
|
||||
BCH_FS_BDEV_MOUNTED,
|
||||
BCH_FS_FSCK_FIXED_ERRORS,
|
||||
BCH_FS_FSCK_DONE,
|
||||
BCH_FS_FIXED_GENS,
|
||||
BCH_FS_REBUILD_REPLICAS,
|
||||
BCH_FS_HOLD_BTREE_WRITES,
|
||||
};
|
||||
|
||||
struct btree_debug {
|
||||
@ -517,7 +524,7 @@ struct bch_fs {
|
||||
struct mutex sb_lock;
|
||||
|
||||
/* BTREE CACHE */
|
||||
struct bio_set btree_read_bio;
|
||||
struct bio_set btree_bio;
|
||||
|
||||
struct btree_root btree_roots[BTREE_ID_NR];
|
||||
bool btree_roots_dirty;
|
||||
@ -665,6 +672,9 @@ struct bch_fs {
|
||||
unsigned writeback_pages_max;
|
||||
atomic_long_t nr_inodes;
|
||||
|
||||
/* QUOTAS */
|
||||
struct bch_memquota_type quotas[QTYP_NR];
|
||||
|
||||
/* DEBUG JUNK */
|
||||
struct dentry *debug;
|
||||
struct btree_debug btree_debug[BTREE_ID_NR];
|
||||
|
@ -606,11 +606,13 @@ BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION);
|
||||
BCH_INODE_FIELD(bi_generation, 32) \
|
||||
BCH_INODE_FIELD(bi_dev, 32) \
|
||||
BCH_INODE_FIELD(bi_data_checksum, 8) \
|
||||
BCH_INODE_FIELD(bi_compression, 8)
|
||||
BCH_INODE_FIELD(bi_compression, 8) \
|
||||
BCH_INODE_FIELD(bi_project, 32)
|
||||
|
||||
#define BCH_INODE_FIELDS_INHERIT() \
|
||||
BCH_INODE_FIELD(bi_data_checksum) \
|
||||
BCH_INODE_FIELD(bi_compression)
|
||||
BCH_INODE_FIELD(bi_compression) \
|
||||
BCH_INODE_FIELD(bi_project)
|
||||
|
||||
enum {
|
||||
/*
|
||||
@ -737,6 +739,36 @@ struct bch_alloc {
|
||||
} __attribute__((packed, aligned(8)));
|
||||
BKEY_VAL_TYPE(alloc, BCH_ALLOC);
|
||||
|
||||
/* Quotas: */
|
||||
|
||||
enum {
|
||||
BCH_QUOTA = 128,
|
||||
};
|
||||
|
||||
enum quota_types {
|
||||
QTYP_USR = 0,
|
||||
QTYP_GRP = 1,
|
||||
QTYP_PRJ = 2,
|
||||
QTYP_NR = 3,
|
||||
};
|
||||
|
||||
enum quota_counters {
|
||||
Q_SPC = 0,
|
||||
Q_INO = 1,
|
||||
Q_COUNTERS = 2,
|
||||
};
|
||||
|
||||
struct bch_quota_counter {
|
||||
__le64 hardlimit;
|
||||
__le64 softlimit;
|
||||
};
|
||||
|
||||
struct bch_quota {
|
||||
struct bch_val v;
|
||||
struct bch_quota_counter c[Q_COUNTERS];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
BKEY_VAL_TYPE(quota, BCH_QUOTA);
|
||||
|
||||
/* Optional/variable size superblock sections: */
|
||||
|
||||
struct bch_sb_field {
|
||||
@ -749,7 +781,8 @@ struct bch_sb_field {
|
||||
x(journal, 0) \
|
||||
x(members, 1) \
|
||||
x(crypt, 2) \
|
||||
x(replicas, 3)
|
||||
x(replicas, 3) \
|
||||
x(quota, 4)
|
||||
|
||||
enum bch_sb_field_type {
|
||||
#define x(f, nr) BCH_SB_FIELD_##f = nr,
|
||||
@ -883,6 +916,23 @@ struct bch_sb_field_replicas {
|
||||
struct bch_replicas_entry entries[0];
|
||||
};
|
||||
|
||||
/* BCH_SB_FIELD_quota: */
|
||||
|
||||
struct bch_sb_quota_counter {
|
||||
__le32 timelimit;
|
||||
__le32 warnlimit;
|
||||
};
|
||||
|
||||
struct bch_sb_quota_type {
|
||||
__le64 flags;
|
||||
struct bch_sb_quota_counter c[Q_COUNTERS];
|
||||
};
|
||||
|
||||
struct bch_sb_field_quota {
|
||||
struct bch_sb_field field;
|
||||
struct bch_sb_quota_type q[QTYP_NR];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
/* Superblock: */
|
||||
|
||||
/*
|
||||
@ -986,6 +1036,11 @@ LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
|
||||
LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
|
||||
|
||||
LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57);
|
||||
LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58);
|
||||
LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59);
|
||||
LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
|
||||
|
||||
/* 60-64 unused */
|
||||
|
||||
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
|
||||
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
|
||||
@ -1181,7 +1236,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
|
||||
DEF_BTREE_ID(INODES, 1, "inodes") \
|
||||
DEF_BTREE_ID(DIRENTS, 2, "dirents") \
|
||||
DEF_BTREE_ID(XATTRS, 3, "xattrs") \
|
||||
DEF_BTREE_ID(ALLOC, 4, "alloc")
|
||||
DEF_BTREE_ID(ALLOC, 4, "alloc") \
|
||||
DEF_BTREE_ID(QUOTAS, 5, "quotas")
|
||||
|
||||
#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
|
||||
|
||||
|
@ -7,6 +7,10 @@
|
||||
#include "util.h"
|
||||
#include "vstructs.h"
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define HAVE_BCACHEFS_COMPILED_UNPACK 1
|
||||
#endif
|
||||
|
||||
void bch2_to_binary(char *, const u64 *, unsigned);
|
||||
|
||||
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
|
||||
@ -381,8 +385,7 @@ static inline u64 bkey_field_max(const struct bkey_format *f,
|
||||
: U64_MAX;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define HAVE_BCACHEFS_COMPILED_UNPACK 1
|
||||
#ifdef HAVE_BCACHEFS_COMPILED_UNPACK
|
||||
|
||||
int bch2_compile_bkey_format(const struct bkey_format *, void *);
|
||||
|
||||
@ -583,6 +586,8 @@ BKEY_VAL_ACCESSORS(xattr, BCH_XATTR);
|
||||
|
||||
BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC);
|
||||
|
||||
BKEY_VAL_ACCESSORS(quota, BCH_QUOTA);
|
||||
|
||||
/* byte order helpers */
|
||||
|
||||
#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "inode.h"
|
||||
#include "quota.h"
|
||||
#include "xattr.h"
|
||||
|
||||
const struct bkey_ops *bch2_bkey_ops[] = {
|
||||
@ -15,6 +16,7 @@ const struct bkey_ops *bch2_bkey_ops[] = {
|
||||
[BKEY_TYPE_DIRENTS] = &bch2_bkey_dirent_ops,
|
||||
[BKEY_TYPE_XATTRS] = &bch2_bkey_xattr_ops,
|
||||
[BKEY_TYPE_ALLOC] = &bch2_bkey_alloc_ops,
|
||||
[BKEY_TYPE_QUOTAS] = &bch2_bkey_quota_ops,
|
||||
[BKEY_TYPE_BTREE] = &bch2_bkey_btree_ops,
|
||||
};
|
||||
|
||||
|
@ -1550,9 +1550,6 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
|
||||
__bch2_btree_node_iter_init(iter, is_extents);
|
||||
|
||||
//if (bkey_cmp(search, b->curr_max_key) > 0)
|
||||
// return;
|
||||
|
||||
switch (bch2_bkey_pack_pos_lossy(&p, search, b)) {
|
||||
case BKEY_PACK_POS_EXACT:
|
||||
packed_search = &p;
|
||||
|
@ -45,8 +45,8 @@ static inline bool btree_node_hashed(struct btree *b)
|
||||
}
|
||||
|
||||
#define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \
|
||||
for ((_tbl) = rht_dereference_rcu((_c)->btree_cache_table.tbl, \
|
||||
&(_c)->btree_cache_table), \
|
||||
for ((_tbl) = rht_dereference_rcu((_c)->btree_cache.table.tbl, \
|
||||
&(_c)->btree_cache.table), \
|
||||
_iter = 0; _iter < (_tbl)->size; _iter++) \
|
||||
rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
|
||||
|
||||
|
@ -148,23 +148,24 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
|
||||
{
|
||||
enum bch_data_type data_type = type == BKEY_TYPE_BTREE
|
||||
? BCH_DATA_BTREE : BCH_DATA_USER;
|
||||
struct bch_devs_list devs = bch2_bkey_devs(k);
|
||||
int ret = 0;
|
||||
|
||||
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
|
||||
fsck_err_on(!bch2_sb_has_replicas(c, data_type, devs), c,
|
||||
"superblock not marked as containing replicas (type %u)",
|
||||
data_type)) {
|
||||
ret = bch2_check_mark_super(c, data_type, devs);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_EXTENT:
|
||||
case BCH_EXTENT_CACHED: {
|
||||
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
|
||||
fsck_err_on(!bch2_sb_has_replicas(c, e, data_type), c,
|
||||
"superblock not marked as containing replicas (type %u)",
|
||||
data_type)) {
|
||||
ret = bch2_check_mark_super(c, e, data_type);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
extent_for_each_ptr(e, ptr) {
|
||||
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
|
||||
size_t b = PTR_BUCKET_NR(ca, ptr);
|
||||
@ -284,7 +285,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
|
||||
mutex_lock(&c->btree_root_lock);
|
||||
|
||||
b = c->btree_roots[btree_id].b;
|
||||
bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
|
||||
if (!btree_node_fake(b))
|
||||
bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
|
||||
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
|
||||
|
||||
mutex_unlock(&c->btree_root_lock);
|
||||
@ -991,8 +993,10 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
|
||||
if (!c->btree_roots[id].b)
|
||||
return 0;
|
||||
|
||||
ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
|
||||
bkey_i_to_s_c(&c->btree_roots[id].b->key));
|
||||
b = c->btree_roots[id].b;
|
||||
if (!btree_node_fake(b))
|
||||
ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
|
||||
bkey_i_to_s_c(&b->key));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -1352,7 +1352,7 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
|
||||
return;
|
||||
}
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
|
||||
rb = container_of(bio, struct btree_read_bio, bio);
|
||||
rb->c = c;
|
||||
rb->start_time = local_clock();
|
||||
@ -1438,9 +1438,9 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b)
|
||||
}
|
||||
|
||||
static void bch2_btree_node_write_error(struct bch_fs *c,
|
||||
struct bch_write_bio *wbio)
|
||||
struct btree_write_bio *wbio)
|
||||
{
|
||||
struct btree *b = wbio->bio.bi_private;
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
struct closure *cl = wbio->cl;
|
||||
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
|
||||
struct bkey_i_extent *new_key;
|
||||
@ -1473,7 +1473,7 @@ retry:
|
||||
new_key = bkey_i_to_extent(&tmp.k);
|
||||
e = extent_i_to_s(new_key);
|
||||
extent_for_each_ptr_backwards(e, ptr)
|
||||
if (bch2_dev_list_has_dev(wbio->failed, ptr->dev))
|
||||
if (bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev))
|
||||
bch2_extent_drop_ptr(e, ptr);
|
||||
|
||||
if (!bch2_extent_nr_ptrs(e.c))
|
||||
@ -1486,7 +1486,7 @@ retry:
|
||||
goto err;
|
||||
out:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bio_put(&wbio->bio);
|
||||
bio_put(&wbio->wbio.bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
@ -1511,17 +1511,46 @@ void bch2_btree_write_error_work(struct work_struct *work)
|
||||
if (!bio)
|
||||
break;
|
||||
|
||||
bch2_btree_node_write_error(c, to_wbio(bio));
|
||||
bch2_btree_node_write_error(c,
|
||||
container_of(bio, struct btree_write_bio, wbio.bio));
|
||||
}
|
||||
}
|
||||
|
||||
static void btree_node_write_work(struct work_struct *work)
|
||||
{
|
||||
struct btree_write_bio *wbio =
|
||||
container_of(work, struct btree_write_bio, work);
|
||||
struct closure *cl = wbio->cl;
|
||||
struct bch_fs *c = wbio->wbio.c;
|
||||
struct btree *b = wbio->wbio.bio.bi_private;
|
||||
|
||||
btree_bounce_free(c,
|
||||
wbio->wbio.order,
|
||||
wbio->wbio.used_mempool,
|
||||
wbio->data);
|
||||
|
||||
if (wbio->wbio.failed.nr) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bio_list_add(&c->btree_write_error_list, &wbio->wbio.bio);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
|
||||
queue_work(c->wq, &c->btree_write_error_work);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_put(&wbio->wbio.bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void btree_node_write_endio(struct bio *bio)
|
||||
{
|
||||
struct btree *b = bio->bi_private;
|
||||
struct bch_write_bio *wbio = to_wbio(bio);
|
||||
struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL;
|
||||
struct bch_write_bio *orig = parent ?: wbio;
|
||||
struct closure *cl = !wbio->split ? wbio->cl : NULL;
|
||||
struct bch_fs *c = wbio->c;
|
||||
struct bch_dev *ca = wbio->ca;
|
||||
unsigned long flags;
|
||||
@ -1542,27 +1571,13 @@ static void btree_node_write_endio(struct bio *bio)
|
||||
if (parent) {
|
||||
bio_put(bio);
|
||||
bio_endio(&parent->bio);
|
||||
return;
|
||||
} else {
|
||||
struct btree_write_bio *wb =
|
||||
container_of(orig, struct btree_write_bio, wbio);
|
||||
|
||||
INIT_WORK(&wb->work, btree_node_write_work);
|
||||
schedule_work(&wb->work);
|
||||
}
|
||||
|
||||
btree_bounce_free(c,
|
||||
wbio->order,
|
||||
wbio->used_mempool,
|
||||
wbio->data);
|
||||
|
||||
if (wbio->failed.nr) {
|
||||
spin_lock_irqsave(&c->btree_write_error_lock, flags);
|
||||
bio_list_add(&c->btree_write_error_list, &wbio->bio);
|
||||
spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
|
||||
|
||||
queue_work(c->wq, &c->btree_write_error_work);
|
||||
return;
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
btree_node_write_done(c, b);
|
||||
if (cl)
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
@ -1586,7 +1601,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
struct closure *parent,
|
||||
enum six_lock_type lock_type_held)
|
||||
{
|
||||
struct bch_write_bio *wbio;
|
||||
struct btree_write_bio *wbio;
|
||||
struct bset_tree *t;
|
||||
struct bset *i;
|
||||
struct btree_node *bn = NULL;
|
||||
@ -1602,6 +1617,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
unsigned long old, new;
|
||||
void *data;
|
||||
|
||||
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We may only have a read lock on the btree node - the dirty bit is our
|
||||
* "lock" against racing with other threads that may be trying to start
|
||||
@ -1631,6 +1649,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
new ^= (1 << BTREE_NODE_write_idx);
|
||||
} while (cmpxchg_acquire(&b->flags, old, new) != old);
|
||||
|
||||
BUG_ON(btree_node_fake(b));
|
||||
BUG_ON(!list_empty(&b->write_blocked));
|
||||
BUG_ON((b->will_make_reachable != NULL) != !b->written);
|
||||
|
||||
@ -1763,21 +1782,22 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
trace_btree_write(b, bytes_to_write, sectors_to_write);
|
||||
|
||||
wbio = wbio_init(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write));
|
||||
wbio->cl = parent;
|
||||
wbio->failed.nr = 0;
|
||||
wbio->order = order;
|
||||
wbio->used_mempool = used_mempool;
|
||||
wbio->data = data;
|
||||
wbio->bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
|
||||
wbio->bio.bi_iter.bi_size = sectors_to_write << 9;
|
||||
wbio->bio.bi_end_io = btree_node_write_endio;
|
||||
wbio->bio.bi_private = b;
|
||||
wbio = container_of(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->btree_bio),
|
||||
struct btree_write_bio, wbio.bio);
|
||||
wbio_init(&wbio->wbio.bio);
|
||||
wbio->data = data;
|
||||
wbio->cl = parent;
|
||||
wbio->wbio.order = order;
|
||||
wbio->wbio.used_mempool = used_mempool;
|
||||
wbio->wbio.bio.bi_opf = REQ_OP_WRITE|REQ_META|REQ_FUA;
|
||||
wbio->wbio.bio.bi_iter.bi_size = sectors_to_write << 9;
|
||||
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
|
||||
wbio->wbio.bio.bi_private = b;
|
||||
|
||||
if (parent)
|
||||
closure_get(parent);
|
||||
|
||||
bch2_bio_map(&wbio->bio, data);
|
||||
bch2_bio_map(&wbio->wbio.bio, data);
|
||||
|
||||
/*
|
||||
* If we're appending to a leaf node, we don't technically need FUA -
|
||||
@ -1802,7 +1822,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
|
||||
b->written += sectors_to_write;
|
||||
|
||||
bch2_submit_wbio_replicas(wbio, c, BCH_DATA_BTREE, &k.key);
|
||||
bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_BTREE, &k.key);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_noevict(b);
|
||||
@ -1905,11 +1925,7 @@ void bch2_btree_verify_flushed(struct bch_fs *c)
|
||||
unsigned i;
|
||||
|
||||
rcu_read_lock();
|
||||
tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
|
||||
&c->btree_cache.table);
|
||||
|
||||
for (i = 0; i < tbl->size; i++)
|
||||
rht_for_each_entry_rcu(b, pos, tbl, i, hash)
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
for_each_cached_btree(b, c, tbl, i, pos)
|
||||
BUG_ON(btree_node_dirty(b));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define _BCACHEFS_BTREE_IO_H
|
||||
|
||||
#include "extents.h"
|
||||
#include "io_types.h"
|
||||
|
||||
struct bch_fs;
|
||||
struct btree_write;
|
||||
@ -17,6 +18,13 @@ struct btree_read_bio {
|
||||
struct bio bio;
|
||||
};
|
||||
|
||||
struct btree_write_bio {
|
||||
struct closure *cl;
|
||||
void *data;
|
||||
struct work_struct work;
|
||||
struct bch_write_bio wbio;
|
||||
};
|
||||
|
||||
static inline void btree_node_io_unlock(struct btree *b)
|
||||
{
|
||||
EBUG_ON(!btree_node_write_in_flight(b));
|
||||
|
@ -202,21 +202,20 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
|
||||
|
||||
/* Btree iterator locking: */
|
||||
|
||||
|
||||
static void btree_iter_drop_extra_locks(struct btree_iter *iter)
|
||||
{
|
||||
unsigned l;
|
||||
|
||||
while (iter->nodes_locked &&
|
||||
(l = __fls(iter->nodes_locked)) > iter->locks_want) {
|
||||
if (!btree_node_locked(iter, l))
|
||||
panic("l %u nodes_locked %u\n", l, iter->nodes_locked);
|
||||
|
||||
if (l > iter->level) {
|
||||
btree_node_unlock(iter, l);
|
||||
} else if (btree_node_intent_locked(iter, l)) {
|
||||
six_lock_downgrade(&iter->nodes[l]->lock);
|
||||
iter->nodes_intent_locked ^= 1 << l;
|
||||
} else {
|
||||
if (btree_node_intent_locked(iter, l)) {
|
||||
six_lock_downgrade(&iter->nodes[l]->lock);
|
||||
iter->nodes_intent_locked ^= 1 << l;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -861,7 +860,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
i < iter->locks_want && iter->nodes[i];
|
||||
i++)
|
||||
if (!bch2_btree_node_relock(iter, i)) {
|
||||
while (iter->nodes[iter->level] &&
|
||||
while (iter->level < BTREE_MAX_DEPTH &&
|
||||
iter->nodes[iter->level] &&
|
||||
iter->level + 1 < iter->locks_want)
|
||||
btree_iter_up(iter);
|
||||
break;
|
||||
@ -872,7 +872,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
* If the current node isn't locked, go up until we have a locked node
|
||||
* or run out of nodes:
|
||||
*/
|
||||
while (iter->nodes[iter->level] &&
|
||||
while (iter->level < BTREE_MAX_DEPTH &&
|
||||
iter->nodes[iter->level] &&
|
||||
!(is_btree_node(iter, iter->level) &&
|
||||
bch2_btree_node_relock(iter, iter->level) &&
|
||||
btree_iter_pos_cmp(iter->pos,
|
||||
@ -884,7 +885,8 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
|
||||
* If we've got a btree node locked (i.e. we aren't about to relock the
|
||||
* root) - advance its node iterator if necessary:
|
||||
*/
|
||||
if (iter->nodes[iter->level]) {
|
||||
if (iter->level < BTREE_MAX_DEPTH &&
|
||||
iter->nodes[iter->level]) {
|
||||
struct bkey_s_c k;
|
||||
|
||||
while ((k = __btree_iter_peek_all(iter)).k &&
|
||||
@ -956,7 +958,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
|
||||
|
||||
btree_iter_up(iter);
|
||||
|
||||
if (!iter->nodes[iter->level])
|
||||
if (iter->level == BTREE_MAX_DEPTH ||
|
||||
!iter->nodes[iter->level])
|
||||
return NULL;
|
||||
|
||||
/* parent node usually won't be locked: redo traversal if necessary */
|
||||
|
@ -50,10 +50,8 @@ struct btree_iter {
|
||||
* always fail (but since freeing a btree node takes a write lock on the
|
||||
* node, which increments the node's lock seq, that's not actually
|
||||
* necessary in that example).
|
||||
*
|
||||
* One extra slot for a sentinel NULL:
|
||||
*/
|
||||
struct btree *nodes[BTREE_MAX_DEPTH + 1];
|
||||
struct btree *nodes[BTREE_MAX_DEPTH];
|
||||
struct btree_node_iter node_iters[BTREE_MAX_DEPTH];
|
||||
|
||||
/*
|
||||
|
@ -92,6 +92,7 @@ static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
|
||||
int lock_type = btree_node_locked_type(iter, level);
|
||||
|
||||
EBUG_ON(!level && iter->flags & BTREE_ITER_UPTODATE);
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
if (lock_type != BTREE_NODE_UNLOCKED)
|
||||
six_unlock_type(&iter->nodes[level]->lock, lock_type);
|
||||
@ -106,6 +107,8 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
|
||||
struct btree_iter *iter,
|
||||
enum six_lock_type type)
|
||||
{
|
||||
EBUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
|
||||
return likely(six_trylock_type(&b->lock, type)) ||
|
||||
__bch2_btree_node_lock(b, pos, level, iter, type);
|
||||
}
|
||||
|
@ -197,6 +197,7 @@ enum btree_flags {
|
||||
BTREE_NODE_write_in_flight,
|
||||
BTREE_NODE_just_written,
|
||||
BTREE_NODE_dying,
|
||||
BTREE_NODE_fake,
|
||||
};
|
||||
|
||||
BTREE_FLAG(read_in_flight);
|
||||
@ -209,6 +210,7 @@ BTREE_FLAG(accessed);
|
||||
BTREE_FLAG(write_in_flight);
|
||||
BTREE_FLAG(just_written);
|
||||
BTREE_FLAG(dying);
|
||||
BTREE_FLAG(fake);
|
||||
|
||||
static inline struct btree_write *btree_current_write(struct btree *b)
|
||||
{
|
||||
|
@ -546,8 +546,8 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key),
|
||||
BCH_DATA_BTREE);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
|
||||
bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
@ -915,6 +915,10 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
|
||||
struct bset_tree *t;
|
||||
|
||||
set_btree_node_dying(b);
|
||||
|
||||
if (btree_node_fake(b))
|
||||
return;
|
||||
|
||||
btree_interior_update_add_node_reference(as, b);
|
||||
|
||||
/*
|
||||
@ -1052,7 +1056,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
|
||||
gc_pos_btree_root(b->btree_id),
|
||||
&stats, 0, 0);
|
||||
|
||||
if (old)
|
||||
if (old && !btree_node_fake(old))
|
||||
bch2_btree_node_free_index(as, NULL,
|
||||
bkey_i_to_s_c(&old->key),
|
||||
&stats);
|
||||
@ -1422,7 +1426,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
|
||||
|
||||
bch2_btree_node_lock_for_insert(c, b, iter);
|
||||
|
||||
if (bch_keylist_u64s(keys) > bch_btree_keys_u64s_remaining(c, b)) {
|
||||
if (!bch2_btree_node_insert_fits(c, b, bch_keylist_u64s(keys))) {
|
||||
bch2_btree_node_unlock_write(b, iter);
|
||||
return -1;
|
||||
}
|
||||
@ -1957,7 +1961,8 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_check_mark_super(c, extent_i_to_s_c(new_key), BCH_DATA_BTREE);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
|
||||
bch2_extent_devs(extent_i_to_s_c(new_key)));
|
||||
if (ret)
|
||||
goto err_free_update;
|
||||
|
||||
@ -1993,45 +1998,43 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
|
||||
bch2_btree_set_root_ondisk(c, b, READ);
|
||||
}
|
||||
|
||||
int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id,
|
||||
struct closure *writes)
|
||||
void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
|
||||
{
|
||||
struct btree_update *as;
|
||||
struct closure cl;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
memset(&as, 0, sizeof(as));
|
||||
closure_init_stack(&cl);
|
||||
|
||||
while (1) {
|
||||
/* XXX haven't calculated capacity yet :/ */
|
||||
as = bch2_btree_update_start(c, id, 1,
|
||||
BTREE_INSERT_USE_RESERVE|
|
||||
BTREE_INSERT_USE_ALLOC_RESERVE,
|
||||
&cl);
|
||||
do {
|
||||
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
|
||||
closure_sync(&cl);
|
||||
} while (ret);
|
||||
|
||||
if (!IS_ERR(as))
|
||||
break;
|
||||
b = bch2_btree_node_mem_alloc(c);
|
||||
bch2_btree_cache_cannibalize_unlock(c);
|
||||
|
||||
if (PTR_ERR(as) == -ENOSPC)
|
||||
return PTR_ERR(as);
|
||||
}
|
||||
set_btree_node_fake(b);
|
||||
b->level = 0;
|
||||
b->btree_id = id;
|
||||
|
||||
b = __btree_root_alloc(as, 0);
|
||||
bkey_extent_init(&b->key);
|
||||
b->key.k.p = POS_MAX;
|
||||
bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id;
|
||||
|
||||
bch2_btree_node_write(c, b, writes, SIX_LOCK_intent);
|
||||
btree_update_drop_new_node(c, b);
|
||||
bch2_bset_init_first(b, &b->data->keys);
|
||||
bch2_btree_build_aux_trees(b);
|
||||
|
||||
BUG_ON(btree_node_root(c, b));
|
||||
b->data->min_key = POS_MIN;
|
||||
b->data->max_key = POS_MAX;
|
||||
b->data->format = bch2_btree_calc_format(b);
|
||||
btree_node_set_format(b, b->data->format);
|
||||
|
||||
bch2_btree_set_root_inmem(as, b);
|
||||
bch2_btree_set_root_ondisk(c, b, WRITE);
|
||||
ret = bch2_btree_node_hash_insert(&c->btree_cache, b, b->level, b->btree_id);
|
||||
BUG_ON(ret);
|
||||
|
||||
bch2_btree_open_bucket_put(c, b);
|
||||
__bch2_btree_set_root_inmem(c, b);
|
||||
|
||||
six_unlock_write(&b->lock);
|
||||
six_unlock_intent(&b->lock);
|
||||
|
||||
bch2_btree_update_free(as);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -150,7 +150,7 @@ int bch2_foreground_maybe_merge(struct bch_fs *, struct btree_iter *,
|
||||
enum btree_node_sibling);
|
||||
|
||||
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
|
||||
int bch2_btree_root_alloc(struct bch_fs *, enum btree_id, struct closure *);
|
||||
void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
|
||||
|
||||
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
|
||||
struct btree *b)
|
||||
@ -280,6 +280,9 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
|
||||
static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
|
||||
struct btree *b, unsigned u64s)
|
||||
{
|
||||
if (unlikely(btree_node_fake(b)))
|
||||
return false;
|
||||
|
||||
if (btree_node_is_extents(b)) {
|
||||
/* The insert key might split an existing key
|
||||
* (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
|
||||
|
@ -258,6 +258,11 @@ static u64 reserve_factor(u64 r)
|
||||
return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR);
|
||||
}
|
||||
|
||||
static u64 avail_factor(u64 r)
|
||||
{
|
||||
return (r << RESERVE_FACTOR) / (1 << RESERVE_FACTOR) + 1;
|
||||
}
|
||||
|
||||
u64 __bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
{
|
||||
struct fs_usage_sum sum = __fs_usage_sum(stats);
|
||||
@ -270,6 +275,11 @@ u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
return min(c->capacity, __bch2_fs_sectors_used(c, stats));
|
||||
}
|
||||
|
||||
u64 bch2_fs_sectors_free(struct bch_fs *c, struct bch_fs_usage stats)
|
||||
{
|
||||
return avail_factor(c->capacity - bch2_fs_sectors_used(c, stats));
|
||||
}
|
||||
|
||||
static inline int is_unavailable_bucket(struct bucket_mark m)
|
||||
{
|
||||
return !is_available_bucket(m);
|
||||
@ -382,7 +392,6 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
}
|
||||
|
||||
new.owned_by_allocator = 1;
|
||||
new.touched_this_mount = 1;
|
||||
new.data_type = 0;
|
||||
new.cached_sectors = 0;
|
||||
new.dirty_sectors = 0;
|
||||
@ -396,29 +405,6 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
|
||||
size_t b)
|
||||
{
|
||||
struct bucket *g;
|
||||
struct bucket_mark new, old;
|
||||
|
||||
lg_local_lock(&c->usage_lock);
|
||||
g = bucket(ca, b);
|
||||
|
||||
old = bucket_data_cmpxchg(c, ca, g, new, ({
|
||||
if (!is_startup_available_bucket(new)) {
|
||||
lg_local_unlock(&c->usage_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
new.owned_by_allocator = 1;
|
||||
new.touched_this_mount = 1;
|
||||
}));
|
||||
lg_local_unlock(&c->usage_lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags)
@ -436,7 +422,6 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
}

old = bucket_data_cmpxchg(c, ca, g, new, ({
new.touched_this_mount = 1;
new.owned_by_allocator = owned_by_allocator;
}));
lg_local_unlock(&c->usage_lock);
@ -481,7 +466,6 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
saturated_add(ca, new.dirty_sectors, sectors,
GC_MAX_SECTORS_USED);
new.data_type = type;
new.touched_this_mount = 1;
}));
lg_local_unlock(&c->usage_lock);

@ -539,7 +523,6 @@ static void bch2_mark_pointer(struct bch_fs *c,
if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
if (journal_seq)
bucket_cmpxchg(g, new, ({
new.touched_this_mount = 1;
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}));
@ -588,8 +571,6 @@ static void bch2_mark_pointer(struct bch_fs *c,
new.data_type = data_type;
}

new.touched_this_mount = 1;

if (flags & BCH_BUCKET_MARK_NOATOMIC) {
g->_mark = new;
break;
@ -694,17 +675,12 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,

static u64 __recalc_sectors_available(struct bch_fs *c)
{
u64 avail;
int cpu;

for_each_possible_cpu(cpu)
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;

avail = c->capacity - bch2_fs_sectors_used(c, bch2_fs_usage_read(c));

avail <<= RESERVE_FACTOR;
avail /= (1 << RESERVE_FACTOR) + 1;
return avail;
return bch2_fs_sectors_free(c, bch2_fs_usage_read(c));
}

/* Used by gc when it's starting: */
@ -839,7 +815,7 @@ static void buckets_free_rcu(struct rcu_head *rcu)

int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
struct bucket_array *buckets = NULL, *old_buckets;
struct bucket_array *buckets = NULL, *old_buckets = NULL;
unsigned long *buckets_dirty = NULL;
u8 *oldest_gens = NULL;
alloc_fifo free[RESERVE_NR];
@ -184,6 +184,7 @@ void bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,

u64 __bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_free(struct bch_fs *, struct bch_fs_usage);

static inline bool is_available_bucket(struct bucket_mark mark)
{
@ -192,11 +193,6 @@ static inline bool is_available_bucket(struct bucket_mark mark)
!mark.nouse);
}

static inline bool is_startup_available_bucket(struct bucket_mark mark)
{
return !mark.touched_this_mount && is_available_bucket(mark);
}

static inline bool bucket_needs_journal_commit(struct bucket_mark m,
u16 last_seq_ondisk)
{
@ -208,8 +204,6 @@ void bch2_bucket_seq_cleanup(struct bch_fs *);

bool bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
size_t, struct bucket_mark *);
bool bch2_mark_alloc_bucket_startup(struct bch_fs *, struct bch_dev *,
size_t);
void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *,
size_t, bool, struct gc_pos, unsigned);
void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
@ -15,8 +15,7 @@ struct bucket_mark {
gen_valid:1,
owned_by_allocator:1,
nouse:1,
journal_seq_valid:1,
touched_this_mount:1;
journal_seq_valid:1;
u16 dirty_sectors;
u16 cached_sectors;

@ -64,7 +64,7 @@ found:
|
||||
static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
|
||||
{
|
||||
struct bch_ioctl_assemble arg;
|
||||
const char *err;
|
||||
struct bch_fs *c;
|
||||
u64 *user_devs = NULL;
|
||||
char **devs = NULL;
|
||||
unsigned i;
|
||||
@ -96,14 +96,10 @@ static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
|
||||
}
|
||||
}
|
||||
|
||||
err = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty(), NULL);
|
||||
if (err) {
|
||||
pr_err("Could not open filesystem: %s", err);
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
|
||||
ret = PTR_ERR_OR_ZERO(c);
|
||||
if (!ret)
|
||||
closure_put(&c->cl);
|
||||
err:
|
||||
if (devs)
|
||||
for (i = 0; i < arg.nr_devs; i++)
|
||||
|
@ -58,7 +58,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
|
||||
if (IS_ERR_OR_NULL(pick.ca))
|
||||
return;
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_read_bio);
|
||||
bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
|
||||
bio->bi_bdev = pick.ca->disk_sb.bdev;
|
||||
bio->bi_opf = REQ_OP_READ|REQ_META;
|
||||
bio->bi_iter.bi_sector = pick.ptr.offset;
|
||||
|
@ -143,9 +143,6 @@ void bch2_flush_fsck_errs(struct bch_fs *);
|
||||
#define __fsck_err_on(cond, c, _flags, ...) \
|
||||
((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false)
|
||||
|
||||
#define unfixable_fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, FSCK_CAN_IGNORE, ##__VA_ARGS__)
|
||||
|
||||
#define need_fsck_err_on(cond, c, ...) \
|
||||
__fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__)
|
||||
|
||||
|
@ -666,7 +666,7 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!bch2_sb_has_replicas(c, e, BCH_DATA_BTREE)) {
|
||||
if (!bch2_sb_has_replicas(c, BCH_DATA_BTREE, bch2_extent_devs(e))) {
|
||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||
buf, sizeof(buf), k);
|
||||
bch2_fs_bug(c,
|
||||
@ -1803,7 +1803,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
|
||||
}
|
||||
|
||||
if (!bkey_extent_is_cached(e.k) &&
|
||||
!bch2_sb_has_replicas(c, e, BCH_DATA_USER)) {
|
||||
!bch2_sb_has_replicas(c, BCH_DATA_USER, bch2_extent_devs(e))) {
|
||||
bch2_bkey_val_to_text(c, btree_node_type(b),
|
||||
buf, sizeof(buf), e.s_c);
|
||||
bch2_fs_bug(c,
|
||||
|
@ -426,6 +426,17 @@ static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k)
|
||||
{
|
||||
switch (k.k->type) {
|
||||
case BCH_EXTENT:
|
||||
case BCH_EXTENT_CACHED:
|
||||
return bch2_extent_devs(bkey_s_c_to_extent(k));
|
||||
default:
|
||||
return (struct bch_devs_list) { .nr = 0 };
|
||||
}
|
||||
}
|
||||
|
||||
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
|
||||
struct bch_extent_crc_unpacked);
|
||||
bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
|
||||
|
@ -57,6 +57,7 @@ do { \
|
||||
#define fifo_peek_back(fifo) ((fifo)->data[((fifo)->back - 1) & (fifo)->mask])
|
||||
|
||||
#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
|
||||
#define fifo_idx_entry(fifo, i) (fifo)->data[((fifo)->front + (i)) & (fifo)->mask]
|
||||
|
||||
#define fifo_push_back_ref(f) \
|
||||
(fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "journal.h"
|
||||
#include "io.h"
|
||||
#include "keylist.h"
|
||||
#include "quota.h"
|
||||
|
||||
#include <linux/aio.h>
|
||||
#include <linux/backing-dev.h>
|
||||
@ -56,14 +57,13 @@ struct bch_writepage_io {
|
||||
struct dio_write {
|
||||
struct closure cl;
|
||||
struct kiocb *req;
|
||||
struct bch_fs *c;
|
||||
loff_t offset;
|
||||
|
||||
struct iovec *iovec;
|
||||
struct iovec inline_vecs[UIO_FASTIOV];
|
||||
struct iov_iter iter;
|
||||
|
||||
struct task_struct *task;
|
||||
unsigned loop:1,
|
||||
sync:1,
|
||||
free_iov:1;
|
||||
|
||||
struct iov_iter iter;
|
||||
struct iovec inline_vecs[2];
|
||||
|
||||
/* must be last: */
|
||||
struct bchfs_write_op iop;
|
||||
@ -130,6 +130,7 @@ static int __must_check bch2_write_inode_size(struct bch_fs *c,
|
||||
static void __i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
|
||||
{
|
||||
inode->v.i_blocks += sectors;
|
||||
bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN);
|
||||
}
|
||||
|
||||
static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, int sectors)
|
||||
@ -1286,7 +1287,8 @@ static int bch2_read_single_page(struct page *page,
|
||||
int ret;
|
||||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
|
||||
rbio = to_rbio(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read));
|
||||
rbio = rbio_init(bio_alloc_bioset(GFP_NOFS, 1, &c->bio_read),
|
||||
io_opts(c, inode));
|
||||
rbio->bio.bi_private = &done;
|
||||
rbio->bio.bi_end_io = bch2_read_single_page_end_io;
|
||||
|
||||
@ -1439,13 +1441,15 @@ static void bch2_direct_IO_read_split_endio(struct bio *bio)
|
||||
bio_check_pages_dirty(bio); /* transfers ownership */
|
||||
}
|
||||
|
||||
static int bch2_direct_IO_read(struct bch_fs *c, struct kiocb *req,
|
||||
struct file *file, struct bch_inode_info *inode,
|
||||
struct iov_iter *iter, loff_t offset)
|
||||
static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = req->ki_filp;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_io_opts opts = io_opts(c, inode);
|
||||
struct dio_read *dio;
|
||||
struct bio *bio;
|
||||
loff_t offset = req->ki_pos;
|
||||
bool sync = is_sync_kiocb(req);
|
||||
ssize_t ret;
|
||||
|
||||
@ -1525,103 +1529,128 @@ start:
|
||||
}
|
||||
}
|
||||
|
||||
static long __bch2_dio_write_complete(struct dio_write *dio)
|
||||
static void bch2_dio_write_loop_async(struct closure *);
|
||||
|
||||
static long bch2_dio_write_loop(struct dio_write *dio)
|
||||
{
|
||||
struct file *file = dio->req->ki_filp;
|
||||
struct kiocb *req = dio->req;
|
||||
struct file *file = req->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
long ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
|
||||
|
||||
bch2_disk_reservation_put(dio->c, &dio->iop.op.res);
|
||||
|
||||
__pagecache_block_put(&mapping->add_lock);
|
||||
inode_dio_end(&inode->v);
|
||||
|
||||
if (dio->iovec && dio->iovec != dio->inline_vecs)
|
||||
kfree(dio->iovec);
|
||||
|
||||
bio_put(&dio->iop.op.wbio.bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_complete(struct closure *cl)
|
||||
{
|
||||
struct dio_write *dio = container_of(cl, struct dio_write, cl);
|
||||
struct kiocb *req = dio->req;
|
||||
|
||||
req->ki_complete(req, __bch2_dio_write_complete(dio), 0);
|
||||
}
|
||||
|
||||
static void bch2_dio_write_done(struct dio_write *dio)
|
||||
{
|
||||
struct bio *bio = &dio->iop.op.wbio.bio;
|
||||
struct bio_vec *bv;
|
||||
bool sync;
|
||||
long ret;
|
||||
int i;
|
||||
|
||||
bio_for_each_segment_all(bv, &dio->iop.op.wbio.bio, i)
|
||||
put_page(bv->bv_page);
|
||||
if (dio->loop)
|
||||
goto loop;
|
||||
|
||||
if (dio->iter.count)
|
||||
bio_reset(&dio->iop.op.wbio.bio);
|
||||
}
|
||||
inode_dio_begin(&inode->v);
|
||||
__pagecache_block_get(&mapping->add_lock);
|
||||
|
||||
static void bch2_do_direct_IO_write(struct dio_write *dio)
|
||||
{
|
||||
struct file *file = dio->req->ki_filp;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bio *bio = &dio->iop.op.wbio.bio;
|
||||
int ret;
|
||||
/* Write and invalidate pagecache range that we're writing to: */
|
||||
ret = write_invalidate_inode_pages_range(mapping, req->ki_pos,
|
||||
req->ki_pos + iov_iter_count(&dio->iter) - 1);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
ret = bio_iov_iter_get_pages(bio, &dio->iter);
|
||||
if (ret < 0) {
|
||||
dio->iop.op.error = ret;
|
||||
return;
|
||||
while (1) {
|
||||
BUG_ON(current->pagecache_lock);
|
||||
current->pagecache_lock = &mapping->add_lock;
|
||||
if (current != dio->task)
|
||||
use_mm(dio->task->mm);
|
||||
|
||||
ret = bio_iov_iter_get_pages(bio, &dio->iter);
|
||||
|
||||
if (current != dio->task)
|
||||
unuse_mm(dio->task->mm);
|
||||
current->pagecache_lock = NULL;
|
||||
|
||||
if (unlikely(ret < 0))
|
||||
goto err;
|
||||
|
||||
dio->iop.op.pos = POS(inode->v.i_ino,
|
||||
(req->ki_pos >> 9) + dio->iop.op.written);
|
||||
|
||||
task_io_account_write(bio->bi_iter.bi_size);
|
||||
|
||||
closure_call(&dio->iop.op.cl, bch2_write, NULL, &dio->cl);
|
||||
|
||||
if (!dio->sync && !dio->loop && dio->iter.count) {
|
||||
struct iovec *iov = dio->inline_vecs;
|
||||
|
||||
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
|
||||
iov = kmalloc(dio->iter.nr_segs * sizeof(*iov),
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!iov)) {
|
||||
dio->iop.op.error = -ENOMEM;
|
||||
goto err_wait_io;
|
||||
}
|
||||
|
||||
dio->free_iov = true;
|
||||
}
|
||||
|
||||
memcpy(iov, dio->iter.iov, dio->iter.nr_segs * sizeof(*iov));
|
||||
dio->iter.iov = iov;
|
||||
}
|
||||
err_wait_io:
|
||||
dio->loop = true;
|
||||
|
||||
if (!dio->sync) {
|
||||
continue_at_noreturn(&dio->cl,
|
||||
bch2_dio_write_loop_async, NULL);
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
|
||||
closure_sync(&dio->cl);
|
||||
loop:
|
||||
bio_for_each_segment_all(bv, bio, i)
|
||||
put_page(bv->bv_page);
|
||||
if (!dio->iter.count || dio->iop.op.error)
|
||||
break;
|
||||
bio_reset(bio);
|
||||
}
|
||||
|
||||
dio->iop.op.pos = POS(inode->v.i_ino, (dio->offset >> 9) + dio->iop.op.written);
|
||||
ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
|
||||
err:
|
||||
__pagecache_block_put(&mapping->add_lock);
|
||||
inode_dio_end(&inode->v);
|
||||
bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
|
||||
|
||||
task_io_account_write(bio->bi_iter.bi_size);
|
||||
if (dio->free_iov)
|
||||
kfree(dio->iter.iov);
|
||||
|
||||
closure_call(&dio->iop.op.cl, bch2_write, NULL, &dio->cl);
|
||||
closure_debug_destroy(&dio->cl);
|
||||
|
||||
sync = dio->sync;
|
||||
bio_put(bio);
|
||||
|
||||
if (!sync) {
|
||||
req->ki_complete(req, ret, 0);
|
||||
ret = -EIOCBQUEUED;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_dio_write_loop_async(struct closure *cl)
|
||||
{
|
||||
struct dio_write *dio =
|
||||
container_of(cl, struct dio_write, cl);
|
||||
struct address_space *mapping = dio->req->ki_filp->f_mapping;
|
||||
struct dio_write *dio = container_of(cl, struct dio_write, cl);
|
||||
|
||||
bch2_dio_write_done(dio);
|
||||
|
||||
if (dio->iter.count && !dio->iop.op.error) {
|
||||
use_mm(dio->task->mm);
|
||||
pagecache_block_get(&mapping->add_lock);
|
||||
|
||||
bch2_do_direct_IO_write(dio);
|
||||
|
||||
pagecache_block_put(&mapping->add_lock);
|
||||
unuse_mm(dio->task->mm);
|
||||
|
||||
continue_at(&dio->cl, bch2_dio_write_loop_async, NULL);
|
||||
} else {
|
||||
#if 0
|
||||
closure_return_with_destructor(cl, bch2_dio_write_complete);
|
||||
#else
|
||||
closure_debug_destroy(cl);
|
||||
bch2_dio_write_complete(cl);
|
||||
#endif
|
||||
}
|
||||
bch2_dio_write_loop(dio);
|
||||
}
|
||||
|
||||
static int bch2_direct_IO_write(struct bch_fs *c,
|
||||
struct kiocb *req, struct file *file,
|
||||
struct bch_inode_info *inode,
|
||||
struct iov_iter *iter, loff_t offset)
|
||||
static int bch2_direct_IO_write(struct kiocb *req,
|
||||
struct iov_iter *iter,
|
||||
bool swap)
|
||||
{
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct file *file = req->ki_filp;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct dio_write *dio;
|
||||
struct bio *bio;
|
||||
loff_t offset = req->ki_pos;
|
||||
ssize_t ret;
|
||||
bool sync = is_sync_kiocb(req);
|
||||
|
||||
lockdep_assert_held(&inode->v.i_rwsem);
|
||||
|
||||
@ -1637,95 +1666,49 @@ static int bch2_direct_IO_write(struct bch_fs *c,
|
||||
dio = container_of(bio, struct dio_write, iop.op.wbio.bio);
|
||||
closure_init(&dio->cl, NULL);
|
||||
dio->req = req;
|
||||
dio->c = c;
|
||||
dio->offset = offset;
|
||||
dio->iovec = NULL;
|
||||
dio->iter = *iter;
|
||||
dio->task = current;
|
||||
dio->loop = false;
|
||||
dio->sync = is_sync_kiocb(req) ||
|
||||
offset + iter->count > inode->v.i_size;
|
||||
dio->free_iov = false;
|
||||
dio->iter = *iter;
|
||||
bch2_fswrite_op_init(&dio->iop, c, inode, io_opts(c, inode), true);
|
||||
dio->iop.op.write_point = writepoint_hashed((unsigned long) dio->task);
|
||||
dio->iop.op.flags |= BCH_WRITE_NOPUT_RESERVATION;
|
||||
|
||||
if ((dio->req->ki_flags & IOCB_DSYNC) &&
|
||||
if ((req->ki_flags & IOCB_DSYNC) &&
|
||||
!c->opts.journal_flush_disabled)
|
||||
dio->iop.op.flags |= BCH_WRITE_FLUSH;
|
||||
|
||||
if (offset + iter->count > inode->v.i_size)
|
||||
sync = true;
|
||||
|
||||
/*
|
||||
* XXX: we shouldn't return -ENOSPC if we're overwriting existing data -
|
||||
* if getting a reservation fails we should check if we are doing an
|
||||
* overwrite.
|
||||
*
|
||||
* Have to then guard against racing with truncate (deleting data that
|
||||
* we would have been overwriting)
|
||||
*/
|
||||
ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9, 0);
|
||||
if (unlikely(ret)) {
|
||||
if (bch2_check_range_allocated(c, POS(inode->v.i_ino,
|
||||
offset >> 9),
|
||||
iter->count >> 9)) {
|
||||
closure_debug_destroy(&dio->cl);
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
}
|
||||
iter->count >> 9))
|
||||
goto err;
|
||||
|
||||
dio->iop.unalloc = true;
|
||||
}
|
||||
|
||||
dio->iop.op.nr_replicas = dio->iop.op.res.nr_replicas;
|
||||
|
||||
inode_dio_begin(&inode->v);
|
||||
__pagecache_block_get(&mapping->add_lock);
|
||||
|
||||
if (sync) {
|
||||
do {
|
||||
bch2_do_direct_IO_write(dio);
|
||||
|
||||
closure_sync(&dio->cl);
|
||||
bch2_dio_write_done(dio);
|
||||
} while (dio->iter.count && !dio->iop.op.error);
|
||||
|
||||
closure_debug_destroy(&dio->cl);
|
||||
return __bch2_dio_write_complete(dio);
|
||||
} else {
|
||||
bch2_do_direct_IO_write(dio);
|
||||
|
||||
if (dio->iter.count && !dio->iop.op.error) {
|
||||
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
|
||||
dio->iovec = kmalloc(dio->iter.nr_segs *
|
||||
sizeof(struct iovec),
|
||||
GFP_KERNEL);
|
||||
if (!dio->iovec)
|
||||
dio->iop.op.error = -ENOMEM;
|
||||
} else {
|
||||
dio->iovec = dio->inline_vecs;
|
||||
}
|
||||
|
||||
memcpy(dio->iovec,
|
||||
dio->iter.iov,
|
||||
dio->iter.nr_segs * sizeof(struct iovec));
|
||||
dio->iter.iov = dio->iovec;
|
||||
}
|
||||
|
||||
continue_at_noreturn(&dio->cl, bch2_dio_write_loop_async, NULL);
|
||||
return -EIOCBQUEUED;
|
||||
}
|
||||
return bch2_dio_write_loop(dio);
|
||||
err:
|
||||
bch2_disk_reservation_put(c, &dio->iop.op.res);
|
||||
closure_debug_destroy(&dio->cl);
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = req->ki_filp;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct blk_plug plug;
|
||||
ssize_t ret;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
ret = ((iov_iter_rw(iter) == WRITE)
|
||||
? bch2_direct_IO_write
|
||||
: bch2_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
|
||||
ret = iov_iter_rw(iter) == WRITE
|
||||
? bch2_direct_IO_write(req, iter, false)
|
||||
: bch2_direct_IO_read(req, iter);
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
return ret;
|
||||
@ -1734,26 +1717,7 @@ ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter)
|
||||
static ssize_t
|
||||
bch2_direct_write(struct kiocb *iocb, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct bch_inode_info *inode = file_bch_inode(file);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
loff_t pos = iocb->ki_pos;
|
||||
ssize_t ret;
|
||||
|
||||
pagecache_block_get(&mapping->add_lock);
|
||||
|
||||
/* Write and invalidate pagecache range that we're writing to: */
|
||||
ret = write_invalidate_inode_pages_range(file->f_mapping, pos,
|
||||
pos + iov_iter_count(iter) - 1);
|
||||
if (unlikely(ret))
|
||||
goto err;
|
||||
|
||||
ret = bch2_direct_IO_write(c, iocb, file, inode, iter, pos);
|
||||
err:
|
||||
pagecache_block_put(&mapping->add_lock);
|
||||
|
||||
return ret;
|
||||
return bch2_direct_IO_write(iocb, iter, true);
|
||||
}
|
||||
|
||||
static ssize_t __bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "chardev.h"
|
||||
#include "fs.h"
|
||||
#include "fs-ioctl.h"
|
||||
#include "quota.h"
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/mount.h>
|
||||
@ -154,10 +155,32 @@ static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode,
|
||||
struct fsxattr fa = { 0 };
|
||||
|
||||
fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags);
|
||||
fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ];
|
||||
|
||||
return copy_to_user(arg, &fa, sizeof(fa));
|
||||
}
|
||||
|
||||
static int bch2_set_projid(struct bch_fs *c,
|
||||
struct bch_inode_info *inode,
|
||||
u32 projid)
|
||||
{
|
||||
struct bch_qid qid = inode->ei_qid;
|
||||
int ret;
|
||||
|
||||
if (projid == inode->ei_qid.q[QTYP_PRJ])
|
||||
return 0;
|
||||
|
||||
qid.q[QTYP_PRJ] = projid;
|
||||
|
||||
ret = bch2_quota_transfer(c, 1 << QTYP_PRJ, qid, inode->ei_qid,
|
||||
inode->v.i_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode->ei_qid.q[QTYP_PRJ] = projid;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_ioc_fssetxattr(struct bch_fs *c,
|
||||
struct file *file,
|
||||
struct bch_inode_info *inode,
|
||||
@ -185,9 +208,14 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
|
||||
}
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
ret = bch2_set_projid(c, inode, fa.fsx_projid);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
|
||||
ret = __bch2_write_inode(c, inode, bch2_inode_flags_set, &flags);
|
||||
if (!ret)
|
||||
bch2_inode_flags_to_vfs(inode);
|
||||
err_unlock:
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
err:
|
||||
inode_unlock(&inode->v);
|
||||
|
236
libbcachefs/fs.c
@ -15,6 +15,7 @@
|
||||
#include "io.h"
|
||||
#include "journal.h"
|
||||
#include "keylist.h"
|
||||
#include "quota.h"
|
||||
#include "super.h"
|
||||
#include "xattr.h"
|
||||
|
||||
@ -116,6 +117,7 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
|
||||
inode_u.bi_mode = inode->v.i_mode;
|
||||
inode_u.bi_uid = i_uid_read(&inode->v);
|
||||
inode_u.bi_gid = i_gid_read(&inode->v);
|
||||
inode_u.bi_project = inode->ei_qid.q[QTYP_PRJ];
|
||||
inode_u.bi_nlink= i_nlink - nlink_bias(inode->v.i_mode);
|
||||
inode_u.bi_dev = inode->v.i_rdev;
|
||||
inode_u.bi_atime= timespec_to_bch2_time(c, inode->v.i_atime);
|
||||
@ -131,8 +133,10 @@ int __must_check __bch2_write_inode(struct bch_fs *c,
|
||||
BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
|
||||
} while (ret == -EINTR);
|
||||
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
inode->ei_inode = inode_u;
|
||||
inode->ei_qid = bch_qid(&inode_u);
|
||||
}
|
||||
out:
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
@ -215,7 +219,7 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
|
||||
ret = posix_acl_create(&dir->v, &inode->v.i_mode, &default_acl, &acl);
|
||||
if (ret) {
|
||||
make_bad_inode(&inode->v);
|
||||
goto err;
|
||||
goto err_make_bad;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -225,16 +229,20 @@ static struct bch_inode_info *bch2_vfs_inode_create(struct bch_fs *c,
|
||||
inode->v.i_mode, rdev,
|
||||
&dir->ei_inode);
|
||||
|
||||
inode_u.bi_project = dir->ei_qid.q[QTYP_PRJ];
|
||||
|
||||
ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC);
|
||||
if (ret) {
|
||||
make_bad_inode(&inode->v);
|
||||
goto err_make_bad;
|
||||
}
|
||||
|
||||
ret = bch2_inode_create(c, &inode_u,
|
||||
BLOCKDEV_INODE_MAX, 0,
|
||||
&c->unused_inode_hint);
|
||||
if (unlikely(ret)) {
|
||||
/*
|
||||
* indicate to bch_evict_inode that the inode was never actually
|
||||
* created:
|
||||
*/
|
||||
make_bad_inode(&inode->v);
|
||||
goto err;
|
||||
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN);
|
||||
goto err_make_bad;
|
||||
}
|
||||
|
||||
bch2_vfs_inode_init(c, inode, &inode_u);
|
||||
@ -257,6 +265,12 @@ out:
|
||||
posix_acl_release(default_acl);
|
||||
posix_acl_release(acl);
|
||||
return inode;
|
||||
err_make_bad:
|
||||
/*
|
||||
* indicate to bch_evict_inode that the inode was never actually
|
||||
* created:
|
||||
*/
|
||||
make_bad_inode(&inode->v);
|
||||
err:
|
||||
clear_nlink(&inode->v);
|
||||
iput(&inode->v);
|
||||
@ -604,11 +618,53 @@ static int bch2_rename2(struct inode *old_vdir, struct dentry *old_dentry,
|
||||
return bch2_rename(c, old_dir, old_dentry, new_dir, new_dentry);
|
||||
}
|
||||
|
||||
static int bch2_setattr_nonsize(struct bch_inode_info *inode, struct iattr *iattr)
|
||||
{
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
struct bch_qid qid = inode->ei_qid;
|
||||
unsigned qtypes = 0;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
|
||||
if (c->opts.usrquota &&
|
||||
(iattr->ia_valid & ATTR_UID) &&
|
||||
!uid_eq(iattr->ia_uid, inode->v.i_uid)) {
|
||||
qid.q[QTYP_USR] = from_kuid(&init_user_ns, iattr->ia_uid),
|
||||
qtypes |= 1 << QTYP_USR;
|
||||
}
|
||||
|
||||
if (c->opts.grpquota &&
|
||||
(iattr->ia_valid & ATTR_GID) &&
|
||||
!gid_eq(iattr->ia_gid, inode->v.i_gid)) {
|
||||
qid.q[QTYP_GRP] = from_kgid(&init_user_ns, iattr->ia_gid);
|
||||
qtypes |= 1 << QTYP_GRP;
|
||||
}
|
||||
|
||||
if (qtypes) {
|
||||
ret = bch2_quota_transfer(c, qtypes, qid, inode->ei_qid,
|
||||
inode->v.i_blocks);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
setattr_copy(&inode->v, iattr);
|
||||
|
||||
ret = bch2_write_inode(c, inode);
|
||||
out_unlock:
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
|
||||
if (!ret &&
|
||||
iattr->ia_valid & ATTR_MODE)
|
||||
ret = posix_acl_chmod(&inode->v, inode->v.i_mode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
|
||||
{
|
||||
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
|
||||
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&inode->v.i_rwsem);
|
||||
|
||||
@ -616,22 +672,9 @@ static int bch2_setattr(struct dentry *dentry, struct iattr *iattr)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (iattr->ia_valid & ATTR_SIZE) {
|
||||
ret = bch2_truncate(inode, iattr);
|
||||
} else {
|
||||
mutex_lock(&inode->ei_update_lock);
|
||||
setattr_copy(&inode->v, iattr);
|
||||
ret = bch2_write_inode(c, inode);
|
||||
mutex_unlock(&inode->ei_update_lock);
|
||||
}
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
if (iattr->ia_valid & ATTR_MODE)
|
||||
ret = posix_acl_chmod(&inode->v, inode->v.i_mode);
|
||||
|
||||
return ret;
|
||||
return iattr->ia_valid & ATTR_SIZE
|
||||
? bch2_truncate(inode, iattr)
|
||||
: bch2_setattr_nonsize(inode, iattr);
|
||||
}
|
||||
|
||||
static int bch2_tmpfile(struct inode *vdir, struct dentry *dentry, umode_t mode)
|
||||
@ -910,6 +953,7 @@ static void bch2_vfs_inode_init(struct bch_fs *c,
|
||||
inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
|
||||
|
||||
inode->ei_journal_seq = 0;
|
||||
inode->ei_qid = bch_qid(bi);
|
||||
inode->ei_str_hash = bch2_hash_info_init(c, bi);
|
||||
inode->ei_inode = *bi;
|
||||
|
||||
@ -995,6 +1039,10 @@ static void bch2_evict_inode(struct inode *vinode)
|
||||
clear_inode(&inode->v);
|
||||
|
||||
if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
|
||||
bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
|
||||
BCH_QUOTA_WARN);
|
||||
bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
|
||||
BCH_QUOTA_WARN);
|
||||
bch2_inode_rm(c, inode->v.i_ino);
|
||||
atomic_long_dec(&c->nr_inodes);
|
||||
}
|
||||
@ -1009,8 +1057,7 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
buf->f_type = BCACHEFS_STATFS_MAGIC;
|
||||
buf->f_bsize = sb->s_blocksize;
|
||||
buf->f_blocks = c->capacity >> PAGE_SECTOR_SHIFT;
|
||||
buf->f_bfree = (c->capacity -
|
||||
bch2_fs_sectors_used(c, bch2_fs_usage_read(c))) >>
|
||||
buf->f_bfree = bch2_fs_sectors_free(c, bch2_fs_usage_read(c)) >>
|
||||
PAGE_SECTOR_SHIFT;
|
||||
buf->f_bavail = buf->f_bfree;
|
||||
buf->f_files = atomic_long_read(&c->nr_inodes);
|
||||
@ -1037,17 +1084,83 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
|
||||
return bch2_journal_flush(&c->journal);
|
||||
}
|
||||
|
||||
static struct bch_fs *bch2_path_to_fs(const char *dev)
|
||||
{
|
||||
struct bch_fs *c;
|
||||
struct block_device *bdev = lookup_bdev(dev);
|
||||
|
||||
if (IS_ERR(bdev))
|
||||
return ERR_CAST(bdev);
|
||||
|
||||
c = bch2_bdev_to_fs(bdev);
|
||||
bdput(bdev);
|
||||
return c ?: ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs,
|
||||
unsigned nr_devs, struct bch_opts opts)
|
||||
{
|
||||
struct bch_fs *c, *c1, *c2;
|
||||
size_t i;
|
||||
|
||||
if (!nr_devs)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
c = bch2_fs_open(devs, nr_devs, opts);
|
||||
|
||||
if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) {
|
||||
/*
|
||||
* Already open?
|
||||
* Look up each block device, make sure they all belong to a
|
||||
* filesystem and they all belong to the _same_ filesystem
|
||||
*/
|
||||
|
||||
c1 = bch2_path_to_fs(devs[0]);
|
||||
if (!c1)
|
||||
return c;
|
||||
|
||||
for (i = 1; i < nr_devs; i++) {
|
||||
c2 = bch2_path_to_fs(devs[i]);
|
||||
if (!IS_ERR(c2))
|
||||
closure_put(&c2->cl);
|
||||
|
||||
if (c1 != c2) {
|
||||
closure_put(&c1->cl);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
c = c1;
|
||||
}
|
||||
|
||||
if (IS_ERR(c))
|
||||
return c;
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
if (!bch2_fs_running(c)) {
|
||||
mutex_unlock(&c->state_lock);
|
||||
closure_put(&c->cl);
|
||||
pr_err("err mounting %s: incomplete filesystem", dev_name);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
mutex_unlock(&c->state_lock);
|
||||
|
||||
set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
|
||||
return c;
|
||||
}
|
||||
|
||||
static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
|
||||
struct bch_opts opts)
|
||||
{
|
||||
size_t nr_devs = 0, i = 0;
|
||||
char *dev_name, *s, **devs;
|
||||
struct bch_fs *c = NULL;
|
||||
const char *err = "cannot allocate memory";
|
||||
char *dev_name = NULL, **devs = NULL, *s;
|
||||
struct bch_fs *c = ERR_PTR(-ENOMEM);
|
||||
size_t i, nr_devs = 0;
|
||||
|
||||
dev_name = kstrdup(_dev_name, GFP_KERNEL);
|
||||
if (!dev_name)
|
||||
return NULL;
|
||||
goto err;
|
||||
|
||||
for (s = dev_name; s; s = strchr(s + 1, ':'))
|
||||
nr_devs++;
|
||||
@ -1061,57 +1174,10 @@ static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
|
||||
(s = strchr(s, ':')) && (*s++ = '\0'))
|
||||
devs[i++] = s;
|
||||
|
||||
err = bch2_fs_open(devs, nr_devs, opts, &c);
|
||||
if (err) {
|
||||
/*
|
||||
* Already open?
|
||||
* Look up each block device, make sure they all belong to a
|
||||
* filesystem and they all belong to the _same_ filesystem
|
||||
*/
|
||||
|
||||
for (i = 0; i < nr_devs; i++) {
|
||||
struct block_device *bdev = lookup_bdev(devs[i]);
|
||||
struct bch_fs *c2;
|
||||
|
||||
if (IS_ERR(bdev))
|
||||
goto err;
|
||||
|
||||
c2 = bch2_bdev_to_fs(bdev);
|
||||
bdput(bdev);
|
||||
|
||||
if (!c)
|
||||
c = c2;
|
||||
else if (c2)
|
||||
closure_put(&c2->cl);
|
||||
|
||||
if (!c)
|
||||
goto err;
|
||||
if (c != c2) {
|
||||
closure_put(&c->cl);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
if (!bch2_fs_running(c)) {
|
||||
mutex_unlock(&c->state_lock);
|
||||
closure_put(&c->cl);
|
||||
err = "incomplete filesystem";
|
||||
c = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_unlock(&c->state_lock);
|
||||
}
|
||||
|
||||
set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
|
||||
c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts);
|
||||
err:
|
||||
kfree(devs);
|
||||
kfree(dev_name);
|
||||
|
||||
if (!c)
|
||||
pr_err("bch_fs_open err %s", err);
|
||||
return c;
|
||||
}
|
||||
|
||||
@ -1234,8 +1300,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
|
||||
return ERR_PTR(ret);
|
||||
|
||||
c = bch2_open_as_blockdevs(dev_name, opts);
|
||||
if (!c)
|
||||
return ERR_PTR(-ENOENT);
|
||||
if (IS_ERR(c))
|
||||
return ERR_CAST(c);
|
||||
|
||||
sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|MS_NOSEC, c);
|
||||
if (IS_ERR(sb)) {
|
||||
@ -1261,6 +1327,10 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_op = &bch_super_operations;
|
||||
sb->s_export_op = &bch_export_ops;
|
||||
#ifdef CONFIG_BCACHEFS_QUOTA
|
||||
sb->s_qcop = &bch2_quotactl_operations;
|
||||
sb->s_quota_types = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
|
||||
#endif
|
||||
sb->s_xattr = bch2_xattr_handlers;
|
||||
sb->s_magic = BCACHEFS_STATFS_MAGIC;
|
||||
sb->s_time_gran = c->sb.time_precision;
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include "opts.h"
|
||||
#include "str_hash.h"
|
||||
#include "quota_types.h"
|
||||
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/stat.h>
|
||||
@ -13,6 +14,7 @@ struct bch_inode_info {
|
||||
struct mutex ei_update_lock;
|
||||
u64 ei_journal_seq;
|
||||
unsigned long ei_last_dirtied;
|
||||
struct bch_qid ei_qid;
|
||||
|
||||
struct bch_hash_info ei_str_hash;
|
||||
|
||||
|
@ -266,26 +266,60 @@ static int check_extents(struct bch_fs *c)
|
||||
!S_ISREG(w.inode.bi_mode) && !S_ISLNK(w.inode.bi_mode), c,
|
||||
"extent type %u for non regular file, inode %llu mode %o",
|
||||
k.k->type, k.k->p.inode, w.inode.bi_mode)) {
|
||||
ret = bch2_btree_delete_at(&iter, 0);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
ret = bch2_inode_truncate(c, k.k->p.inode, 0, NULL, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
|
||||
unfixable_fsck_err_on(w.first_this_inode &&
|
||||
if (fsck_err_on(w.first_this_inode &&
|
||||
w.have_inode &&
|
||||
!(w.inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY) &&
|
||||
w.inode.bi_sectors !=
|
||||
(i_sectors = bch2_count_inode_sectors(c, w.cur_inum)),
|
||||
c, "i_sectors wrong: got %llu, should be %llu",
|
||||
w.inode.bi_sectors, i_sectors);
|
||||
w.inode.bi_sectors, i_sectors)) {
|
||||
struct bkey_inode_buf p;
|
||||
|
||||
unfixable_fsck_err_on(w.have_inode &&
|
||||
w.inode.bi_sectors = i_sectors;
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
bch2_inode_pack(&p, &w.inode);
|
||||
|
||||
ret = bch2_btree_insert(c, BTREE_ID_INODES,
|
||||
&p.inode.k_i,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
BTREE_INSERT_NOFAIL);
|
||||
if (ret) {
|
||||
bch_err(c, "error in fs gc: error %i "
|
||||
"updating inode", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* revalidate iterator: */
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
}
|
||||
|
||||
if (fsck_err_on(w.have_inode &&
|
||||
!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
|
||||
k.k->type != BCH_RESERVATION &&
|
||||
k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
|
||||
"extent type %u offset %llu past end of inode %llu, i_size %llu",
|
||||
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size);
|
||||
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
ret = bch2_inode_truncate(c, k.k->p.inode,
|
||||
round_up(w.inode.bi_size, PAGE_SIZE) >> 9,
|
||||
NULL, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
err:
|
||||
fsck_err:
|
||||
@ -999,7 +1033,7 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
|
||||
u64 nlinks_pos;
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
|
||||
genradix_iter_init(&nlinks_iter);
|
||||
nlinks_iter = genradix_iter_init(links, 0);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!btree_iter_err(k)) {
|
||||
|
@ -268,7 +268,8 @@ static void bch2_write_index(struct closure *cl)
|
||||
}
|
||||
|
||||
if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
|
||||
ret = bch2_check_mark_super(c, e.c, BCH_DATA_USER);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER,
|
||||
bch2_extent_devs(e.c));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
@ -67,10 +67,7 @@ struct bch_read_bio {
|
||||
struct bch_write_bio {
|
||||
struct bch_fs *c;
|
||||
struct bch_dev *ca;
|
||||
union {
|
||||
struct bch_write_bio *parent;
|
||||
struct closure *cl;
|
||||
};
|
||||
|
||||
struct bch_devs_list failed;
|
||||
u8 order;
|
||||
@ -82,7 +79,6 @@ struct bch_write_bio {
|
||||
used_mempool:1;
|
||||
|
||||
unsigned submit_time_us;
|
||||
void *data;
|
||||
|
||||
struct bio bio;
|
||||
};
|
||||
@ -94,7 +90,7 @@ struct bch_write_op {
|
||||
|
||||
unsigned written; /* sectors */
|
||||
u16 flags;
|
||||
s8 error;
|
||||
s16 error; /* dio write path expects it to hold -ERESTARTSYS... */
|
||||
|
||||
unsigned csum_type:4;
|
||||
unsigned compression_type:4;
|
||||
|
@ -88,6 +88,9 @@ struct bkey_i *bch2_journal_find_btree_root(struct bch_fs *c, struct jset *j,
|
||||
if (!entry)
|
||||
return NULL;
|
||||
|
||||
if (!entry->u64s)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
k = entry->start;
|
||||
*level = entry->level;
|
||||
*level = entry->level;
|
||||
@ -415,6 +418,7 @@ static struct nonce journal_nonce(const struct jset *jset)
|
||||
}};
|
||||
}
|
||||
|
||||
/* this fills in a range with empty jset_entries: */
|
||||
static void journal_entry_null_range(void *start, void *end)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
@ -423,7 +427,7 @@ static void journal_entry_null_range(void *start, void *end)
|
||||
memset(entry, 0, sizeof(*entry));
|
||||
}
|
||||
|
||||
static int journal_validate_key(struct bch_fs *c, struct jset *j,
|
||||
static int journal_validate_key(struct bch_fs *c, struct jset *jset,
|
||||
struct jset_entry *entry,
|
||||
struct bkey_i *k, enum bkey_type key_type,
|
||||
const char *type)
|
||||
@ -458,7 +462,7 @@ static int journal_validate_key(struct bch_fs *c, struct jset *j,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (JSET_BIG_ENDIAN(j) != CPU_BIG_ENDIAN)
|
||||
if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN)
|
||||
bch2_bkey_swab(key_type, NULL, bkey_to_packed(k));
|
||||
|
||||
invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k));
|
||||
@ -497,26 +501,27 @@ fsck_err:
|
||||
#define journal_entry_err_on(cond, c, msg, ...) \
|
||||
((cond) ? journal_entry_err(c, msg, ##__VA_ARGS__) : false)
|
||||
|
||||
static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j,
|
||||
static int journal_entry_validate_entries(struct bch_fs *c, struct jset *jset,
|
||||
int write)
|
||||
{
|
||||
struct jset_entry *entry;
|
||||
int ret = 0;
|
||||
|
||||
vstruct_for_each(j, entry) {
|
||||
vstruct_for_each(jset, entry) {
|
||||
void *next = vstruct_next(entry);
|
||||
struct bkey_i *k;
|
||||
|
||||
if (journal_entry_err_on(vstruct_next(entry) >
|
||||
vstruct_last(j), c,
|
||||
vstruct_last(jset), c,
|
||||
"journal entry extends past end of jset")) {
|
||||
j->u64s = cpu_to_le32((u64 *) entry - j->_data);
|
||||
jset->u64s = cpu_to_le32((u64 *) entry - jset->_data);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (entry->type) {
|
||||
case JOURNAL_ENTRY_BTREE_KEYS:
|
||||
vstruct_for_each(entry, k) {
|
||||
ret = journal_validate_key(c, j, entry, k,
|
||||
ret = journal_validate_key(c, jset, entry, k,
|
||||
bkey_type(entry->level,
|
||||
entry->btree_id),
|
||||
"key");
|
||||
@ -531,12 +536,17 @@ static int journal_entry_validate_entries(struct bch_fs *c, struct jset *j,
|
||||
if (journal_entry_err_on(!entry->u64s ||
|
||||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
|
||||
"invalid btree root journal entry: wrong number of keys")) {
|
||||
journal_entry_null_range(entry,
|
||||
vstruct_next(entry));
|
||||
/*
|
||||
* we don't want to null out this jset_entry,
|
||||
* just the contents, so that later we can tell
|
||||
* we were _supposed_ to have a btree root
|
||||
*/
|
||||
entry->u64s = 0;
|
||||
journal_entry_null_range(vstruct_next(entry), next);
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = journal_validate_key(c, j, entry, k,
|
||||
ret = journal_validate_key(c, jset, entry, k,
|
||||
BKEY_TYPE_BTREE, "btree root");
|
||||
if (ret)
|
||||
goto fsck_err;
|
||||
@ -566,21 +576,21 @@ fsck_err:
|
||||
}
|
||||
|
||||
static int journal_entry_validate(struct bch_fs *c,
|
||||
struct jset *j, u64 sector,
|
||||
struct jset *jset, u64 sector,
|
||||
unsigned bucket_sectors_left,
|
||||
unsigned sectors_read,
|
||||
int write)
|
||||
{
|
||||
size_t bytes = vstruct_bytes(j);
|
||||
size_t bytes = vstruct_bytes(jset);
|
||||
struct bch_csum csum;
|
||||
int ret = 0;
|
||||
|
||||
if (le64_to_cpu(j->magic) != jset_magic(c))
|
||||
if (le64_to_cpu(jset->magic) != jset_magic(c))
|
||||
return JOURNAL_ENTRY_NONE;
|
||||
|
||||
if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
|
||||
if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) {
|
||||
bch_err(c, "unknown journal entry version %u",
|
||||
le32_to_cpu(j->version));
|
||||
le32_to_cpu(jset->version));
|
||||
return BCH_FSCK_UNKNOWN_VERSION;
|
||||
}
|
||||
|
||||
@ -594,26 +604,26 @@ static int journal_entry_validate(struct bch_fs *c,
|
||||
if (bytes > sectors_read << 9)
|
||||
return JOURNAL_ENTRY_REREAD;
|
||||
|
||||
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
|
||||
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
|
||||
"journal entry with unknown csum type %llu sector %lluu",
|
||||
JSET_CSUM_TYPE(j), sector))
|
||||
JSET_CSUM_TYPE(jset), sector))
|
||||
return JOURNAL_ENTRY_BAD;
|
||||
|
||||
csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
|
||||
if (journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c,
|
||||
csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
|
||||
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
|
||||
"journal checksum bad, sector %llu", sector)) {
|
||||
/* XXX: retry IO, when we start retrying checksum errors */
|
||||
/* XXX: note we might have missing journal entries */
|
||||
return JOURNAL_ENTRY_BAD;
|
||||
}
|
||||
|
||||
bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
|
||||
j->encrypted_start,
|
||||
vstruct_end(j) - (void *) j->encrypted_start);
|
||||
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
|
||||
jset->encrypted_start,
|
||||
vstruct_end(jset) - (void *) jset->encrypted_start);
|
||||
|
||||
if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
|
||||
if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
|
||||
"invalid journal entry: last_seq > seq"))
|
||||
j->last_seq = j->seq;
|
||||
jset->last_seq = jset->seq;
|
||||
|
||||
return 0;
|
||||
fsck_err:
|
||||
@ -960,6 +970,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
struct bch_dev *ca;
|
||||
u64 cur_seq, end_seq;
|
||||
unsigned iter, keys = 0, entries = 0;
|
||||
size_t nr;
|
||||
int ret = 0;
|
||||
|
||||
closure_init_stack(&jlist.cl);
|
||||
@ -994,12 +1005,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
goto fsck_err;
|
||||
|
||||
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
|
||||
fsck_err_on(!bch2_sb_has_replicas_devlist(c, &i->devs,
|
||||
BCH_DATA_JOURNAL), c,
|
||||
fsck_err_on(!bch2_sb_has_replicas(c, BCH_DATA_JOURNAL,
|
||||
i->devs), c,
|
||||
"superblock not marked as containing replicas (type %u)",
|
||||
BCH_DATA_JOURNAL)) {
|
||||
ret = bch2_check_mark_super_devlist(c, &i->devs,
|
||||
BCH_DATA_JOURNAL);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL,
|
||||
i->devs);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -1007,9 +1018,16 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
|
||||
|
||||
i = list_last_entry(list, struct journal_replay, list);
|
||||
|
||||
unfixable_fsck_err_on(le64_to_cpu(i->j.seq) -
|
||||
le64_to_cpu(i->j.last_seq) + 1 > j->pin.size, c,
|
||||
"too many journal entries open for refcount fifo");
|
||||
nr = le64_to_cpu(i->j.seq) - le64_to_cpu(i->j.last_seq) + 1;
|
||||
|
||||
if (nr > j->pin.size) {
|
||||
free_fifo(&j->pin);
|
||||
init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
|
||||
if (!j->pin.data) {
|
||||
bch_err(c, "error reallocating journal fifo (%zu open entries)", nr);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
atomic64_set(&j->seq, le64_to_cpu(i->j.seq));
|
||||
j->last_seq_ondisk = le64_to_cpu(i->j.last_seq);
|
||||
@ -1131,18 +1149,19 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __journal_entry_new(struct journal *j, int count)
|
||||
static void journal_pin_new_entry(struct journal *j, int count)
|
||||
{
|
||||
struct journal_entry_pin_list *p = fifo_push_ref(&j->pin);
|
||||
struct journal_entry_pin_list *p;
|
||||
|
||||
/*
|
||||
* The fifo_push() needs to happen at the same time as j->seq is
|
||||
* incremented for last_seq() to be calculated correctly
|
||||
*/
|
||||
p = fifo_push_ref(&j->pin);
|
||||
atomic64_inc(&j->seq);
|
||||
|
||||
BUG_ON(journal_seq_pin(j, atomic64_read(&j->seq)) !=
|
||||
&fifo_peek_back(&j->pin));
|
||||
EBUG_ON(journal_seq_pin(j, atomic64_read(&j->seq)) !=
|
||||
&fifo_peek_back(&j->pin));
|
||||
|
||||
INIT_LIST_HEAD(&p->list);
|
||||
INIT_LIST_HEAD(&p->flushed);
|
||||
@ -1150,13 +1169,10 @@ static void __journal_entry_new(struct journal *j, int count)
|
||||
p->devs.nr = 0;
|
||||
}
|
||||
|
||||
static void __bch2_journal_next_entry(struct journal *j)
|
||||
static void bch2_journal_buf_init(struct journal *j)
|
||||
{
|
||||
struct journal_buf *buf;
|
||||
struct journal_buf *buf = journal_cur_buf(j);
|
||||
|
||||
__journal_entry_new(j, 1);
|
||||
|
||||
buf = journal_cur_buf(j);
|
||||
memset(buf->has_inode, 0, sizeof(buf->has_inode));
|
||||
|
||||
memset(buf->data, 0, sizeof(*buf->data));
|
||||
@ -1208,22 +1224,24 @@ static enum {
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
journal_reclaim_fast(j);
|
||||
|
||||
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
|
||||
|
||||
buf = &j->buf[old.idx];
|
||||
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
|
||||
buf->data->last_seq = cpu_to_le64(last_seq(j));
|
||||
|
||||
j->prev_buf_sectors =
|
||||
vstruct_blocks_plus(buf->data, c->block_bits,
|
||||
journal_entry_u64s_reserve(buf)) *
|
||||
c->opts.block_size;
|
||||
|
||||
BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
|
||||
|
||||
__bch2_journal_next_entry(j);
|
||||
journal_reclaim_fast(j);
|
||||
/* XXX: why set this here, and not in journal_write()? */
|
||||
buf->data->last_seq = cpu_to_le64(last_seq(j));
|
||||
|
||||
journal_pin_new_entry(j, 1);
|
||||
|
||||
bch2_journal_buf_init(j);
|
||||
|
||||
cancel_delayed_work(&j->write_work);
|
||||
spin_unlock(&j->lock);
|
||||
@ -1352,12 +1370,20 @@ static int journal_entry_sectors(struct journal *j)
|
||||
/*
|
||||
* should _only_ called from journal_res_get() - when we actually want a
|
||||
* journal reservation - journal entry is open means journal is dirty:
|
||||
*
|
||||
* returns:
|
||||
* 1: success
|
||||
* 0: journal currently full (must wait)
|
||||
* -EROFS: insufficient rw devices
|
||||
* -EIO: journal error
|
||||
*/
|
||||
static int journal_entry_open(struct journal *j)
|
||||
{
|
||||
struct journal_buf *buf = journal_cur_buf(j);
|
||||
union journal_res_state old, new;
|
||||
ssize_t u64s;
|
||||
int ret = 0, sectors;
|
||||
int sectors;
|
||||
u64 v;
|
||||
|
||||
lockdep_assert_held(&j->lock);
|
||||
BUG_ON(journal_entry_is_open(j));
|
||||
@ -1387,41 +1413,36 @@ static int journal_entry_open(struct journal *j)
|
||||
|
||||
BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
|
||||
|
||||
if (u64s > le32_to_cpu(buf->data->u64s)) {
|
||||
union journal_res_state old, new;
|
||||
u64 v = atomic64_read(&j->reservations.counter);
|
||||
if (u64s <= le32_to_cpu(buf->data->u64s))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Must be set before marking the journal entry as open:
|
||||
*/
|
||||
j->cur_entry_u64s = u64s;
|
||||
/*
|
||||
* Must be set before marking the journal entry as open:
|
||||
*/
|
||||
j->cur_entry_u64s = u64s;
|
||||
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
v = atomic64_read(&j->reservations.counter);
|
||||
do {
|
||||
old.v = new.v = v;
|
||||
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
return false;
|
||||
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
|
||||
return -EIO;
|
||||
|
||||
/* Handle any already added entries */
|
||||
new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
ret = 1;
|
||||
/* Handle any already added entries */
|
||||
new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
wake_up(&j->wait);
|
||||
if (j->res_get_blocked_start)
|
||||
__bch2_time_stats_update(j->blocked_time,
|
||||
j->res_get_blocked_start);
|
||||
j->res_get_blocked_start = 0;
|
||||
|
||||
if (j->res_get_blocked_start) {
|
||||
__bch2_time_stats_update(j->blocked_time,
|
||||
j->res_get_blocked_start);
|
||||
j->res_get_blocked_start = 0;
|
||||
}
|
||||
|
||||
mod_delayed_work(system_freezable_wq,
|
||||
&j->write_work,
|
||||
msecs_to_jiffies(j->write_delay_ms));
|
||||
}
|
||||
|
||||
return ret;
|
||||
mod_delayed_work(system_freezable_wq,
|
||||
&j->write_work,
|
||||
msecs_to_jiffies(j->write_delay_ms));
|
||||
wake_up(&j->wait);
|
||||
return 1;
|
||||
}
|
||||
|
||||
void bch2_journal_start(struct bch_fs *c)
|
||||
@ -1438,14 +1459,15 @@ void bch2_journal_start(struct bch_fs *c)
|
||||
set_bit(JOURNAL_STARTED, &j->flags);
|
||||
|
||||
while (atomic64_read(&j->seq) < new_seq)
|
||||
__journal_entry_new(j, 0);
|
||||
journal_pin_new_entry(j, 0);
|
||||
|
||||
/*
|
||||
* journal_buf_switch() only inits the next journal entry when it
|
||||
* closes an open journal entry - the very first journal entry gets
|
||||
* initialized here:
|
||||
*/
|
||||
__bch2_journal_next_entry(j);
|
||||
journal_pin_new_entry(j, 1);
|
||||
bch2_journal_buf_init(j);
|
||||
|
||||
/*
|
||||
* Adding entries to the next journal entry before allocating space on
|
||||
@ -1476,7 +1498,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
struct bkey_i *k, *_n;
|
||||
struct jset_entry *entry;
|
||||
struct journal_replay *i, *n;
|
||||
int ret = 0, did_replay = 0;
|
||||
int ret = 0;
|
||||
|
||||
list_for_each_entry_safe(i, n, list, list) {
|
||||
j->replay_pin_list =
|
||||
@ -1514,7 +1536,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
did_replay = true;
|
||||
}
|
||||
|
||||
if (atomic_dec_and_test(&j->replay_pin_list->count))
|
||||
@ -1524,22 +1545,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
|
||||
j->replay_pin_list = NULL;
|
||||
|
||||
bch2_journal_set_replay_done(j);
|
||||
|
||||
if (did_replay) {
|
||||
bch2_journal_flush_pins(&c->journal, U64_MAX);
|
||||
|
||||
/*
|
||||
* Write a new journal entry _before_ we start journalling new data -
|
||||
* otherwise, we could end up with btree node bsets with journal seqs
|
||||
* arbitrarily far in the future vs. the most recently written journal
|
||||
* entry on disk, if we crash before writing the next journal entry:
|
||||
*/
|
||||
ret = bch2_journal_meta(j);
|
||||
if (ret) {
|
||||
bch_err(c, "journal replay: error %d flushing journal", ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
ret = bch2_journal_flush_all_pins(j);
|
||||
err:
|
||||
bch2_journal_entries_free(list);
|
||||
return ret;
|
||||
@ -1654,7 +1660,7 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dev_journal_alloc(struct bch_dev *ca)
|
||||
int bch2_dev_journal_alloc(struct bch_fs *c, struct bch_dev *ca)
|
||||
{
|
||||
unsigned nr;
|
||||
|
||||
@ -1670,7 +1676,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
|
||||
min(1 << 10,
|
||||
(1 << 20) / ca->mi.bucket_size));
|
||||
|
||||
return bch2_set_nr_journal_buckets(ca->fs, ca, nr);
|
||||
return bch2_set_nr_journal_buckets(c, ca, nr);
|
||||
}
|
||||
|
||||
/* Journalling */
|
||||
@ -1723,6 +1729,7 @@ static inline void __journal_pin_add(struct journal *j,
|
||||
list_add(&pin->list, &pin_list->list);
|
||||
else
|
||||
INIT_LIST_HEAD(&pin->list);
|
||||
wake_up(&j->wait);
|
||||
}
|
||||
|
||||
static void journal_pin_add_entry(struct journal *j,
|
||||
@ -1730,9 +1737,9 @@ static void journal_pin_add_entry(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
spin_lock(&j->lock);
|
||||
__journal_pin_add(j, pin_list, pin, flush_fn);
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
void bch2_journal_pin_add(struct journal *j,
|
||||
@ -1744,9 +1751,9 @@ void bch2_journal_pin_add(struct journal *j,
|
||||
? journal_seq_pin(j, res->seq)
|
||||
: j->replay_pin_list;
|
||||
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
spin_lock(&j->lock);
|
||||
__journal_pin_add(j, pin_list, pin, flush_fn);
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static inline bool __journal_pin_drop(struct journal *j,
|
||||
@ -1766,13 +1773,12 @@ static inline bool __journal_pin_drop(struct journal *j,
|
||||
void bch2_journal_pin_drop(struct journal *j,
|
||||
struct journal_entry_pin *pin)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool wakeup = false;
|
||||
|
||||
spin_lock_irqsave(&j->pin_lock, flags);
|
||||
spin_lock(&j->lock);
|
||||
if (journal_pin_active(pin))
|
||||
wakeup = __journal_pin_drop(j, pin);
|
||||
spin_unlock_irqrestore(&j->pin_lock, flags);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
/*
|
||||
* Unpinning a journal entry may make journal_next_bucket() succeed, if
|
||||
@ -1789,7 +1795,7 @@ void bch2_journal_pin_add_if_older(struct journal *j,
|
||||
struct journal_entry_pin *pin,
|
||||
journal_pin_flush_fn flush_fn)
|
||||
{
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
spin_lock(&j->lock);
|
||||
|
||||
if (journal_pin_active(src_pin) &&
|
||||
(!journal_pin_active(pin) ||
|
||||
@ -1800,24 +1806,19 @@ void bch2_journal_pin_add_if_older(struct journal *j,
|
||||
__journal_pin_add(j, src_pin->pin_list, pin, flush_fn);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
__journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
{
|
||||
struct journal_entry_pin_list *pin_list;
|
||||
struct journal_entry_pin *ret = NULL;
|
||||
struct journal_entry_pin *ret;
|
||||
unsigned iter;
|
||||
|
||||
/* so we don't iterate over empty fifo entries below: */
|
||||
if (!atomic_read(&fifo_peek_front(&j->pin).count)) {
|
||||
spin_lock(&j->lock);
|
||||
journal_reclaim_fast(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
/* no need to iterate over empty fifo entries: */
|
||||
journal_reclaim_fast(j);
|
||||
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, iter) {
|
||||
if (journal_pin_seq(j, pin_list) > seq_to_flush)
|
||||
break;
|
||||
@ -1828,71 +1829,82 @@ journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
/* must be list_del_init(), see bch2_journal_pin_drop() */
|
||||
list_move(&ret->list, &pin_list->flushed);
|
||||
*seq = journal_pin_seq(j, pin_list);
|
||||
break;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
|
||||
return ret;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool journal_flush_done(struct journal *j, u64 seq_to_flush)
|
||||
static struct journal_entry_pin *
|
||||
journal_get_next_pin(struct journal *j, u64 seq_to_flush, u64 *seq)
|
||||
{
|
||||
bool ret;
|
||||
struct journal_entry_pin *ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
journal_reclaim_fast(j);
|
||||
|
||||
ret = (fifo_used(&j->pin) == 1 &&
|
||||
atomic_read(&fifo_peek_front(&j->pin).count) == 1) ||
|
||||
last_seq(j) > seq_to_flush;
|
||||
ret = __journal_get_next_pin(j, seq_to_flush, seq);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
struct journal_entry_pin **pin,
|
||||
u64 *pin_seq)
|
||||
{
|
||||
struct journal_entry_pin *pin;
|
||||
u64 pin_seq;
|
||||
int ret;
|
||||
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return;
|
||||
*pin = NULL;
|
||||
|
||||
while ((pin = journal_get_next_pin(j, seq_to_flush, &pin_seq)))
|
||||
pin->flush(j, pin, pin_seq);
|
||||
ret = bch2_journal_error(j);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/*
|
||||
* If journal replay hasn't completed, the unreplayed journal entries
|
||||
* hold refs on their corresponding sequence numbers and thus this would
|
||||
* deadlock:
|
||||
* hold refs on their corresponding sequence numbers
|
||||
*/
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &j->flags))
|
||||
return;
|
||||
ret = (*pin = __journal_get_next_pin(j, seq_to_flush, pin_seq)) ||
|
||||
!test_bit(JOURNAL_REPLAY_DONE, &j->flags) ||
|
||||
last_seq(j) > seq_to_flush ||
|
||||
(fifo_used(&j->pin) == 1 &&
|
||||
atomic_read(&fifo_peek_front(&j->pin).count) == 1);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
wait_event(j->wait,
|
||||
journal_flush_done(j, seq_to_flush) ||
|
||||
bch2_journal_error(j));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_journal_flush_all_pins(struct journal *j)
|
||||
int bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_entry_pin *pin;
|
||||
u64 pin_seq;
|
||||
bool flush;
|
||||
|
||||
if (!test_bit(JOURNAL_STARTED, &j->flags))
|
||||
return 0;
|
||||
|
||||
bch2_journal_flush_pins(j, U64_MAX);
|
||||
again:
|
||||
wait_event(j->wait, journal_flush_done(j, seq_to_flush, &pin, &pin_seq));
|
||||
if (pin) {
|
||||
/* flushing a journal pin might cause a new one to be added: */
|
||||
pin->flush(j, pin, pin_seq);
|
||||
goto again;
|
||||
}
|
||||
|
||||
spin_lock(&j->lock);
|
||||
flush = last_seq(j) != j->last_seq_ondisk ||
|
||||
c->btree_roots_dirty;
|
||||
(seq_to_flush == U64_MAX && c->btree_roots_dirty);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return flush ? bch2_journal_meta(j) : 0;
|
||||
}
|
||||
|
||||
int bch2_journal_flush_all_pins(struct journal *j)
|
||||
{
|
||||
return bch2_journal_flush_pins(j, U64_MAX);
|
||||
}
|
||||
|
||||
static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
||||
{
|
||||
bool ret;
|
||||
@ -2179,14 +2191,15 @@ static void journal_write_done(struct closure *cl)
|
||||
struct journal *j = container_of(cl, struct journal, io);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct journal_buf *w = journal_prev_buf(j);
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&w->key);
|
||||
struct bch_devs_list devs =
|
||||
bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
|
||||
|
||||
if (!bch2_extent_nr_ptrs(e)) {
|
||||
if (!devs.nr) {
|
||||
bch_err(c, "unable to write journal to sufficient devices");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (bch2_check_mark_super(c, e, BCH_DATA_JOURNAL))
|
||||
if (bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs))
|
||||
goto err;
|
||||
out:
|
||||
__bch2_time_stats_update(j->write_time, j->write_start_time);
|
||||
@ -2194,8 +2207,7 @@ out:
|
||||
spin_lock(&j->lock);
|
||||
j->last_seq_ondisk = le64_to_cpu(w->data->last_seq);
|
||||
|
||||
journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs =
|
||||
bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
|
||||
journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs = devs;
|
||||
|
||||
/*
|
||||
* Updating last_seq_ondisk may let journal_reclaim_work() discard more
|
||||
@ -2358,7 +2370,7 @@ static void journal_write(struct closure *cl)
|
||||
}
|
||||
|
||||
no_io:
|
||||
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr)
|
||||
extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
|
||||
ptr->offset += sectors;
|
||||
|
||||
continue_at(cl, journal_write_done, system_highpri_wq);
|
||||
@ -2737,7 +2749,9 @@ int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
|
||||
seq = journal_pin_seq(j, p);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
bch2_journal_flush_pins(j, seq);
|
||||
ret = bch2_journal_flush_pins(j, seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&c->replicas_gc_lock);
|
||||
bch2_replicas_gc_start(c, 1 << BCH_DATA_JOURNAL);
|
||||
@ -2751,7 +2765,7 @@ int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
|
||||
seq++;
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
ret = bch2_check_mark_super_devlist(c, &devs, BCH_DATA_JOURNAL);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_JOURNAL, devs);
|
||||
spin_lock(&j->lock);
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
@ -2857,7 +2871,6 @@ int bch2_fs_journal_init(struct journal *j)
|
||||
static struct lock_class_key res_key;
|
||||
|
||||
spin_lock_init(&j->lock);
|
||||
spin_lock_init(&j->pin_lock);
|
||||
spin_lock_init(&j->err_lock);
|
||||
init_waitqueue_head(&j->wait);
|
||||
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
|
||||
@ -2956,7 +2969,7 @@ ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
|
||||
ssize_t ret = 0;
|
||||
unsigned i;
|
||||
|
||||
spin_lock_irq(&j->pin_lock);
|
||||
spin_lock(&j->lock);
|
||||
fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
|
||||
"%llu: count %u\n",
|
||||
@ -2977,7 +2990,7 @@ ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
|
||||
"\t%p %pf\n",
|
||||
pin, pin->flush);
|
||||
}
|
||||
spin_unlock_irq(&j->pin_lock);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -165,7 +165,7 @@ void bch2_journal_pin_add_if_older(struct journal *,
|
||||
struct journal_entry_pin *,
|
||||
struct journal_entry_pin *,
|
||||
journal_pin_flush_fn);
|
||||
void bch2_journal_flush_pins(struct journal *, u64);
|
||||
int bch2_journal_flush_pins(struct journal *, u64);
|
||||
int bch2_journal_flush_all_pins(struct journal *);
|
||||
|
||||
struct closure;
|
||||
@ -390,7 +390,7 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
|
||||
ssize_t bch2_journal_print_debug(struct journal *, char *);
|
||||
ssize_t bch2_journal_print_pins(struct journal *, char *);
|
||||
|
||||
int bch2_dev_journal_alloc(struct bch_dev *);
|
||||
int bch2_dev_journal_alloc(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
|
||||
void bch2_fs_journal_stop(struct journal *);
|
||||
|
@ -169,12 +169,6 @@ struct journal {
|
||||
DECLARE_FIFO(struct journal_entry_pin_list, pin);
|
||||
struct journal_entry_pin_list *replay_pin_list;
|
||||
|
||||
/*
|
||||
* Protects the pin lists - the fifo itself is still protected by
|
||||
* j->lock though:
|
||||
*/
|
||||
spinlock_t pin_lock;
|
||||
|
||||
struct mutex blacklist_lock;
|
||||
struct list_head seq_blacklist;
|
||||
|
||||
|
@ -16,13 +16,8 @@
|
||||
static bool migrate_pred(void *arg, struct bkey_s_c_extent e)
|
||||
{
|
||||
struct bch_dev *ca = arg;
|
||||
const struct bch_extent_ptr *ptr;
|
||||
|
||||
extent_for_each_ptr(e, ptr)
|
||||
if (ptr->dev == ca->dev_idx)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return bch2_extent_has_device(e, ca->dev_idx);
|
||||
}
|
||||
|
||||
#define MAX_DATA_OFF_ITER 10
|
||||
@ -32,30 +27,17 @@ static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u64 keys_moved, sectors_moved;
|
||||
struct bch_move_stats stats;
|
||||
unsigned pass = 0;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW);
|
||||
|
||||
if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_USER)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* In theory, only one pass should be necessary as we've
|
||||
* quiesced all writes before calling this.
|
||||
*
|
||||
* However, in practice, more than one pass may be necessary:
|
||||
* - Some move fails due to an error. We can find this out
|
||||
* from the moving_context.
|
||||
* - Some key swap failed because some of the pointers in the
|
||||
* key in the tree changed due to caching behavior, btree gc
|
||||
* pruning stale pointers, or tiering (if the device being
|
||||
* removed is in tier 0). A smarter bkey_cmpxchg would
|
||||
* handle these cases.
|
||||
*
|
||||
* Thus this scans the tree one more time than strictly necessary,
|
||||
* but that can be viewed as a verification pass.
|
||||
* XXX: we should be able to do this in one pass, but bch2_move_data()
|
||||
* can spuriously fail to move an extent due to racing with other move
|
||||
* operations
|
||||
*/
|
||||
do {
|
||||
ret = bch2_move_data(c, NULL,
|
||||
@ -65,15 +47,14 @@ static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
0,
|
||||
ca->dev_idx,
|
||||
migrate_pred, ca,
|
||||
&keys_moved,
|
||||
&sectors_moved);
|
||||
&stats);
|
||||
if (ret) {
|
||||
bch_err(c, "error migrating data: %i", ret);
|
||||
return ret;
|
||||
}
|
||||
} while (keys_moved && pass++ < MAX_DATA_OFF_ITER);
|
||||
} while (atomic64_read(&stats.keys_moved) && pass++ < MAX_DATA_OFF_ITER);
|
||||
|
||||
if (keys_moved) {
|
||||
if (atomic64_read(&stats.keys_moved)) {
|
||||
bch_err(c, "unable to migrate all data in %d iterations",
|
||||
MAX_DATA_OFF_ITER);
|
||||
return -1;
|
||||
@ -83,11 +64,7 @@ static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
bch2_replicas_gc_start(c, 1 << BCH_DATA_USER);
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) {
|
||||
if (!bkey_extent_is_data(k.k))
|
||||
continue;
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER, bch2_bkey_devs(k));
|
||||
if (ret) {
|
||||
bch_err(c, "error migrating data %i from check_mark_super()", ret);
|
||||
break;
|
||||
@ -99,107 +76,34 @@ static int bch2_dev_usrdata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_move_btree_off(struct bch_fs *c, struct bch_dev *ca,
|
||||
enum btree_id id)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW);
|
||||
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
|
||||
|
||||
if (!bch2_extent_has_device(e, ca->dev_idx))
|
||||
continue;
|
||||
|
||||
ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0);
|
||||
if (ret) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bch2_btree_iter_set_locks_want(&iter, 0);
|
||||
}
|
||||
ret = bch2_btree_iter_unlock(&iter);
|
||||
if (ret)
|
||||
return ret; /* btree IO error */
|
||||
|
||||
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
|
||||
|
||||
BUG_ON(bch2_extent_has_device(e, ca->dev_idx));
|
||||
}
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This moves only the meta-data off, leaving the data (if any) in place.
|
||||
* The data is moved off by bch_move_data_off_device, if desired, and
|
||||
* called first.
|
||||
*
|
||||
* Before calling this, allocation of buckets to the device must have
|
||||
* been disabled, as else we'll continue to write meta-data to the device
|
||||
* when new buckets are picked for meta-data writes.
|
||||
* In addition, the copying gc and allocator threads for the device
|
||||
* must have been stopped. The allocator thread is the only thread
|
||||
* that writes prio/gen information.
|
||||
*
|
||||
* Meta-data consists of:
|
||||
* - Btree nodes
|
||||
* - Prio/gen information
|
||||
* - Journal entries
|
||||
* - Superblock
|
||||
*
|
||||
* This has to move the btree nodes and the journal only:
|
||||
* - prio/gen information is not written once the allocator thread is stopped.
|
||||
* also, as the prio/gen information is per-device it is not moved.
|
||||
* - the superblock will be written by the caller once after everything
|
||||
* is stopped.
|
||||
*
|
||||
* Note that currently there is no way to stop btree node and journal
|
||||
* meta-data writes to a device without moving the meta-data because
|
||||
* once a bucket is open for a btree node, unless a replacement btree
|
||||
* node is allocated (and the tree updated), the bucket will continue
|
||||
* to be written with updates. Similarly for the journal (it gets
|
||||
* written until filled).
|
||||
*
|
||||
* This routine leaves the data (if any) in place. Whether the data
|
||||
* should be moved off is a decision independent of whether the meta
|
||||
* data should be moved off and stopped:
|
||||
*
|
||||
* - For device removal, both data and meta-data are moved off, in
|
||||
* that order.
|
||||
*
|
||||
* - However, for turning a device read-only without removing it, only
|
||||
* meta-data is moved off since that's the only way to prevent it
|
||||
* from being written. Data is left in the device, but no new data
|
||||
* is written.
|
||||
*/
|
||||
|
||||
static int bch2_dev_metadata_migrate(struct bch_fs *c, struct bch_dev *ca,
|
||||
int flags)
|
||||
{
|
||||
unsigned i;
|
||||
struct btree_iter iter;
|
||||
struct btree *b;
|
||||
int ret = 0;
|
||||
unsigned id;
|
||||
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW);
|
||||
|
||||
if (!(bch2_dev_has_data(c, ca) &
|
||||
((1 << BCH_DATA_JOURNAL)|
|
||||
(1 << BCH_DATA_BTREE))))
|
||||
if (!(bch2_dev_has_data(c, ca) & (1 << BCH_DATA_BTREE)))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&c->replicas_gc_lock);
|
||||
bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
|
||||
|
||||
for (i = 0; i < BTREE_ID_NR; i++) {
|
||||
ret = bch2_move_btree_off(c, ca, i);
|
||||
for (id = 0; id < BTREE_ID_NR; id++) {
|
||||
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
|
||||
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
|
||||
|
||||
if (!bch2_extent_has_device(e, ca->dev_idx))
|
||||
continue;
|
||||
|
||||
ret = bch2_btree_node_rewrite(c, &iter, b->data->keys.seq, 0);
|
||||
if (ret) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
ret = bch2_btree_iter_unlock(&iter);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -211,6 +115,9 @@ err:
|
||||
|
||||
int bch2_dev_data_migrate(struct bch_fs *c, struct bch_dev *ca, int flags)
|
||||
{
|
||||
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_RW &&
|
||||
bch2_dev_is_online(ca));
|
||||
|
||||
return bch2_dev_usrdata_migrate(c, ca, flags) ?:
|
||||
bch2_dev_metadata_migrate(c, ca, flags);
|
||||
}
|
||||
@ -233,17 +140,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This doesn't actually move any data -- it marks the keys as bad
|
||||
* if they contain a pointer to a device that is forcibly removed
|
||||
* and don't have other valid pointers. If there are valid pointers,
|
||||
* the necessary pointers to the removed device are replaced with
|
||||
* bad pointers instead.
|
||||
*
|
||||
* This is only called if bch_move_data_off_device above failed, meaning
|
||||
* that we've already tried to move the data MAX_DATA_OFF_ITER times and
|
||||
* are not likely to succeed if we try again.
|
||||
*/
|
||||
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
struct bkey_s_c k;
|
||||
@ -260,11 +156,15 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
|
||||
while ((k = bch2_btree_iter_peek(&iter)).k &&
|
||||
!(ret = btree_iter_err(k))) {
|
||||
if (!bkey_extent_is_data(k.k))
|
||||
goto advance;
|
||||
|
||||
if (!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx))
|
||||
goto advance;
|
||||
if (!bkey_extent_is_data(k.k) ||
|
||||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER,
|
||||
bch2_bkey_devs(k));
|
||||
if (ret)
|
||||
break;
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
bkey_reassemble(&tmp.key, k);
|
||||
e = bkey_i_to_s_extent(&tmp.key);
|
||||
@ -280,8 +180,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
*/
|
||||
bch2_extent_normalize(c, e.s);
|
||||
|
||||
if (bkey_extent_is_data(e.k) &&
|
||||
(ret = bch2_check_mark_super(c, e.c, BCH_DATA_USER)))
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER,
|
||||
bch2_bkey_devs(bkey_i_to_s_c(&tmp.key)));
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
iter.pos = bkey_start_pos(&tmp.key.k);
|
||||
@ -300,16 +201,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
ret = 0;
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
continue;
|
||||
advance:
|
||||
if (bkey_extent_is_data(k.k)) {
|
||||
ret = bch2_check_mark_super(c, bkey_s_c_to_extent(k),
|
||||
BCH_DATA_USER);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
}
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
@ -346,8 +237,8 @@ retry:
|
||||
dev_idx)) {
|
||||
bch2_btree_iter_set_locks_want(&iter, 0);
|
||||
|
||||
ret = bch2_check_mark_super(c, bkey_i_to_s_c_extent(&b->key),
|
||||
BCH_DATA_BTREE);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_BTREE,
|
||||
bch2_bkey_devs(bkey_i_to_s_c(&b->key)));
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
|
@ -31,15 +31,10 @@ struct moving_context {
|
||||
/* Closure for waiting on all reads and writes to complete */
|
||||
struct closure cl;
|
||||
|
||||
/* Key and sector moves issued, updated from submission context */
|
||||
u64 keys_moved;
|
||||
u64 sectors_moved;
|
||||
atomic64_t sectors_raced;
|
||||
struct bch_move_stats *stats;
|
||||
|
||||
struct list_head reads;
|
||||
|
||||
atomic_t sectors_in_flight;
|
||||
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
@ -116,8 +111,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
|
||||
bch2_extent_normalize(c, extent_i_to_s(insert).s);
|
||||
bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert));
|
||||
|
||||
ret = bch2_check_mark_super(c, extent_i_to_s_c(insert),
|
||||
BCH_DATA_USER);
|
||||
ret = bch2_check_mark_super(c, BCH_DATA_USER,
|
||||
bch2_extent_devs(extent_i_to_s_c(insert)));
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@ -145,7 +140,7 @@ next:
|
||||
nomatch:
|
||||
if (m->ctxt)
|
||||
atomic64_add(k.k->p.offset - iter.pos.offset,
|
||||
&m->ctxt->sectors_raced);
|
||||
&m->ctxt->stats->sectors_raced);
|
||||
atomic_long_inc(&c->extent_migrate_raced);
|
||||
trace_move_race(&new->k);
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
@ -303,8 +298,8 @@ static int bch2_move_extent(struct bch_fs *c,
|
||||
io->write.op.devs = devs;
|
||||
io->write.op.write_point = wp;
|
||||
|
||||
ctxt->keys_moved++;
|
||||
ctxt->sectors_moved += k.k->size;
|
||||
atomic64_inc(&ctxt->stats->keys_moved);
|
||||
atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
|
||||
|
||||
trace_move_extent(k.k);
|
||||
|
||||
@ -353,24 +348,6 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
|
||||
atomic_read(&ctxt->sectors_in_flight) != sectors_pending);
|
||||
}
|
||||
|
||||
static void bch2_move_ctxt_exit(struct moving_context *ctxt)
|
||||
{
|
||||
move_ctxt_wait_event(ctxt, !atomic_read(&ctxt->sectors_in_flight));
|
||||
closure_sync(&ctxt->cl);
|
||||
|
||||
EBUG_ON(!list_empty(&ctxt->reads));
|
||||
EBUG_ON(atomic_read(&ctxt->sectors_in_flight));
|
||||
}
|
||||
|
||||
static void bch2_move_ctxt_init(struct moving_context *ctxt)
|
||||
{
|
||||
memset(ctxt, 0, sizeof(*ctxt));
|
||||
closure_init_stack(&ctxt->cl);
|
||||
|
||||
INIT_LIST_HEAD(&ctxt->reads);
|
||||
init_waitqueue_head(&ctxt->wait);
|
||||
}
|
||||
|
||||
int bch2_move_data(struct bch_fs *c,
|
||||
struct bch_ratelimit *rate,
|
||||
unsigned sectors_in_flight,
|
||||
@ -379,20 +356,21 @@ int bch2_move_data(struct bch_fs *c,
|
||||
int btree_insert_flags,
|
||||
int move_device,
|
||||
move_pred_fn pred, void *arg,
|
||||
u64 *keys_moved,
|
||||
u64 *sectors_moved)
|
||||
struct bch_move_stats *stats)
|
||||
{
|
||||
bool kthread = (current->flags & PF_KTHREAD) != 0;
|
||||
struct moving_context ctxt;
|
||||
struct moving_context ctxt = { .stats = stats };
|
||||
struct bch_io_opts opts = bch2_opts_to_inode_opts(c->opts);
|
||||
struct btree_iter iter;
|
||||
BKEY_PADDED(k) tmp;
|
||||
struct bkey_s_c k;
|
||||
u64 cur_inum = U64_MAX;
|
||||
int ret = 0;
|
||||
|
||||
bch2_move_ctxt_init(&ctxt);
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
|
||||
memset(stats, 0, sizeof(*stats));
|
||||
closure_init_stack(&ctxt.cl);
|
||||
INIT_LIST_HEAD(&ctxt.reads);
|
||||
init_waitqueue_head(&ctxt.wait);
|
||||
bch2_btree_iter_init(&stats->iter, c, BTREE_ID_EXTENTS, POS_MIN,
|
||||
BTREE_ITER_PREFETCH);
|
||||
|
||||
if (rate)
|
||||
@ -400,7 +378,7 @@ int bch2_move_data(struct bch_fs *c,
|
||||
|
||||
while (!kthread || !(ret = kthread_should_stop())) {
|
||||
if (atomic_read(&ctxt.sectors_in_flight) >= sectors_in_flight) {
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bch2_btree_iter_unlock(&stats->iter);
|
||||
move_ctxt_wait_event(&ctxt,
|
||||
atomic_read(&ctxt.sectors_in_flight) <
|
||||
sectors_in_flight);
|
||||
@ -408,11 +386,11 @@ int bch2_move_data(struct bch_fs *c,
|
||||
|
||||
if (rate &&
|
||||
bch2_ratelimit_delay(rate) &&
|
||||
(bch2_btree_iter_unlock(&iter),
|
||||
(bch2_btree_iter_unlock(&stats->iter),
|
||||
(ret = bch2_ratelimit_wait_freezable_stoppable(rate))))
|
||||
break;
|
||||
peek:
|
||||
k = bch2_btree_iter_peek(&iter);
|
||||
k = bch2_btree_iter_peek(&stats->iter);
|
||||
if (!k.k)
|
||||
break;
|
||||
ret = btree_iter_err(k);
|
||||
@ -420,13 +398,13 @@ peek:
|
||||
break;
|
||||
|
||||
if (!bkey_extent_is_data(k.k))
|
||||
goto next;
|
||||
goto next_nondata;
|
||||
|
||||
if (cur_inum != k.k->p.inode) {
|
||||
struct bch_inode_unpacked inode;
|
||||
|
||||
/* don't hold btree locks while looking up inode: */
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bch2_btree_iter_unlock(&stats->iter);
|
||||
|
||||
opts = bch2_opts_to_inode_opts(c->opts);
|
||||
if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
|
||||
@ -441,7 +419,7 @@ peek:
|
||||
/* unlock before doing IO: */
|
||||
bkey_reassemble(&tmp.k, k);
|
||||
k = bkey_i_to_s_c(&tmp.k);
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bch2_btree_iter_unlock(&stats->iter);
|
||||
|
||||
if (bch2_move_extent(c, &ctxt, devs, wp,
|
||||
btree_insert_flags,
|
||||
@ -454,17 +432,24 @@ peek:
|
||||
if (rate)
|
||||
bch2_ratelimit_increment(rate, k.k->size);
|
||||
next:
|
||||
bch2_btree_iter_advance_pos(&iter);
|
||||
bch2_btree_iter_cond_resched(&iter);
|
||||
atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k),
|
||||
&stats->sectors_seen);
|
||||
next_nondata:
|
||||
bch2_btree_iter_advance_pos(&stats->iter);
|
||||
bch2_btree_iter_cond_resched(&stats->iter);
|
||||
}
|
||||
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
bch2_move_ctxt_exit(&ctxt);
|
||||
bch2_btree_iter_unlock(&stats->iter);
|
||||
|
||||
trace_move_data(c, ctxt.sectors_moved, ctxt.keys_moved);
|
||||
move_ctxt_wait_event(&ctxt, !atomic_read(&ctxt.sectors_in_flight));
|
||||
closure_sync(&ctxt.cl);
|
||||
|
||||
*keys_moved = ctxt.keys_moved;
|
||||
*sectors_moved = ctxt.sectors_moved;
|
||||
EBUG_ON(!list_empty(&ctxt.reads));
|
||||
EBUG_ON(atomic_read(&ctxt.sectors_in_flight));
|
||||
|
||||
trace_move_data(c,
|
||||
atomic64_read(&stats->sectors_moved),
|
||||
atomic64_read(&stats->keys_moved));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef _BCACHEFS_MOVE_H
|
||||
#define _BCACHEFS_MOVE_H
|
||||
|
||||
#include "btree_iter.h"
|
||||
#include "buckets.h"
|
||||
#include "io_types.h"
|
||||
|
||||
@ -25,10 +26,19 @@ void bch2_migrate_write_init(struct migrate_write *, struct bch_read_bio *);
|
||||
|
||||
typedef bool (*move_pred_fn)(void *, struct bkey_s_c_extent);
|
||||
|
||||
struct bch_move_stats {
|
||||
struct btree_iter iter;
|
||||
|
||||
atomic64_t keys_moved;
|
||||
atomic64_t sectors_moved;
|
||||
atomic64_t sectors_seen;
|
||||
atomic64_t sectors_raced;
|
||||
};
|
||||
|
||||
int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
|
||||
unsigned, struct bch_devs_mask *,
|
||||
struct write_point_specifier,
|
||||
int, int, move_pred_fn, void *,
|
||||
u64 *, u64 *);
|
||||
struct bch_move_stats *);
|
||||
|
||||
#endif /* _BCACHEFS_MOVE_H */
|
||||
|
@ -100,7 +100,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
|
||||
copygc_heap *h = &ca->copygc_heap;
|
||||
struct copygc_heap_entry e, *i;
|
||||
struct bucket_array *buckets;
|
||||
u64 keys_moved, sectors_moved;
|
||||
struct bch_move_stats move_stats;
|
||||
u64 sectors_to_move = 0, sectors_not_moved = 0;
|
||||
u64 buckets_to_move, buckets_not_moved = 0;
|
||||
size_t b;
|
||||
@ -167,8 +167,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
|
||||
BTREE_INSERT_USE_RESERVE,
|
||||
ca->dev_idx,
|
||||
copygc_pred, ca,
|
||||
&keys_moved,
|
||||
&sectors_moved);
|
||||
&move_stats);
|
||||
|
||||
down_read(&ca->bucket_lock);
|
||||
buckets = bucket_array(ca);
|
||||
@ -189,7 +188,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
|
||||
buckets_not_moved, buckets_to_move);
|
||||
|
||||
trace_copygc(ca,
|
||||
sectors_moved, sectors_not_moved,
|
||||
atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
|
||||
buckets_to_move, buckets_not_moved);
|
||||
}
|
||||
|
||||
|
@ -167,6 +167,27 @@ int bch2_opt_lookup(const char *name)
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct synonym {
|
||||
const char *s1, *s2;
|
||||
};
|
||||
|
||||
static const struct synonym bch_opt_synonyms[] = {
|
||||
{ "quota", "usrquota" },
|
||||
};
|
||||
|
||||
static int bch2_mount_opt_lookup(const char *name)
|
||||
{
|
||||
const struct synonym *i;
|
||||
|
||||
for (i = bch_opt_synonyms;
|
||||
i < bch_opt_synonyms + ARRAY_SIZE(bch_opt_synonyms);
|
||||
i++)
|
||||
if (!strcmp(name, i->s1))
|
||||
name = i->s2;
|
||||
|
||||
return bch2_opt_lookup(name);
|
||||
}
|
||||
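/*
 * Sketch of how the lookup above behaves (illustrative only): mounting with
 * -o quota is treated as -o usrquota, i.e. bch2_mount_opt_lookup("quota")
 * resolves to bch2_opt_lookup("usrquota"); names with no synonym fall
 * straight through to bch2_opt_lookup() and fail there if unknown.
 */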
|
||||
int bch2_opt_parse(const struct bch_option *opt, const char *val, u64 *res)
|
||||
{
|
||||
ssize_t ret;
|
||||
@ -211,7 +232,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
|
||||
val = opt;
|
||||
|
||||
if (val) {
|
||||
id = bch2_opt_lookup(name);
|
||||
id = bch2_mount_opt_lookup(name);
|
||||
if (id < 0)
|
||||
goto bad_opt;
|
||||
|
||||
@ -219,12 +240,12 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
|
||||
if (ret < 0)
|
||||
goto bad_val;
|
||||
} else {
|
||||
id = bch2_opt_lookup(name);
|
||||
id = bch2_mount_opt_lookup(name);
|
||||
v = 1;
|
||||
|
||||
if (id < 0 &&
|
||||
!strncmp("no", name, 2)) {
|
||||
id = bch2_opt_lookup(name + 2);
|
||||
id = bch2_mount_opt_lookup(name + 2);
|
||||
v = 0;
|
||||
}
|
||||
|
||||
@ -242,6 +263,11 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
|
||||
!IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL))
|
||||
goto bad_opt;
|
||||
|
||||
if ((id == Opt_usrquota ||
|
||||
id == Opt_grpquota) &&
|
||||
!IS_ENABLED(CONFIG_BCACHEFS_QUOTA))
|
||||
goto bad_opt;
|
||||
|
||||
bch2_opt_set_by_id(opts, id, v);
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,15 @@ enum opt_type {
|
||||
BCH_OPT(acl, u8, OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_POSIX_ACL, true) \
|
||||
BCH_OPT(usrquota, u8, OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_USRQUOTA, false) \
|
||||
BCH_OPT(grpquota, u8, OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_GRPQUOTA, false) \
|
||||
BCH_OPT(prjquota, u8, OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_PRJQUOTA, false) \
|
||||
BCH_OPT(degraded, u8, OPT_MOUNT, \
|
||||
OPT_BOOL(), \
|
||||
NO_SB_OPT, false) \
|
||||
@ -171,7 +180,7 @@ static const struct bch_opts bch2_opts_default = {
|
||||
#define opt_defined(_opts, _name) ((_opts)._name##_defined)
|
||||
|
||||
#define opt_get(_opts, _name) \
|
||||
(opt_defined(_opts, _name) ? _opts._name : bch2_opts_default._name)
|
||||
(opt_defined(_opts, _name) ? (_opts)._name : bch2_opts_default._name)
|
||||
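/*
 * Illustrative note on why (_opts)._name is parenthesized above: without the
 * parens the macro mis-expands when handed a dereferenced pointer, e.g. for
 * a hypothetical struct bch_opts *opts,
 *
 *	opt_get(*opts, foo)
 *		unparenthesized: ... ? *opts.foo   : ...	// parses as *(opts.foo)
 *		parenthesized:   ... ? (*opts).foo : ...	// intended member access
 */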
|
||||
#define opt_set(_opts, _name, _v) \
|
||||
do { \
|
||||
|
786
libbcachefs/quota.c
Normal file
@ -0,0 +1,786 @@
|
||||
#include "bcachefs.h"
|
||||
#include "btree_update.h"
|
||||
#include "inode.h"
|
||||
#include "quota.h"
|
||||
#include "super-io.h"
|
||||
|
||||
static const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_quota dq;
|
||||
|
||||
if (k.k->p.inode >= QTYP_NR)
|
||||
return "invalid quota type";
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_QUOTA: {
|
||||
dq = bkey_s_c_to_quota(k);
|
||||
|
||||
if (bkey_val_bytes(k.k) != sizeof(struct bch_quota))
|
||||
return "incorrect value size";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
default:
|
||||
return "invalid type";
|
||||
}
|
||||
}
|
||||
|
||||
static const char * const bch2_quota_counters[] = {
|
||||
"space",
|
||||
"inodes",
|
||||
};
|
||||
|
||||
static void bch2_quota_to_text(struct bch_fs *c, char *buf,
|
||||
size_t size, struct bkey_s_c k)
|
||||
{
|
||||
char *out = buf, *end = buf + size;
|
||||
struct bkey_s_c_quota dq;
|
||||
unsigned i;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_QUOTA:
|
||||
dq = bkey_s_c_to_quota(k);
|
||||
|
||||
for (i = 0; i < Q_COUNTERS; i++)
|
||||
out += scnprintf(out, end - out, "%s hardlimit %llu softlimit %llu",
|
||||
bch2_quota_counters[i],
|
||||
le64_to_cpu(dq.v->c[i].hardlimit),
|
||||
le64_to_cpu(dq.v->c[i].softlimit));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const struct bkey_ops bch2_bkey_quota_ops = {
|
||||
.key_invalid = bch2_quota_invalid,
|
||||
.val_to_text = bch2_quota_to_text,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BCACHEFS_QUOTA
|
||||
|
||||
#include <linux/cred.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/quota.h>
|
||||
|
||||
static inline unsigned __next_qtype(unsigned i, unsigned qtypes)
|
||||
{
|
||||
qtypes >>= i;
|
||||
return qtypes ? i + __ffs(qtypes) : QTYP_NR;
|
||||
}
|
||||
|
||||
#define for_each_set_qtype(_c, _i, _q, _qtypes) \
|
||||
for (_i = 0; \
|
||||
(_i = __next_qtype(_i, _qtypes), \
|
||||
_q = &(_c)->quotas[_i], \
|
||||
_i < QTYP_NR); \
|
||||
_i++)
|
||||
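/*
 * Illustrative example: if only the USR and PRJ bits are set in _qtypes, the
 * loop above runs its body with _i == QTYP_USR and then _i == QTYP_PRJ,
 * stopping once __next_qtype() returns QTYP_NR; types whose bit is clear are
 * skipped entirely.
 */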
|
||||
static inline unsigned enabled_qtypes(struct bch_fs *c)
|
||||
{
|
||||
return ((c->opts.usrquota << QTYP_USR)|
|
||||
(c->opts.grpquota << QTYP_GRP)|
|
||||
(c->opts.prjquota << QTYP_PRJ));
|
||||
}
|
||||
|
||||
static bool ignore_hardlimit(struct bch_memquota_type *q)
|
||||
{
|
||||
if (capable(CAP_SYS_RESOURCE))
|
||||
return true;
|
||||
#if 0
|
||||
struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
|
||||
|
||||
return capable(CAP_SYS_RESOURCE) &&
|
||||
(info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
|
||||
!(info->dqi_flags & DQF_ROOT_SQUASH));
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
enum quota_msg {
|
||||
SOFTWARN, /* Softlimit reached */
|
||||
SOFTLONGWARN, /* Grace time expired */
|
||||
HARDWARN, /* Hardlimit reached */
|
||||
|
||||
HARDBELOW, /* Usage got below inode hardlimit */
|
||||
SOFTBELOW, /* Usage got below inode softlimit */
|
||||
};
|
||||
|
||||
static int quota_nl[][Q_COUNTERS] = {
|
||||
[HARDWARN][Q_SPC] = QUOTA_NL_BHARDWARN,
|
||||
[SOFTLONGWARN][Q_SPC] = QUOTA_NL_BSOFTLONGWARN,
|
||||
[SOFTWARN][Q_SPC] = QUOTA_NL_BSOFTWARN,
|
||||
[HARDBELOW][Q_SPC] = QUOTA_NL_BHARDBELOW,
|
||||
[SOFTBELOW][Q_SPC] = QUOTA_NL_BSOFTBELOW,
|
||||
|
||||
[HARDWARN][Q_INO] = QUOTA_NL_IHARDWARN,
|
||||
[SOFTLONGWARN][Q_INO] = QUOTA_NL_ISOFTLONGWARN,
|
||||
[SOFTWARN][Q_INO] = QUOTA_NL_ISOFTWARN,
|
||||
[HARDBELOW][Q_INO] = QUOTA_NL_IHARDBELOW,
|
||||
[SOFTBELOW][Q_INO] = QUOTA_NL_ISOFTBELOW,
|
||||
};
|
||||
|
||||
struct quota_msgs {
|
||||
u8 nr;
|
||||
struct {
|
||||
u8 qtype;
|
||||
u8 msg;
|
||||
} m[QTYP_NR * Q_COUNTERS];
|
||||
};
|
||||
|
||||
static void prepare_msg(unsigned qtype,
|
||||
enum quota_counters counter,
|
||||
struct quota_msgs *msgs,
|
||||
enum quota_msg msg_type)
|
||||
{
|
||||
BUG_ON(msgs->nr >= ARRAY_SIZE(msgs->m));
|
||||
|
||||
msgs->m[msgs->nr].qtype = qtype;
|
||||
msgs->m[msgs->nr].msg = quota_nl[msg_type][counter];
|
||||
msgs->nr++;
|
||||
}
|
||||
|
||||
static void prepare_warning(struct memquota_counter *qc,
|
||||
unsigned qtype,
|
||||
enum quota_counters counter,
|
||||
struct quota_msgs *msgs,
|
||||
enum quota_msg msg_type)
|
||||
{
|
||||
if (qc->warning_issued & (1 << msg_type))
|
||||
return;
|
||||
|
||||
prepare_msg(qtype, counter, msgs, msg_type);
|
||||
}
|
||||
|
||||
static void flush_warnings(struct bch_qid qid,
|
||||
struct super_block *sb,
|
||||
struct quota_msgs *msgs)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < msgs->nr; i++)
|
||||
quota_send_warning(make_kqid(&init_user_ns, msgs->m[i].qtype, qid.q[i]),
|
||||
sb->s_dev, msgs->m[i].msg);
|
||||
}
|
||||
|
||||
static int bch2_quota_check_limit(struct bch_fs *c,
|
||||
unsigned qtype,
|
||||
struct bch_memquota *mq,
|
||||
struct quota_msgs *msgs,
|
||||
enum quota_counters counter,
|
||||
s64 v,
|
||||
enum quota_acct_mode mode)
|
||||
{
|
||||
struct bch_memquota_type *q = &c->quotas[qtype];
|
||||
struct memquota_counter *qc = &mq->c[counter];
|
||||
u64 n = qc->v + v;
|
||||
|
||||
BUG_ON((s64) n < 0);
|
||||
|
||||
if (mode == BCH_QUOTA_NOCHECK)
|
||||
return 0;
|
||||
|
||||
if (v <= 0) {
|
||||
if (n < qc->hardlimit &&
|
||||
(qc->warning_issued & (1 << HARDWARN))) {
|
||||
qc->warning_issued &= ~(1 << HARDWARN);
|
||||
prepare_msg(qtype, counter, msgs, HARDBELOW);
|
||||
}
|
||||
|
||||
if (n < qc->softlimit &&
|
||||
(qc->warning_issued & (1 << SOFTWARN))) {
|
||||
qc->warning_issued &= ~(1 << SOFTWARN);
|
||||
prepare_msg(qtype, counter, msgs, SOFTBELOW);
|
||||
}
|
||||
|
||||
qc->warning_issued = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (qc->hardlimit &&
|
||||
qc->hardlimit < n &&
|
||||
!ignore_hardlimit(q)) {
|
||||
if (mode == BCH_QUOTA_PREALLOC)
|
||||
return -EDQUOT;
|
||||
|
||||
prepare_warning(qc, qtype, counter, msgs, HARDWARN);
|
||||
}
|
||||
|
||||
if (qc->softlimit &&
|
||||
qc->softlimit < n &&
|
||||
qc->timer &&
|
||||
ktime_get_real_seconds() >= qc->timer &&
|
||||
!ignore_hardlimit(q)) {
|
||||
if (mode == BCH_QUOTA_PREALLOC)
|
||||
return -EDQUOT;
|
||||
|
||||
prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN);
|
||||
}
|
||||
|
||||
if (qc->softlimit &&
|
||||
qc->softlimit < n &&
|
||||
qc->timer == 0) {
|
||||
if (mode == BCH_QUOTA_PREALLOC)
|
||||
return -EDQUOT;
|
||||
|
||||
prepare_warning(qc, qtype, counter, msgs, SOFTWARN);
|
||||
|
||||
/* XXX is this the right one? */
|
||||
qc->timer = ktime_get_real_seconds() +
|
||||
q->limits[counter].warnlimit;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_quota_acct(struct bch_fs *c, struct bch_qid qid,
|
||||
enum quota_counters counter, s64 v,
|
||||
enum quota_acct_mode mode)
|
||||
{
|
||||
unsigned qtypes = enabled_qtypes(c);
|
||||
struct bch_memquota_type *q;
|
||||
struct bch_memquota *mq[QTYP_NR];
|
||||
struct quota_msgs msgs;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
memset(&msgs, 0, sizeof(msgs));
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes)
|
||||
mutex_lock(&q->lock);
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes) {
|
||||
mq[i] = genradix_ptr_alloc(&q->table, qid.q[i], GFP_NOFS);
|
||||
if (!mq[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_quota_check_limit(c, i, mq[i], &msgs, counter, v, mode);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes)
|
||||
mq[i]->c[counter].v += v;
|
||||
err:
|
||||
for_each_set_qtype(c, i, q, qtypes)
|
||||
mutex_unlock(&q->lock);
|
||||
|
||||
flush_warnings(qid, c->vfs_sb, &msgs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __bch2_quota_transfer(struct bch_memquota *src_q,
|
||||
struct bch_memquota *dst_q,
|
||||
enum quota_counters counter, s64 v)
|
||||
{
|
||||
BUG_ON(v > src_q->c[counter].v);
|
||||
BUG_ON(v + dst_q->c[counter].v < v);
|
||||
|
||||
src_q->c[counter].v -= v;
|
||||
dst_q->c[counter].v += v;
|
||||
}
|
||||
|
||||
int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes,
|
||||
struct bch_qid dst,
|
||||
struct bch_qid src, u64 space)
|
||||
{
|
||||
struct bch_memquota_type *q;
|
||||
struct bch_memquota *src_q[3], *dst_q[3];
|
||||
struct quota_msgs msgs;
|
||||
unsigned i;
|
||||
int ret = 0;
|
||||
|
||||
qtypes &= enabled_qtypes(c);
|
||||
|
||||
memset(&msgs, 0, sizeof(msgs));
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes)
|
||||
mutex_lock(&q->lock);
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes) {
|
||||
src_q[i] = genradix_ptr_alloc(&q->table, src.q[i], GFP_NOFS);
|
||||
dst_q[i] = genradix_ptr_alloc(&q->table, dst.q[i], GFP_NOFS);
|
||||
|
||||
if (!src_q[i] || !dst_q[i]) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC,
|
||||
dst_q[i]->c[Q_SPC].v + space,
|
||||
BCH_QUOTA_PREALLOC);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO,
|
||||
dst_q[i]->c[Q_INO].v + 1,
|
||||
BCH_QUOTA_PREALLOC);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes) {
|
||||
__bch2_quota_transfer(src_q[i], dst_q[i], Q_SPC, space);
|
||||
__bch2_quota_transfer(src_q[i], dst_q[i], Q_INO, 1);
|
||||
}
|
||||
|
||||
err:
|
||||
for_each_set_qtype(c, i, q, qtypes)
|
||||
mutex_unlock(&q->lock);
|
||||
|
||||
flush_warnings(dst, c->vfs_sb, &msgs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_quota dq;
|
||||
struct bch_memquota_type *q;
|
||||
struct bch_memquota *mq;
|
||||
unsigned i;
|
||||
|
||||
BUG_ON(k.k->p.inode >= QTYP_NR);
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_QUOTA:
|
||||
dq = bkey_s_c_to_quota(k);
|
||||
q = &c->quotas[k.k->p.inode];
|
||||
|
||||
mutex_lock(&q->lock);
|
||||
mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL);
|
||||
if (!mq) {
|
||||
mutex_unlock(&q->lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < Q_COUNTERS; i++) {
|
||||
mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit);
|
||||
mq->c[i].softlimit = le64_to_cpu(dq.v->c[i].softlimit);
|
||||
}
|
||||
|
||||
mutex_unlock(&q->lock);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
int ret = 0;
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_QUOTAS, POS(type, 0),
|
||||
BTREE_ITER_PREFETCH, k) {
|
||||
if (k.k->p.inode != type)
|
||||
break;
|
||||
|
||||
ret = __bch2_quota_set(c, k);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
void bch2_fs_quota_exit(struct bch_fs *c)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
|
||||
genradix_free(&c->quotas[i].table);
|
||||
}
|
||||
|
||||
void bch2_fs_quota_init(struct bch_fs *c)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(c->quotas); i++)
|
||||
mutex_init(&c->quotas[i].lock);
|
||||
}
|
||||
|
||||
static void bch2_sb_quota_read(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_quota *sb_quota;
|
||||
unsigned i, j;
|
||||
|
||||
sb_quota = bch2_sb_get_quota(c->disk_sb);
|
||||
if (!sb_quota)
|
||||
return;
|
||||
|
||||
for (i = 0; i < QTYP_NR; i++) {
|
||||
struct bch_memquota_type *q = &c->quotas[i];
|
||||
|
||||
for (j = 0; j < Q_COUNTERS; j++) {
|
||||
q->limits[j].timelimit =
|
||||
le32_to_cpu(sb_quota->q[i].c[j].timelimit);
|
||||
q->limits[j].warnlimit =
|
||||
le32_to_cpu(sb_quota->q[i].c[j].warnlimit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int bch2_fs_quota_read(struct bch_fs *c)
|
||||
{
|
||||
unsigned i, qtypes = enabled_qtypes(c);
|
||||
struct bch_memquota_type *q;
|
||||
struct btree_iter iter;
|
||||
struct bch_inode_unpacked u;
|
||||
struct bkey_s_c k;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
bch2_sb_quota_read(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
for_each_set_qtype(c, i, q, qtypes) {
|
||||
ret = bch2_quota_init_type(c, i);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
|
||||
BTREE_ITER_PREFETCH, k) {
|
||||
switch (k.k->type) {
|
||||
case BCH_INODE_FS:
|
||||
ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
|
||||
BCH_QUOTA_NOCHECK);
|
||||
bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
|
||||
BCH_QUOTA_NOCHECK);
|
||||
}
|
||||
}
|
||||
return bch2_btree_iter_unlock(&iter) ?: ret;
|
||||
}
|
||||
|
||||
/* Enable/disable/delete quotas for an entire filesystem: */
|
||||
|
||||
static int bch2_quota_enable(struct super_block *sb, unsigned uflags)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
/* Accounting must be enabled at mount time: */
|
||||
if (uflags & (FS_QUOTA_UDQ_ACCT|FS_QUOTA_GDQ_ACCT|FS_QUOTA_PDQ_ACCT))
|
||||
return -EINVAL;
|
||||
|
||||
/* Can't enable enforcement without accounting: */
|
||||
if ((uflags & FS_QUOTA_UDQ_ENFD) && !c->opts.usrquota)
|
||||
return -EINVAL;
|
||||
|
||||
if ((uflags & FS_QUOTA_GDQ_ENFD) && !c->opts.grpquota)
|
||||
return -EINVAL;
|
||||
|
||||
if (uflags & FS_QUOTA_PDQ_ENFD)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
if (uflags & FS_QUOTA_UDQ_ENFD)
|
||||
SET_BCH_SB_USRQUOTA(c->disk_sb, true);
|
||||
|
||||
if (uflags & FS_QUOTA_GDQ_ENFD)
|
||||
SET_BCH_SB_GRPQUOTA(c->disk_sb, true);
|
||||
#if 0
|
||||
if (uflags & FS_QUOTA_PDQ_ENFD)
|
||||
SET_BCH_SB_PRJQUOTA(c->disk_sb, true);
|
||||
#endif
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_quota_disable(struct super_block *sb, unsigned uflags)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
if (uflags & FS_QUOTA_UDQ_ENFD)
|
||||
SET_BCH_SB_USRQUOTA(c->disk_sb, false);
|
||||
|
||||
if (uflags & FS_QUOTA_GDQ_ENFD)
|
||||
SET_BCH_SB_GRPQUOTA(c->disk_sb, false);
|
||||
|
||||
if (uflags & FS_QUOTA_PDQ_ENFD)
|
||||
SET_BCH_SB_PRJQUOTA(c->disk_sb, false);
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_quota_remove(struct super_block *sb, unsigned uflags)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
int ret;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
if (uflags & FS_USER_QUOTA) {
|
||||
if (c->opts.usrquota)
|
||||
return -EINVAL;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_USR, 0),
|
||||
POS(QTYP_USR + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (uflags & FS_GROUP_QUOTA) {
|
||||
if (c->opts.grpquota)
|
||||
return -EINVAL;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_GRP, 0),
|
||||
POS(QTYP_GRP + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (uflags & FS_PROJ_QUOTA) {
|
||||
if (c->opts.prjquota)
|
||||
return -EINVAL;
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_QUOTAS,
|
||||
POS(QTYP_PRJ, 0),
|
||||
POS(QTYP_PRJ + 1, 0),
|
||||
ZERO_VERSION, NULL, NULL, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return quota status information, such as enforcements, quota file inode
|
||||
* numbers etc.
|
||||
*/
|
||||
static int bch2_quota_get_state(struct super_block *sb, struct qc_state *state)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
unsigned qtypes = enabled_qtypes(c);
|
||||
unsigned i;
|
||||
|
||||
memset(state, 0, sizeof(*state));
|
||||
|
||||
for (i = 0; i < QTYP_NR; i++) {
|
||||
state->s_state[i].flags |= QCI_SYSFILE;
|
||||
|
||||
if (!(qtypes & (1 << i)))
|
||||
continue;
|
||||
|
||||
state->s_state[i].flags |= QCI_ACCT_ENABLED;
|
||||
|
||||
state->s_state[i].spc_timelimit = c->quotas[i].limits[Q_SPC].timelimit;
|
||||
state->s_state[i].spc_warnlimit = c->quotas[i].limits[Q_SPC].warnlimit;
|
||||
|
||||
state->s_state[i].ino_timelimit = c->quotas[i].limits[Q_INO].timelimit;
|
||||
state->s_state[i].ino_warnlimit = c->quotas[i].limits[Q_INO].warnlimit;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust quota timers & warnings
|
||||
*/
|
||||
static int bch2_quota_set_info(struct super_block *sb, int type,
|
||||
struct qc_info *info)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct bch_sb_field_quota *sb_quota;
|
||||
struct bch_memquota_type *q;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
if (type >= QTYP_NR)
|
||||
return -EINVAL;
|
||||
|
||||
if (!((1 << type) & enabled_qtypes(c)))
|
||||
return -ESRCH;
|
||||
|
||||
if (info->i_fieldmask &
|
||||
~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS))
|
||||
return -EINVAL;
|
||||
|
||||
q = &c->quotas[type];
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
sb_quota = bch2_sb_get_quota(c->disk_sb);
|
||||
if (!sb_quota) {
|
||||
sb_quota = bch2_fs_sb_resize_quota(c, sizeof(*sb_quota) / sizeof(u64));
|
||||
if (!sb_quota)
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (info->i_fieldmask & QC_SPC_TIMER)
|
||||
sb_quota->q[type].c[Q_SPC].timelimit =
|
||||
cpu_to_le32(info->i_spc_timelimit);
|
||||
|
||||
if (info->i_fieldmask & QC_SPC_WARNS)
|
||||
sb_quota->q[type].c[Q_SPC].warnlimit =
|
||||
cpu_to_le32(info->i_spc_warnlimit);
|
||||
|
||||
if (info->i_fieldmask & QC_INO_TIMER)
|
||||
sb_quota->q[type].c[Q_INO].timelimit =
|
||||
cpu_to_le32(info->i_ino_timelimit);
|
||||
|
||||
if (info->i_fieldmask & QC_INO_WARNS)
|
||||
sb_quota->q[type].c[Q_INO].warnlimit =
|
||||
cpu_to_le32(info->i_ino_warnlimit);
|
||||
|
||||
bch2_sb_quota_read(c);
|
||||
|
||||
bch2_write_super(c);
|
||||
mutex_unlock(&c->sb_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get/set individual quotas: */
|
||||
|
||||
static void __bch2_quota_get(struct qc_dqblk *dst, struct bch_memquota *src)
|
||||
{
|
||||
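/* the counters appear to be kept in 512-byte sectors; qc_dqblk wants bytes, hence the << 9 */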
dst->d_space = src->c[Q_SPC].v << 9;
|
||||
dst->d_spc_hardlimit = src->c[Q_SPC].hardlimit << 9;
|
||||
dst->d_spc_softlimit = src->c[Q_SPC].softlimit << 9;
|
||||
dst->d_spc_timer = src->c[Q_SPC].timer;
|
||||
dst->d_spc_warns = src->c[Q_SPC].warns;
|
||||
|
||||
dst->d_ino_count = src->c[Q_INO].v;
|
||||
dst->d_ino_hardlimit = src->c[Q_INO].hardlimit;
|
||||
dst->d_ino_softlimit = src->c[Q_INO].softlimit;
|
||||
dst->d_ino_timer = src->c[Q_INO].timer;
|
||||
dst->d_ino_warns = src->c[Q_INO].warns;
|
||||
}
|
||||
|
||||
static int bch2_get_quota(struct super_block *sb, struct kqid kqid,
|
||||
struct qc_dqblk *qdq)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct bch_memquota_type *q = &c->quotas[kqid.type];
|
||||
qid_t qid = from_kqid(&init_user_ns, kqid);
|
||||
struct bch_memquota *mq;
|
||||
|
||||
memset(qdq, 0, sizeof(*qdq));
|
||||
|
||||
mutex_lock(&q->lock);
|
||||
mq = genradix_ptr(&q->table, qid);
|
||||
if (mq)
|
||||
__bch2_quota_get(qdq, mq);
|
||||
mutex_unlock(&q->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
|
||||
struct qc_dqblk *qdq)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct bch_memquota_type *q = &c->quotas[kqid->type];
|
||||
qid_t qid = from_kqid(&init_user_ns, *kqid);
|
||||
struct genradix_iter iter = genradix_iter_init(&q->table, qid);
|
||||
struct bch_memquota *mq;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&q->lock);
|
||||
|
||||
while ((mq = genradix_iter_peek(&iter, &q->table))) {
|
||||
if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
|
||||
__bch2_quota_get(qdq, mq);
|
||||
*kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
|
||||
goto found;
|
||||
}
|
||||
|
||||
genradix_iter_advance(&iter, &q->table);
|
||||
}
|
||||
|
||||
ret = -ENOENT;
|
||||
found:
|
||||
mutex_unlock(&q->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_set_quota(struct super_block *sb, struct kqid qid,
|
||||
struct qc_dqblk *qdq)
|
||||
{
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
struct bkey_i_quota new_quota;
|
||||
int ret;
|
||||
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return -EROFS;
|
||||
|
||||
bkey_quota_init(&new_quota.k_i);
|
||||
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
|
||||
|
||||
bch2_btree_iter_init(&iter, c, BTREE_ID_QUOTAS, new_quota.k.p,
|
||||
BTREE_ITER_WITH_HOLES|BTREE_ITER_INTENT);
|
||||
k = bch2_btree_iter_peek_with_holes(&iter);
|
||||
|
||||
ret = btree_iter_err(k);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
switch (k.k->type) {
|
||||
case BCH_QUOTA:
|
||||
new_quota.v = *bkey_s_c_to_quota(k).v;
|
||||
break;
|
||||
}
|
||||
|
||||
if (qdq->d_fieldmask & QC_SPC_SOFT)
|
||||
new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit);
|
||||
if (qdq->d_fieldmask & QC_SPC_HARD)
|
||||
new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit);
|
||||
|
||||
if (qdq->d_fieldmask & QC_INO_SOFT)
|
||||
new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_spc_softlimit);
|
||||
if (qdq->d_fieldmask & QC_INO_HARD)
|
||||
new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit);
|
||||
|
||||
ret = bch2_btree_insert_at(c, NULL, NULL, NULL, 0,
|
||||
BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
|
||||
bch2_btree_iter_unlock(&iter);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct quotactl_ops bch2_quotactl_operations = {
|
||||
.quota_enable = bch2_quota_enable,
|
||||
.quota_disable = bch2_quota_disable,
|
||||
.rm_xquota = bch2_quota_remove,
|
||||
|
||||
.get_state = bch2_quota_get_state,
|
||||
.set_info = bch2_quota_set_info,
|
||||
|
||||
.get_dqblk = bch2_get_quota,
|
||||
.get_nextdqblk = bch2_get_next_quota,
|
||||
.set_dqblk = bch2_set_quota,
|
||||
};
|
||||
|
||||
#endif /* CONFIG_BCACHEFS_QUOTA */
|
48
libbcachefs/quota.h
Normal file
@ -0,0 +1,48 @@
|
||||
#ifndef _BCACHEFS_QUOTA_H
|
||||
#define _BCACHEFS_QUOTA_H
|
||||
|
||||
#include "quota_types.h"
|
||||
|
||||
extern const struct bkey_ops bch2_bkey_quota_ops;
|
||||
|
||||
enum quota_acct_mode {
|
||||
BCH_QUOTA_PREALLOC,
|
||||
BCH_QUOTA_WARN,
|
||||
BCH_QUOTA_NOCHECK,
|
||||
};
|
||||
|
||||
static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
|
||||
{
|
||||
return (struct bch_qid) {
|
||||
.q[QTYP_USR] = u->bi_uid,
|
||||
.q[QTYP_GRP] = u->bi_gid,
|
||||
.q[QTYP_PRJ] = u->bi_project,
|
||||
};
|
||||
}
|
||||
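/*
 * Usage sketch (illustrative): bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
 * BCH_QUOTA_NOCHECK) charges one inode against u.bi_uid's user quota,
 * u.bi_gid's group quota and u.bi_project's project quota, for whichever
 * quota types are enabled.
 */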
|
||||
#ifdef CONFIG_BCACHEFS_QUOTA
|
||||
|
||||
int bch2_quota_acct(struct bch_fs *, struct bch_qid, enum quota_counters,
|
||||
s64, enum quota_acct_mode);
|
||||
|
||||
int bch2_quota_transfer(struct bch_fs *, unsigned, struct bch_qid,
|
||||
struct bch_qid, u64);
|
||||
|
||||
void bch2_fs_quota_exit(struct bch_fs *);
|
||||
void bch2_fs_quota_init(struct bch_fs *);
|
||||
int bch2_fs_quota_read(struct bch_fs *);
|
||||
|
||||
extern const struct quotactl_ops bch2_quotactl_operations;
|
||||
|
||||
#else
|
||||
|
||||
#define bch2_quota_acct(_c, _uid, _gid, _counter, _v) (0)
|
||||
#define bch2_quota_transfer(_c, _type, _src, _dst, _v) (0)
|
||||
|
||||
static inline void bch2_fs_quota_exit(struct bch_fs *c) {}
|
||||
static inline void bch2_fs_quota_init(struct bch_fs *c) {}
|
||||
static inline int bch2_fs_quota_read(struct bch_fs *c) { return 0; }
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _BCACHEFS_QUOTA_H */
|
36
libbcachefs/quota_types.h
Normal file
@ -0,0 +1,36 @@
|
||||
#ifndef _BCACHEFS_QUOTA_TYPES_H
|
||||
#define _BCACHEFS_QUOTA_TYPES_H
|
||||
|
||||
#include <linux/generic-radix-tree.h>
|
||||
|
||||
struct bch_qid {
|
||||
u32 q[QTYP_NR];
|
||||
};
|
||||
|
||||
struct memquota_counter {
|
||||
u64 v;
|
||||
u64 hardlimit;
|
||||
u64 softlimit;
|
||||
s64 timer;
|
||||
int warns;
|
||||
int warning_issued;
|
||||
};
|
||||
|
||||
struct bch_memquota {
|
||||
struct memquota_counter c[Q_COUNTERS];
|
||||
};
|
||||
|
||||
typedef GENRADIX(struct bch_memquota) bch_memquota_table;
|
||||
|
||||
struct quota_limit {
|
||||
u32 timelimit;
|
||||
u32 warnlimit;
|
||||
};
|
||||
|
||||
struct bch_memquota_type {
|
||||
struct quota_limit limits[Q_COUNTERS];
|
||||
bch_memquota_table table;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_QUOTA_TYPES_H */
|
@ -330,9 +330,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
|
||||
if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
|
||||
return "Btree node size not a power of two";
|
||||
|
||||
if (BCH_SB_BTREE_NODE_SIZE(sb) > BTREE_NODE_SIZE_MAX)
|
||||
return "Btree node size too large";
|
||||
|
||||
if (BCH_SB_GC_RESERVE(sb) < 5)
|
||||
return "gc reserve percentage too small";
|
||||
|
||||
@ -383,27 +380,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
|
||||
|
||||
/* device open: */
|
||||
|
||||
static const char *bch2_blkdev_open(const char *path, fmode_t mode,
|
||||
void *holder, struct block_device **ret)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
|
||||
*ret = NULL;
|
||||
bdev = blkdev_get_by_path(path, mode, holder);
|
||||
if (bdev == ERR_PTR(-EBUSY))
|
||||
return "device busy";
|
||||
|
||||
if (IS_ERR(bdev))
|
||||
return "failed to open device";
|
||||
|
||||
if (mode & FMODE_WRITE)
|
||||
bdev_get_queue(bdev)->backing_dev_info->capabilities
|
||||
|= BDI_CAP_STABLE_WRITES;
|
||||
|
||||
*ret = bdev;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void bch2_sb_update(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb *src = c->disk_sb;
|
||||
@ -555,44 +531,55 @@ reread:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *bch2_read_super(const char *path,
|
||||
struct bch_opts opts,
|
||||
struct bch_sb_handle *ret)
|
||||
int bch2_read_super(const char *path, struct bch_opts *opts,
|
||||
struct bch_sb_handle *sb)
|
||||
{
|
||||
u64 offset = opt_get(opts, sb);
|
||||
u64 offset = opt_get(*opts, sb);
|
||||
struct bch_sb_layout layout;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
__le64 *i;
|
||||
int ret;
|
||||
|
||||
memset(ret, 0, sizeof(*ret));
|
||||
ret->mode = FMODE_READ;
|
||||
memset(sb, 0, sizeof(*sb));
|
||||
sb->mode = FMODE_READ;
|
||||
|
||||
if (!opt_get(opts, noexcl))
|
||||
ret->mode |= FMODE_EXCL;
|
||||
if (!opt_get(*opts, noexcl))
|
||||
sb->mode |= FMODE_EXCL;
|
||||
|
||||
if (!opt_get(opts, nochanges))
|
||||
ret->mode |= FMODE_WRITE;
|
||||
if (!opt_get(*opts, nochanges))
|
||||
sb->mode |= FMODE_WRITE;
|
||||
|
||||
err = bch2_blkdev_open(path, ret->mode, ret, &ret->bdev);
|
||||
if (err)
|
||||
return err;
|
||||
sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
|
||||
if (IS_ERR(sb->bdev) &&
|
||||
PTR_ERR(sb->bdev) == -EACCES &&
|
||||
opt_get(*opts, read_only)) {
|
||||
sb->mode &= ~FMODE_WRITE;
|
||||
|
||||
sb->bdev = blkdev_get_by_path(path, sb->mode, sb);
|
||||
if (!IS_ERR(sb->bdev))
|
||||
opt_set(*opts, nochanges, true);
|
||||
}
|
||||
|
||||
if (IS_ERR(sb->bdev))
|
||||
return PTR_ERR(sb->bdev);
|
||||
|
||||
err = "cannot allocate memory";
|
||||
if (__bch2_super_realloc(ret, 0))
|
||||
ret = __bch2_super_realloc(sb, 0);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = -EFAULT;
|
||||
err = "dynamic fault";
|
||||
if (bch2_fs_init_fault("read_super"))
|
||||
goto err;
|
||||
|
||||
err = read_one_super(ret, offset);
|
||||
ret = -EINVAL;
|
||||
err = read_one_super(sb, offset);
|
||||
if (!err)
|
||||
goto got_super;
|
||||
|
||||
if (offset != BCH_SB_SECTOR) {
|
||||
pr_err("error reading superblock: %s", err);
|
||||
if (opt_defined(*opts, sb))
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_err("error reading default superblock: %s", err);
|
||||
|
||||
@ -600,53 +587,57 @@ const char *bch2_read_super(const char *path,
* Error reading primary superblock - read location of backup
* superblocks:
*/
bio_reset(ret->bio);
ret->bio->bi_bdev = ret->bdev;
ret->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
ret->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
bio_set_op_attrs(ret->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
bio_reset(sb->bio);
sb->bio->bi_bdev = sb->bdev;
sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
/*
* use sb buffer to read layout, since sb buffer is page aligned but
* layout won't be:
*/
bch2_bio_map(ret->bio, ret->sb);
bch2_bio_map(sb->bio, sb->sb);

err = "IO error";
if (submit_bio_wait(ret->bio))
if (submit_bio_wait(sb->bio))
goto err;

memcpy(&layout, ret->sb, sizeof(layout));
memcpy(&layout, sb->sb, sizeof(layout));
err = validate_sb_layout(&layout);
if (err)
goto err;

for (i = 0; i < layout.nr_superblocks; i++) {
u64 offset = le64_to_cpu(layout.sb_offset[i]);
for (i = layout.sb_offset;
i < layout.sb_offset + layout.nr_superblocks; i++) {
offset = le64_to_cpu(*i);

if (offset == BCH_SB_SECTOR)
if (offset == opt_get(*opts, sb))
continue;

err = read_one_super(ret, offset);
err = read_one_super(sb, offset);
if (!err)
goto got_super;
}
goto err;
got_super:
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
le64_to_cpu(ret->sb->version),
le64_to_cpu(ret->sb->flags[0]),
le64_to_cpu(ret->sb->seq),
le32_to_cpu(ret->sb->u64s));

ret = -EINVAL;
goto err;

got_super:
err = "Superblock block size smaller than device block size";
if (le16_to_cpu(ret->sb->block_size) << 9 <
bdev_logical_block_size(ret->bdev))
ret = -EINVAL;
if (le16_to_cpu(sb->sb->block_size) << 9 <
bdev_logical_block_size(sb->bdev))
goto err;

return NULL;
if (sb->mode & FMODE_WRITE)
bdev_get_queue(sb->bdev)->backing_dev_info->capabilities
|= BDI_CAP_STABLE_WRITES;

return 0;
err:
bch2_free_super(ret);
return err;
bch2_free_super(sb);
pr_err("error reading superblock: %s", err);
return ret;
}

/* write superblock: */
@ -1108,13 +1099,20 @@ err:
return ret;
}

static inline int __bch2_check_mark_super(struct bch_fs *c,
struct bch_replicas_cpu_entry search,
unsigned max_dev)
int bch2_check_mark_super(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
struct bch_replicas_cpu_entry search;
struct bch_replicas_cpu *r, *gc_r;
unsigned max_dev;
bool marked;

if (!devs.nr)
return 0;

devlist_to_replicas(devs, data_type, &search, &max_dev);

rcu_read_lock();
r = rcu_dereference(c->replicas);
gc_r = rcu_dereference(c->replicas_gc);
@ -1126,32 +1124,6 @@ static inline int __bch2_check_mark_super(struct bch_fs *c,
: bch2_check_mark_super_slowpath(c, search, max_dev);
}

int bch2_check_mark_super(struct bch_fs *c, struct bkey_s_c_extent e,
enum bch_data_type data_type)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;

if (!bkey_to_replicas(e, data_type, &search, &max_dev))
return 0;

return __bch2_check_mark_super(c, search, max_dev);
}

int bch2_check_mark_super_devlist(struct bch_fs *c,
struct bch_devs_list *devs,
enum bch_data_type data_type)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;

if (!devs->nr)
return 0;

devlist_to_replicas(*devs, data_type, &search, &max_dev);
return __bch2_check_mark_super(c, search, max_dev);
}

int bch2_replicas_gc_end(struct bch_fs *c, int err)
{
struct bch_replicas_cpu *new_r, *old_r;
@ -1435,12 +1407,19 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t

/* Query replicas: */

static bool __bch2_sb_has_replicas(struct bch_fs *c,
struct bch_replicas_cpu_entry search,
unsigned max_dev)
bool bch2_sb_has_replicas(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;
bool ret;

if (!devs.nr)
return true;

devlist_to_replicas(devs, data_type, &search, &max_dev);

rcu_read_lock();
ret = replicas_has_entry(rcu_dereference(c->replicas),
search, max_dev);
@ -1449,31 +1428,6 @@ static bool __bch2_sb_has_replicas(struct bch_fs *c,
return ret;
}

bool bch2_sb_has_replicas(struct bch_fs *c, struct bkey_s_c_extent e,
enum bch_data_type data_type)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;

if (!bkey_to_replicas(e, data_type, &search, &max_dev))
return true;

return __bch2_sb_has_replicas(c, search, max_dev);
}

bool bch2_sb_has_replicas_devlist(struct bch_fs *c, struct bch_devs_list *devs,
enum bch_data_type data_type)
{
struct bch_replicas_cpu_entry search;
unsigned max_dev;

if (!devs->nr)
return true;

devlist_to_replicas(*devs, data_type, &search, &max_dev);
return __bch2_sb_has_replicas(c, search, max_dev);
}

struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
{
@ -1579,12 +1533,23 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
goto out;

for_each_cpu_replicas_entry(r, e)
if (replicas_test_dev(e, ca->dev_idx)) {
if (replicas_test_dev(e, ca->dev_idx))
ret |= 1 << e->data_type;
break;
}
out:
rcu_read_unlock();

return ret;
}

/* Quotas: */

static const char *bch2_sb_validate_quota(struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_quota *q = field_to_type(f, quota);

if (vstruct_bytes(&q->field) != sizeof(*q))
return "invalid field quota: wrong size";

return NULL;
}
@ -94,8 +94,7 @@ int bch2_super_realloc(struct bch_sb_handle *, unsigned);

const char *bch2_sb_validate(struct bch_sb_handle *);

const char *bch2_read_super(const char *, struct bch_opts,
struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
void bch2_write_super(struct bch_fs *);

/* BCH_SB_FIELD_journal: */
@ -139,14 +138,10 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)

/* BCH_SB_FIELD_replicas: */

bool bch2_sb_has_replicas(struct bch_fs *, struct bkey_s_c_extent,
enum bch_data_type);
bool bch2_sb_has_replicas_devlist(struct bch_fs *, struct bch_devs_list *,
enum bch_data_type);
int bch2_check_mark_super(struct bch_fs *, struct bkey_s_c_extent,
enum bch_data_type);
int bch2_check_mark_super_devlist(struct bch_fs *, struct bch_devs_list *,
enum bch_data_type);
bool bch2_sb_has_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
int bch2_check_mark_super(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);

int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *, char *, size_t);
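The header hunk above changes bch2_read_super() to report failure as a negative errno through an int return (and to take struct bch_opts by pointer so the read path can adjust options such as nochanges), and collapses the replicas helpers into variants that take a data type plus a device list. A minimal caller sketch under the new signature follows; the wrapper function, the stdio error reporting, and the include choices are illustrative assumptions rather than part of this commit, and the bcachefs headers are assumed to be on the include path.

#include <stdio.h>
#include <string.h>

#include "super-io.h"	/* assumed to declare bch2_read_super()/bch2_free_super() */

/* Hypothetical helper showing the errno-based error handling: */
static int read_super_example(const char *dev)
{
	struct bch_opts opts = bch2_opts_empty();
	struct bch_sb_handle sb;
	int ret = bch2_read_super(dev, &opts, &sb);

	if (ret) {
		fprintf(stderr, "error reading superblock on %s: %s\n",
			dev, strerror(-ret));
		return ret;
	}

	/* ... inspect sb.sb ... */
	bch2_free_super(&sb);
	return 0;
}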
@ -29,6 +29,7 @@
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
#include "quota.h"
#include "super.h"
#include "super-io.h"
#include "sysfs.h"
@ -214,14 +215,15 @@ static void __bch2_fs_read_only(struct bch_fs *c)
*/
bch2_journal_flush_all_pins(&c->journal);

if (!bch2_journal_error(&c->journal))
bch2_btree_verify_flushed(c);

for_each_member_device(ca, c, i)
bch2_dev_allocator_stop(ca);

bch2_fs_journal_stop(&c->journal);

if (!bch2_journal_error(&c->journal) &&
!test_bit(BCH_FS_ERROR, &c->flags))
bch2_btree_verify_flushed(c);

for_each_member_device(ca, c, i)
bch2_dev_allocator_remove(c, ca);
}
@ -366,6 +368,7 @@ err:

static void bch2_fs_free(struct bch_fs *c)
{
bch2_fs_quota_exit(c);
bch2_fs_fsio_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_btree_cache_exit(c);
@ -380,7 +383,7 @@ static void bch2_fs_free(struct bch_fs *c)
bioset_exit(&c->bio_write);
bioset_exit(&c->bio_read_split);
bioset_exit(&c->bio_read);
bioset_exit(&c->btree_read_bio);
bioset_exit(&c->btree_bio);
mempool_exit(&c->btree_interior_update_pool);
mempool_exit(&c->btree_reserve_pool);
mempool_exit(&c->fill_iter);
@ -492,6 +495,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)

bch2_fs_allocator_init(c);
bch2_fs_tiering_init(c);
bch2_fs_quota_init(c);

INIT_LIST_HEAD(&c->list);

@ -561,8 +565,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_update)) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->btree_read_bio, 1,
offsetof(struct btree_read_bio, bio),
bioset_init(&c->btree_bio, 1,
max(offsetof(struct btree_read_bio, bio),
offsetof(struct btree_write_bio, wbio.bio)),
BIOSET_NEED_BVECS) ||
bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS) ||
@ -671,13 +676,10 @@ static const char *__bch2_fs_start(struct bch_fs *c)
struct bch_dev *ca;
LIST_HEAD(journal);
struct jset *j;
struct closure cl;
time64_t now;
unsigned i;
int ret = -EINVAL;

closure_init_stack(&cl);

mutex_lock(&c->state_lock);

BUG_ON(c->state != BCH_FS_STARTING);
@ -705,14 +707,14 @@ static const char *__bch2_fs_start(struct bch_fs *c)
unsigned level;
struct bkey_i *k;

err = "missing btree root";
k = bch2_journal_find_btree_root(c, j, i, &level);
if (!k && i < BTREE_ID_ALLOC)
goto err;

if (!k)
continue;

err = "invalid btree root pointer";
if (IS_ERR(k))
goto err;

err = "error reading btree root";
if (bch2_btree_root_read(c, i, k, level)) {
if (i != BTREE_ID_ALLOC)
@ -722,6 +724,10 @@ static const char *__bch2_fs_start(struct bch_fs *c)
}
}

for (i = 0; i < BTREE_ID_NR; i++)
if (!c->btree_roots[i].b)
bch2_btree_root_alloc(c, i);

err = "error reading allocation information";
ret = bch2_alloc_read(c, &journal);
if (ret)
@ -739,14 +745,6 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (c->opts.noreplay)
goto recovery_done;

err = "cannot allocate new btree root";
for (i = 0; i < BTREE_ID_NR; i++)
if (!c->btree_roots[i].b &&
bch2_btree_root_alloc(c, i, &cl))
goto err;

closure_sync(&cl);

/*
* bch2_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
@ -754,12 +752,9 @@ static const char *__bch2_fs_start(struct bch_fs *c)
*/
bch2_journal_start(c);

err = "error starting allocator thread";
for_each_rw_member(ca, c, i)
if (bch2_dev_allocator_start(ca)) {
percpu_ref_put(&ca->io_ref);
goto err;
}
err = "error starting allocator";
if (bch2_fs_allocator_start(c))
goto err;

bch_verbose(c, "starting journal replay:");
err = "journal replay failed";
@ -777,6 +772,14 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (ret)
goto err;
bch_verbose(c, "fsck done");

if (c->opts.usrquota || c->opts.grpquota) {
bch_verbose(c, "reading quotas:");
ret = bch2_fs_quota_read(c);
if (ret)
goto err;
bch_verbose(c, "quotas done");
}
} else {
struct bch_inode_unpacked inode;
struct bkey_inode_buf packed_inode;
@ -784,6 +787,7 @@ static const char *__bch2_fs_start(struct bch_fs *c)
bch_notice(c, "initializing new filesystem");

set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
set_bit(BCH_FS_BRAND_NEW_FS, &c->flags);

ret = bch2_initial_gc(c, &journal);
if (ret)
@ -791,15 +795,15 @@ static const char *__bch2_fs_start(struct bch_fs *c)

err = "unable to allocate journal buckets";
for_each_rw_member(ca, c, i)
if (bch2_dev_journal_alloc(ca)) {
if (bch2_dev_journal_alloc(c, ca)) {
percpu_ref_put(&ca->io_ref);
goto err;
}

err = "cannot allocate new btree root";
clear_bit(BCH_FS_BRAND_NEW_FS, &c->flags);

for (i = 0; i < BTREE_ID_NR; i++)
if (bch2_btree_root_alloc(c, i, &cl))
goto err;
bch2_btree_root_alloc(c, i);

/*
* journal_res_get() will crash if called before this has
@ -808,15 +812,9 @@ static const char *__bch2_fs_start(struct bch_fs *c)
bch2_journal_start(c);
bch2_journal_set_replay_done(&c->journal);

err = "error starting allocator thread";
for_each_rw_member(ca, c, i)
if (bch2_dev_allocator_start(ca)) {
percpu_ref_put(&ca->io_ref);
goto err;
}

/* Wait for new btree roots to be written: */
closure_sync(&cl);
err = "error starting allocator";
if (bch2_fs_allocator_start(c))
goto err;

bch2_inode_init(c, &inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
@ -830,6 +828,12 @@ static const char *__bch2_fs_start(struct bch_fs *c)
NULL, NULL, NULL, 0))
goto err;

if (c->opts.usrquota || c->opts.grpquota) {
ret = bch2_fs_quota_read(c);
if (ret)
goto err;
}

err = "error writing first journal entry";
if (bch2_journal_meta(&c->journal))
goto err;
@ -867,8 +871,6 @@ out:
return err;
err:
fsck_err:
closure_sync(&cl);

switch (ret) {
case BCH_FSCK_ERRORS_NOT_FIXED:
bch_err(c, "filesystem contains errors: please report this to the developers");
@ -1107,6 +1109,8 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
struct bch_dev *ca;
int ret;

lockdep_assert_held(&c->state_lock);

if (le64_to_cpu(sb->sb->seq) >
le64_to_cpu(c->disk_sb->seq))
bch2_sb_to_fs(c, sb->sb);
@ -1153,7 +1157,9 @@ static int __bch2_dev_online(struct bch_fs *c, struct bch_sb_handle *sb)
bdevname(ca->disk_sb.bdev, c->name);
bdevname(ca->disk_sb.bdev, ca->name);

mutex_lock(&c->sb_lock);
bch2_mark_dev_superblock(c, ca, BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
mutex_unlock(&c->sb_lock);

if (ca->mi.state == BCH_MEMBER_STATE_RW)
bch2_dev_allocator_add(c, ca);
@ -1430,17 +1436,18 @@ err:
/* Add new device to running filesystem: */
int bch2_dev_add(struct bch_fs *c, const char *path)
{
struct bch_opts opts = bch2_opts_empty();
struct bch_sb_handle sb;
const char *err;
struct bch_dev *ca = NULL;
struct bch_sb_field_members *mi, *dev_mi;
struct bch_member saved_mi;
unsigned dev_idx, nr_devices, u64s;
int ret = -EINVAL;
int ret;

err = bch2_read_super(path, bch2_opts_empty(), &sb);
if (err)
return -EINVAL;
ret = bch2_read_super(path, &opts, &sb);
if (ret)
return ret;

err = bch2_sb_validate(&sb);
if (err)
@ -1479,14 +1486,14 @@ have_slot:
sizeof(struct bch_member) * nr_devices) / sizeof(u64);
err = "no space in superblock for member info";

mi = bch2_fs_sb_resize_members(c, u64s);
if (!mi)
goto err_unlock;

dev_mi = bch2_sb_resize_members(&sb, u64s);
if (!dev_mi)
goto err_unlock;

mi = bch2_fs_sb_resize_members(c, u64s);
if (!mi)
goto err_unlock;

memcpy(dev_mi, mi, u64s * sizeof(u64));
dev_mi->members[dev_idx] = saved_mi;

@ -1499,30 +1506,30 @@ have_slot:
c->disk_sb->nr_devices = nr_devices;
c->sb.nr_devices = nr_devices;

bch2_write_super(c);
mutex_unlock(&c->sb_lock);

if (bch2_dev_alloc(c, dev_idx)) {
err = "cannot allocate memory";
ret = -ENOMEM;
goto err_unlock;
goto err;
}

if (__bch2_dev_online(c, &sb)) {
err = "bch2_dev_online() error";
ret = -ENOMEM;
goto err_unlock;
goto err;
}

bch2_write_super(c);
mutex_unlock(&c->sb_lock);

ca = bch_dev_locked(c, dev_idx);
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
err = "journal alloc failed";
if (bch2_dev_journal_alloc(ca))
goto err;

err = __bch2_dev_read_write(c, ca);
if (err)
goto err;

err = "journal alloc failed";
if (bch2_dev_journal_alloc(c, ca))
goto err;
}

mutex_unlock(&c->state_lock);
@ -1540,16 +1547,20 @@ err:
/* Hot add existing device to running filesystem: */
int bch2_dev_online(struct bch_fs *c, const char *path)
{
struct bch_opts opts = bch2_opts_empty();
struct bch_sb_handle sb = { NULL };
struct bch_dev *ca;
unsigned dev_idx;
const char *err;
int ret;

mutex_lock(&c->state_lock);

err = bch2_read_super(path, bch2_opts_empty(), &sb);
if (err)
goto err;
ret = bch2_read_super(path, &opts, &sb);
if (ret) {
mutex_unlock(&c->state_lock);
return ret;
}

dev_idx = sb.sb->dev_idx;

@ -1557,13 +1568,10 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
if (err)
goto err;

mutex_lock(&c->sb_lock);
if (__bch2_dev_online(c, &sb)) {
err = "__bch2_dev_online() error";
mutex_unlock(&c->sb_lock);
goto err;
}
mutex_unlock(&c->sb_lock);

ca = bch_dev_locked(c, dev_idx);
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
@ -1585,6 +1593,12 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
{
mutex_lock(&c->state_lock);

if (!bch2_dev_is_online(ca)) {
bch_err(ca, "Already offline");
mutex_unlock(&c->state_lock);
return 0;
}

if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
bch_err(ca, "Cannot offline required disk");
mutex_unlock(&c->state_lock);
@ -1617,9 +1631,19 @@ int bch2_dev_evacuate(struct bch_fs *c, struct bch_dev *ca)
goto err;
}

ret = bch2_journal_flush_device(&c->journal, ca->dev_idx);
if (ret) {
bch_err(ca, "Migrate failed: error %i flushing journal", ret);
goto err;
}

data = bch2_dev_has_data(c, ca);
if (data) {
bch_err(ca, "Migrate error: data still present (%x)", data);
char buf[100];

bch2_scnprint_flag_list(buf, sizeof(buf),
bch2_data_types, data);
bch_err(ca, "Migrate failed, still has data (%s)", buf);
ret = -EINVAL;
goto err;
}
@ -1670,33 +1694,33 @@ err:

/* Filesystem open: */

const char *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts, struct bch_fs **ret)
struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts)
{
const char *err;
struct bch_sb_handle *sb = NULL;
struct bch_fs *c = NULL;
struct bch_sb_handle *sb;
unsigned i, best_sb = 0;
const char *err;
int ret = -ENOMEM;

if (!nr_devices)
return "need at least one device";
return ERR_PTR(-EINVAL);

if (!try_module_get(THIS_MODULE))
return "module unloading";
return ERR_PTR(-ENODEV);

err = "cannot allocate memory";
sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
if (!sb)
goto err;

for (i = 0; i < nr_devices; i++) {
err = bch2_read_super(devices[i], opts, &sb[i]);
if (err)
ret = bch2_read_super(devices[i], &opts, &sb[i]);
if (ret)
goto err;

err = bch2_sb_validate(&sb[i]);
if (err)
goto err;
goto err_print;
}

for (i = 1; i < nr_devices; i++)
@ -1707,56 +1731,53 @@ const char *bch2_fs_open(char * const *devices, unsigned nr_devices,
for (i = 0; i < nr_devices; i++) {
err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
if (err)
goto err;
goto err_print;
}

err = "cannot allocate memory";
ret = -ENOMEM;
c = bch2_fs_alloc(sb[best_sb].sb, opts);
if (!c)
goto err;

err = "bch2_dev_online() error";
mutex_lock(&c->sb_lock);
mutex_lock(&c->state_lock);
for (i = 0; i < nr_devices; i++)
if (__bch2_dev_online(c, &sb[i])) {
mutex_unlock(&c->sb_lock);
goto err;
mutex_unlock(&c->state_lock);
goto err_print;
}
mutex_unlock(&c->sb_lock);
mutex_unlock(&c->state_lock);

err = "insufficient devices";
if (!bch2_fs_may_start(c))
goto err;
goto err_print;

if (!c->opts.nostart) {
err = __bch2_fs_start(c);
if (err)
goto err;
goto err_print;
}

err = bch2_fs_online(c);
if (err)
goto err;
goto err_print;

if (ret)
*ret = c;
else
closure_put(&c->cl);

err = NULL;
out:
kfree(sb);
module_put(THIS_MODULE);
if (err)
c = NULL;
return err;
return c;
err_print:
pr_err("bch_fs_open err opening %s: %s",
devices[0], err);
ret = -EINVAL;
err:
if (c)
bch2_fs_stop(c);

for (i = 0; i < nr_devices; i++)
bch2_free_super(&sb[i]);
goto out;
kfree(sb);
module_put(THIS_MODULE);
return ERR_PTR(ret);
}

static const char *__bch2_fs_open_incremental(struct bch_sb_handle *sb,
@ -1827,9 +1848,8 @@ const char *bch2_fs_open_incremental(const char *path)
struct bch_opts opts = bch2_opts_empty();
const char *err;

err = bch2_read_super(path, opts, &sb);
if (err)
return err;
if (bch2_read_super(path, &opts, &sb))
return "error reading superblock";

err = __bch2_fs_open_incremental(&sb, opts);
bch2_free_super(&sb);
@ -198,8 +198,7 @@ const char *bch2_fs_read_write(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);

const char *bch2_fs_start(struct bch_fs *);
const char *bch2_fs_open(char * const *, unsigned, struct bch_opts,
struct bch_fs **);
struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts);
const char *bch2_fs_open_incremental(const char *path);

#endif /* _BCACHEFS_SUPER_H */
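Similarly, the super.h hunk above has bch2_fs_open() return the filesystem pointer directly, with failures encoded via ERR_PTR() instead of an error string plus an out parameter. A sketch of the resulting call pattern; the wrapper function and the error printing are again illustrative assumptions, and IS_ERR()/PTR_ERR() come from the kernel compat headers this tree already uses.

/* Hypothetical caller of the new bch2_fs_open(): */
static struct bch_fs *open_fs_example(char * const *devs, unsigned nr_devs)
{
	struct bch_fs *c = bch2_fs_open(devs, nr_devs, bch2_opts_empty());

	if (IS_ERR(c)) {
		fprintf(stderr, "error opening filesystem: %s\n",
			strerror((int) -PTR_ERR(c)));
		return NULL;
	}
	return c;
}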
@ -39,7 +39,8 @@ static int bch2_tiering_thread(void *arg)
struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
struct io_clock *clock = &c->io_clock[WRITE];
struct bch_dev *ca;
u64 tier_capacity, available_sectors, keys_moved, sectors_moved;
struct bch_move_stats move_stats;
u64 tier_capacity, available_sectors;
unsigned long last;
unsigned i, nr_devices;

@ -91,8 +92,7 @@ static int bch2_tiering_thread(void *arg)
0,
-1,
tiering_pred, tier,
&keys_moved,
&sectors_moved);
&move_stats);
}

return 0;
@ -64,6 +64,7 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
vsnprintf(p->comm, sizeof(p->comm), namefmt, args);
va_end(args);

p->flags |= PF_KTHREAD;
p->thread_fn = thread_fn;
p->thread_data = thread_data;
p->state = TASK_UNINTERRUPTIBLE;
@ -73,6 +74,7 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
init_completion(&p->exited);

pthread_create(&p->thread, NULL, kthread_start_fn, p);
pthread_setname_np(p->thread, p->comm);
return p;
}