Update bcachefs sources to ff95156479

Author: Kent Overstreet
Date:   2017-04-03 23:05:13 -08:00
Parent: 1f24d50522
Commit: 64c325ef48

20 changed files with 207 additions and 136 deletions

View File

@@ -1 +1 @@
-c07073eb3b218df0ea107a3e04d2431703f0c07b
+83667254ddf04f558c90f32439e36d7a04ac3a39

View File

@@ -121,3 +121,8 @@ update-bcachefs-sources:
 	echo `cd $(LINUX_DIR); git rev-parse HEAD` > .bcachefs_revision
 	cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
 	cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/
+
+.PHONY: update-commit-bcachefs-sources
+update-commit-bcachefs-sources: update-bcachefs-sources
+	git commit -m "Update bcachefs sources to `cut -b1-10 .bcachefs_revision`" \
+		.bcachefs_revision libbcachefs/

View File

@@ -256,19 +256,22 @@ static struct nonce prio_nonce(struct prio_set *p)
 	}};
 }
 
-static int bch2_prio_write(struct bch_dev *ca)
+int bch2_prio_write(struct bch_dev *ca)
 {
 	struct bch_fs *c = ca->fs;
 	struct journal *j = &c->journal;
 	struct journal_res res = { 0 };
 	bool need_new_journal_entry;
-	int i, ret;
+	int i, ret = 0;
 
 	if (c->opts.nochanges)
 		return 0;
 
+	mutex_lock(&ca->prio_write_lock);
 	trace_prio_write_start(ca);
 
+	ca->need_prio_write = false;
+
 	atomic64_add(ca->mi.bucket_size * prio_buckets(ca),
 		     &ca->meta_sectors_written);
@@ -322,7 +325,7 @@ static int bch2_prio_write(struct bch_dev *ca)
 		if (bch2_dev_fatal_io_err_on(ret, ca,
 					"prio write to bucket %zu", r) ||
 		    bch2_meta_write_fault("prio"))
-			return ret;
+			goto err;
 	}
 
 	spin_lock(&j->lock);
@@ -340,7 +343,7 @@ static int bch2_prio_write(struct bch_dev *ca)
 		ret = bch2_journal_res_get(j, &res, u64s, u64s);
 		if (ret)
-			return ret;
+			goto err;
 
 		need_new_journal_entry = j->buf[res.idx].nr_prio_buckets <
 			ca->dev_idx + 1;
@@ -348,7 +351,7 @@ static int bch2_prio_write(struct bch_dev *ca)
 		ret = bch2_journal_flush_seq(j, res.seq);
 		if (ret)
-			return ret;
+			goto err;
 	} while (need_new_journal_entry);
 
 	/*
@@ -369,7 +372,9 @@ static int bch2_prio_write(struct bch_dev *ca)
 	spin_unlock(&ca->prio_buckets_lock);
 
 	trace_prio_write_end(ca);
-	return 0;
+err:
+	mutex_unlock(&ca->prio_write_lock);
+	return ret;
 }
 
 int bch2_prio_read(struct bch_dev *ca)
@@ -863,6 +868,7 @@ static int bch2_allocator_thread(void *arg)
 {
 	struct bch_dev *ca = arg;
 	struct bch_fs *c = ca->fs;
+	long bucket;
 	int ret;
 
 	set_freezable();
@@ -877,7 +883,7 @@ static int bch2_allocator_thread(void *arg)
 		 */
 		while (!fifo_empty(&ca->free_inc)) {
-			long bucket = fifo_peek(&ca->free_inc);
+			bucket = fifo_peek(&ca->free_inc);
 
 			/*
 			 * Don't remove from free_inc until after it's added
@@ -960,12 +966,8 @@ static int bch2_allocator_thread(void *arg)
 			 * consistent-ish:
 			 */
 			spin_lock(&ca->freelist_lock);
-			while (!fifo_empty(&ca->free_inc)) {
-				long bucket;
-
-				fifo_pop(&ca->free_inc, bucket);
+			while (fifo_pop(&ca->free_inc, bucket))
 				bch2_mark_free_bucket(ca, ca->buckets + bucket);
-			}
 			spin_unlock(&ca->freelist_lock);
 			goto out;
 		}
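
Note: the bch2_prio_write() changes above convert every early return after the new mutex_lock() into a goto err, leaving a single unlock site shared by the success and failure paths; that is also why ret is now initialized to 0. A minimal userspace sketch of the shape, assuming hypothetical example_dev / do_step() / do_flush() stand-ins rather than the real bcachefs calls:

#include <pthread.h>

struct example_dev {
	pthread_mutex_t write_lock;
};

/* stand-ins for the journal/prio-write steps; always succeed here */
static int do_step(struct example_dev *d)  { (void) d; return 0; }
static int do_flush(struct example_dev *d) { (void) d; return 0; }

static int example_write(struct example_dev *d)
{
	int ret = 0;	/* must start at 0: the success path falls through err: */

	pthread_mutex_lock(&d->write_lock);

	ret = do_step(d);
	if (ret)
		goto err;	/* never a bare return while the lock is held */

	ret = do_flush(d);
	if (ret)
		goto err;
err:
	pthread_mutex_unlock(&d->write_lock);	/* one unlock for all paths */
	return ret;
}

int main(void)
{
	struct example_dev d = { PTHREAD_MUTEX_INITIALIZER };

	return example_write(&d);
}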

View File

@@ -24,6 +24,7 @@ void bch2_dev_group_remove(struct dev_group *, struct bch_dev *);
 void bch2_dev_group_add(struct dev_group *, struct bch_dev *);
 
 int bch2_prio_read(struct bch_dev *);
+int bch2_prio_write(struct bch_dev *);
 
 size_t bch2_bucket_alloc(struct bch_dev *, enum alloc_reserve);

View File

@@ -379,6 +379,8 @@ struct bch_dev {
 	spinlock_t		prio_buckets_lock;
 	struct bio		*bio_prio;
 	bool			prio_read_done;
+	bool			need_prio_write;
+	struct mutex		prio_write_lock;
 
 	/*
 	 * free: Buckets that are ready to be used
@@ -456,6 +458,7 @@ enum {
 	BCH_FS_BDEV_MOUNTED,
 	BCH_FS_ERROR,
 	BCH_FS_FSCK_FIXED_ERRORS,
+	BCH_FS_FIXED_GENS,
 };
 
 struct btree_debug {

View File

@@ -617,7 +617,7 @@ struct bch_inode {
 	__le32			i_flags;
 	__le16			i_mode;
 	__u8			fields[0];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 BKEY_VAL_TYPE(inode, BCH_INODE_FS);
 
 #define BCH_INODE_FIELDS()				\
@@ -714,7 +714,7 @@ struct bch_dirent {
 	__u8			d_type;
 
 	__u8			d_name[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 BKEY_VAL_TYPE(dirent, BCH_DIRENT);
 
 /* Xattrs */
@@ -736,7 +736,7 @@ struct bch_xattr {
 	__u8			x_name_len;
 	__le16			x_val_len;
 	__u8			x_name[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 BKEY_VAL_TYPE(xattr, BCH_XATTR);
 
 /* Superblock */
@@ -811,7 +811,7 @@ struct bch_sb_layout {
 	__u8			nr_superblocks;
 	__u8			pad[5];
 	__u64			sb_offset[61];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 #define BCH_SB_LAYOUT_SECTOR	7
@@ -1211,7 +1211,7 @@ struct jset {
 		struct jset_entry start[0];
 		__u64		_data[0];
 	};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 LE32_BITMASK(JSET_CSUM_TYPE,	struct jset, flags, 0, 4);
 LE32_BITMASK(JSET_BIG_ENDIAN,	struct jset, flags, 4, 5);
@@ -1237,7 +1237,7 @@ struct prio_set {
 		__le16		write_prio;
 		__u8		gen;
 	} __attribute__((packed)) data[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 LE32_BITMASK(PSET_CSUM_TYPE,	struct prio_set, flags, 0, 4);
@@ -1295,7 +1295,7 @@ struct bset {
 		struct bkey_packed start[0];
 		__u64		_data[0];
 	};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 LE32_BITMASK(BSET_CSUM_TYPE,	struct bset, flags, 0, 4);
@@ -1325,7 +1325,7 @@ struct btree_node {
 		};
 	};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 LE64_BITMASK(BTREE_NODE_ID,	struct btree_node, flags,  0,  4);
 LE64_BITMASK(BTREE_NODE_LEVEL,	struct btree_node, flags,  4,  8);
@@ -1342,7 +1342,7 @@ struct btree_node_entry {
 		};
 	};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
 
 #ifdef __cplusplus
 }
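
Note: every on-disk structure above goes from __attribute__((packed)) to __attribute__((packed, aligned(8))): members stay un-padded, but the struct as a whole regains an 8-byte alignment (and a sizeof rounded up to a multiple of 8), so properly aligned pointers to these types can be formed again. A self-contained toy illustration (these are not the bcachefs formats):

#include <stdio.h>
#include <stdalign.h>

struct p  { unsigned long long x; unsigned char y; } __attribute__((packed));
struct pa { unsigned long long x; unsigned char y; } __attribute__((packed, aligned(8)));

int main(void)
{
	/* packed alone: alignment 1, sizeof 9 */
	printf("packed:             size %zu align %zu\n",
	       sizeof(struct p), alignof(struct p));
	/* packed, aligned(8): still no internal padding between members,
	 * but the struct is aligned to 8 and padded out to 16 */
	printf("packed, aligned(8): size %zu align %zu\n",
	       sizeof(struct pa), alignof(struct pa));
	return 0;
}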

View File

@@ -13,7 +13,7 @@ enum bkey_type {
 /* Type of a key in btree @id at level @level: */
 static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
 {
-	return level ? BKEY_TYPE_BTREE : id;
+	return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
 }
 
 static inline bool btree_type_has_ptrs(enum bkey_type type)
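
Note: the added cast is a type-cleanliness fix. btree_id and bkey_type are distinct enum types that share values by construction; the implicit conversion between them draws warnings from stricter compilers and is outright invalid in C++. A toy illustration with made-up enums:

#include <stdio.h>

enum color { RED, GREEN };
enum fruit { APPLE = 0, PEAR = 1 };	/* values line up with enum color */

static enum color pick(int level, enum fruit f)
{
	return level ? GREEN : (enum color) f;	/* explicit, warning-free */
}

int main(void)
{
	printf("%d\n", pick(0, PEAR));	/* prints 1 (GREEN) */
	return 0;
}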

View File

@@ -129,7 +129,7 @@ static u8 bch2_btree_mark_key(struct bch_fs *c, enum bkey_type type,
 int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 				struct bkey_s_c k)
 {
-	int ret;
+	int ret = 0;
 
 	switch (k.k->type) {
 	case BCH_EXTENT:
@@ -140,12 +140,17 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 		extent_for_each_ptr(e, ptr) {
 			struct bch_dev *ca = c->devs[ptr->dev];
 			struct bucket *g = PTR_BUCKET(ca, ptr);
+			struct bucket_mark new;
 
-			unfixable_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
-				"%s ptr gen in the future: %u > %u",
-				type == BKEY_TYPE_BTREE
-				? "btree" : "data",
-				ptr->gen, g->mark.gen);
+			if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+					"%s ptr gen in the future: %u > %u",
+					type == BKEY_TYPE_BTREE
+					? "btree" : "data",
+					ptr->gen, g->mark.gen)) {
+				bucket_cmpxchg(g, new, new.gen = ptr->gen);
+				set_bit(BCH_FS_FIXED_GENS, &c->flags);
+				ca->need_prio_write = true;
+			}
 		}
 
 		break;
@@ -157,7 +162,6 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
 			   atomic64_read(&c->key_version)));
 
 	bch2_btree_mark_key(c, type, k);
-	return 0;
 fsck_err:
 	return ret;
 }
@@ -382,50 +386,14 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
 	mutex_unlock(&c->btree_interior_update_lock);
 }
 
-/**
- * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes
- */
-void bch2_gc(struct bch_fs *c)
+void bch2_gc_start(struct bch_fs *c)
 {
 	struct bch_dev *ca;
 	struct bucket *g;
 	struct bucket_mark new;
-	u64 start_time = local_clock();
 	unsigned i;
 	int cpu;
 
-	/*
-	 * Walk _all_ references to buckets, and recompute them:
-	 *
-	 * Order matters here:
-	 *  - Concurrent GC relies on the fact that we have a total ordering for
-	 *    everything that GC walks - see gc_will_visit_node(),
-	 *    gc_will_visit_root()
-	 *
-	 *  - also, references move around in the course of index updates and
-	 *    various other crap: everything needs to agree on the ordering
-	 *    references are allowed to move around in - e.g., we're allowed to
-	 *    start with a reference owned by an open_bucket (the allocator) and
-	 *    move it to the btree, but not the reverse.
-	 *
-	 *    This is necessary to ensure that gc doesn't miss references that
-	 *    move around - if references move backwards in the ordering GC
-	 *    uses, GC could skip past them
-	 */
-
-	if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
-		return;
-
-	trace_gc_start(c);
-
-	/*
-	 * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on
-	 * gc_lock if sectors_available goes to 0:
-	 */
-	bch2_recalc_sectors_available(c);
-
-	down_write(&c->gc_lock);
-
 	lg_global_lock(&c->usage_lock);
 
 	/*
@@ -466,6 +434,50 @@ void bch2_gc(struct bch_fs *c)
 		}));
 		ca->oldest_gens[g - ca->buckets] = new.gen;
 	}
+}
+
+/**
+ * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes
+ */
+void bch2_gc(struct bch_fs *c)
+{
+	struct bch_dev *ca;
+	u64 start_time = local_clock();
+	unsigned i;
+
+	/*
+	 * Walk _all_ references to buckets, and recompute them:
+	 *
+	 * Order matters here:
+	 *  - Concurrent GC relies on the fact that we have a total ordering for
+	 *    everything that GC walks - see gc_will_visit_node(),
+	 *    gc_will_visit_root()
+	 *
+	 *  - also, references move around in the course of index updates and
+	 *    various other crap: everything needs to agree on the ordering
+	 *    references are allowed to move around in - e.g., we're allowed to
+	 *    start with a reference owned by an open_bucket (the allocator) and
+	 *    move it to the btree, but not the reverse.
+	 *
+	 *    This is necessary to ensure that gc doesn't miss references that
+	 *    move around - if references move backwards in the ordering GC
+	 *    uses, GC could skip past them
+	 */
+
+	if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
+		return;
+
+	trace_gc_start(c);
+
+	/*
+	 * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on
+	 * gc_lock if sectors_available goes to 0:
+	 */
+	bch2_recalc_sectors_available(c);
+
+	down_write(&c->gc_lock);
+
+	bch2_gc_start(c);
 
 	/* Walk allocator's references: */
 	bch2_mark_allocator_buckets(c);
@@ -964,8 +976,11 @@ err:
 
 int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
 {
+	unsigned iter = 0;
 	enum btree_id id;
 	int ret;
+again:
+	bch2_gc_start(c);
 
 	for (id = 0; id < BTREE_ID_NR; id++) {
 		ret = bch2_initial_gc_btree(c, id);
@@ -981,6 +996,17 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
 
 	bch2_mark_metadata(c);
 
+	if (test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
+		if (iter++ > 2) {
+			bch_info(c, "Unable to fix bucket gens, looping");
+			return -EINVAL;
+		}
+
+		bch_info(c, "Fixed gens, restarting initial mark and sweep:");
+		clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+		goto again;
+	}
+
 	/*
 	 * Skip past versions that might have possibly been used (as nonces),
 	 * but hadn't had their pointers written:
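
Note: the new BCH_FS_FIXED_GENS flag lets the initial mark-and-sweep restart after fsck repairs a bucket gen, with iter bounding the number of passes so a gen that never converges cannot spin forever. A minimal sketch of that control flow, with toy names in place of the real GC passes:

#include <stdbool.h>
#include <stdio.h>

/* stand-in for a GC pass; pretend only the first pass repairs anything */
static bool pass_fixed_something(unsigned pass)
{
	return pass == 0;
}

static int initial_pass(void)
{
	unsigned iter = 0;
again:
	if (pass_fixed_something(iter)) {	/* analogous to BCH_FS_FIXED_GENS */
		if (iter++ > 2)
			return -1;		/* give up: -EINVAL in the real code */
		printf("fixed gens, restarting mark and sweep\n");
		goto again;
	}
	return 0;
}

int main(void)
{
	return initial_pass();
}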

View File

@@ -1305,7 +1305,7 @@ static void btree_node_write_endio(struct bio *bio)
 		closure_put(cl);
 	}
 
-	if (ca)
+	if (wbio->have_io_ref)
 		percpu_ref_put(&ca->io_ref);
 }

View File

@@ -74,8 +74,7 @@ static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
 	mark_btree_node_locked(iter, level, SIX_LOCK_intent);
 }
 
-static inline enum six_lock_type
-btree_lock_want(struct btree_iter *iter, int level)
+static inline int btree_lock_want(struct btree_iter *iter, int level)
 {
 	return level < iter->locks_want
 		? SIX_LOCK_intent

View File

@@ -71,12 +71,26 @@ do {						\
 
 #define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
 
-#define fifo_push_back(fifo, i)					\
+#define fifo_push_back_ref(f)					\
+	(fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
+
+#define fifo_push_front_ref(f)					\
+	(fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask])
+
+#define fifo_push_back(fifo, new)				\
 ({								\
-	bool _r = !fifo_full((fifo));				\
+	typeof((fifo)->data) _r = fifo_push_back_ref(fifo);	\
 	if (_r)							\
-		(fifo)->data[(fifo)->back++ & (fifo)->mask] = (i); \
-	_r;							\
+		*_r = (new);					\
+	_r != NULL;						\
+})
+
+#define fifo_push_front(fifo, new)				\
+({								\
+	typeof((fifo)->data) _r = fifo_push_front_ref(fifo);	\
+	if (_r)							\
+		*_r = (new);					\
+	_r != NULL;						\
 })
 
 #define fifo_pop_front(fifo, i)					\
@@ -87,14 +101,6 @@ do {						\
 	_r;							\
 })
 
-#define fifo_push_front(fifo, i)				\
-({								\
-	bool _r = !fifo_full((fifo));				\
-	if (_r)							\
-		(fifo)->data[--(fifo)->front & (fifo)->mask] = (i); \
-	_r;							\
-})
-
 #define fifo_pop_back(fifo, i)					\
 ({								\
 	bool _r = !fifo_empty((fifo));				\
@@ -103,6 +109,7 @@ do {						\
 	_r;							\
 })
 
+#define fifo_push_ref(fifo)	fifo_push_back_ref(fifo)
 #define fifo_push(fifo, i)	fifo_push_back(fifo, (i))
 #define fifo_pop(fifo, i)	fifo_pop_front(fifo, (i))
 #define fifo_peek(fifo)		fifo_peek_front(fifo)
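
Note: the push macros are now built on *_ref variants that reserve a slot and hand back a pointer into the ring (NULL when full), which lets callers construct elements in place instead of pushing a copy; __journal_entry_new() in this same commit uses fifo_push_ref() exactly that way. A self-contained toy ring demonstrating the pattern (not the bcachefs fifo itself):

#include <stdio.h>

#define RING_SIZE 8	/* power of two, like the real fifo */

struct ring {
	unsigned front, back;
	long data[RING_SIZE];
};

/* reserve a slot at the back; NULL means full, nothing is copied */
static long *ring_push_ref(struct ring *r)
{
	if (r->back - r->front >= RING_SIZE)
		return NULL;
	return &r->data[r->back++ & (RING_SIZE - 1)];
}

/* pop from the front; returns 0 when empty, like fifo_pop() above */
static int ring_pop(struct ring *r, long *v)
{
	if (r->back == r->front)
		return 0;
	*v = r->data[r->front++ & (RING_SIZE - 1)];
	return 1;
}

int main(void)
{
	struct ring r = { 0 };
	long *slot, v, n = 0;

	while ((slot = ring_push_ref(&r)))	/* fill each slot in place */
		*slot = n++;

	while (ring_pop(&r, &v))
		printf("%ld\n", v);		/* prints 0 through 7 */
	return 0;
}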

View File

@@ -22,7 +22,7 @@ struct bkey_inode_buf {
 #define BCH_INODE_FIELD(_name, _bits)	+ 8 + _bits / 8
 	u8		_pad[0 + BCH_INODE_FIELDS()];
 #undef BCH_INODE_FIELD
-} __packed;
+};
 
 void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
 int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);

View File

@@ -85,22 +85,6 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
 
 /* Bios with headers */
 
-static void bch2_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio,
-			     struct bch_dev *ca, const struct bch_extent_ptr *ptr)
-{
-	wbio->ca		= ca;
-	wbio->submit_time_us	= local_clock_us();
-	wbio->bio.bi_iter.bi_sector = ptr->offset;
-	wbio->bio.bi_bdev	= ca ? ca->disk_sb.bdev : NULL;
-
-	if (unlikely(!ca)) {
-		bcache_io_error(c, &wbio->bio, "device has been removed");
-		bio_endio(&wbio->bio);
-	} else {
-		generic_make_request(&wbio->bio);
-	}
-}
-
 void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 			       const struct bkey_i *k)
 {
@@ -116,10 +100,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 	extent_for_each_ptr(e, ptr) {
 		ca = c->devs[ptr->dev];
-		if (!percpu_ref_tryget(&ca->io_ref)) {
-			bch2_submit_wbio(c, wbio, NULL, ptr);
-			break;
-		}
 
 		if (ptr + 1 < &extent_entry_last(e)->ptr) {
 			n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO,
@@ -132,6 +112,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 			n->bounce	= false;
 			n->split	= true;
 			n->put_bio	= true;
+			n->have_io_ref	= true;
 			n->bio.bi_opf	= wbio->bio.bi_opf;
 			__bio_inc_remaining(n->orig);
 		} else {
@@ -141,7 +122,18 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 		if (!journal_flushes_device(ca))
 			n->bio.bi_opf |= REQ_FUA;
 
-		bch2_submit_wbio(c, n, ca, ptr);
+		n->ca			= ca;
+		n->submit_time_us	= local_clock_us();
+		n->bio.bi_iter.bi_sector = ptr->offset;
+
+		if (likely(percpu_ref_tryget(&ca->io_ref))) {
+			n->bio.bi_bdev	= ca->disk_sb.bdev;
+			generic_make_request(&n->bio);
+		} else {
+			n->have_io_ref	= false;
+			bcache_io_error(c, &n->bio, "device has been removed");
+			bio_endio(&n->bio);
+		}
 	}
 }
@@ -327,7 +319,7 @@ static void bch2_write_endio(struct bio *bio)
 		set_closure_fn(cl, bch2_write_io_error, index_update_wq(op));
 	}
 
-	if (ca)
+	if (wbio->have_io_ref)
 		percpu_ref_put(&ca->io_ref);
 
 	if (bio->bi_error && orig)

View File

@@ -73,7 +73,8 @@ struct bch_write_bio {
 	unsigned		submit_time_us;
 	unsigned		split:1,
 				bounce:1,
-				put_bio:1;
+				put_bio:1,
+				have_io_ref:1;
 
 	/* Only for btree writes: */
 	unsigned		used_mempool:1;
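
Note: with bch2_submit_wbio() gone, wbio->ca is always set, so the endio paths can no longer use it to tell whether an io_ref was actually taken; the new have_io_ref bit records whether percpu_ref_tryget() succeeded at submit time, and the ref is put only in that case. A hedged userspace analogue of the pairing, using a plain atomic counter in place of a percpu ref:

#include <stdatomic.h>
#include <stdbool.h>

struct dev  { atomic_int io_ref; bool dying; };
struct wbio { struct dev *ca; bool have_io_ref; };

static bool ref_tryget(struct dev *d)
{
	if (d->dying)
		return false;	/* analogue of a dead percpu_ref */
	atomic_fetch_add(&d->io_ref, 1);
	return true;
}

static void ref_put(struct dev *d)
{
	atomic_fetch_sub(&d->io_ref, 1);
}

static void submit(struct wbio *w)
{
	/* take the ref at submit time and remember whether we got it */
	w->have_io_ref = ref_tryget(w->ca);
	if (!w->have_io_ref)
		return;		/* device gone: error the bio, never touch the ref */
	/* ... issue the I/O ... */
}

static void endio(struct wbio *w)
{
	if (w->have_io_ref)	/* put only what submit actually took */
		ref_put(w->ca);
}

int main(void)
{
	struct dev d = { .dying = false };
	struct wbio w = { .ca = &d };

	atomic_init(&d.io_ref, 0);
	submit(&w);
	endio(&w);
	return 0;
}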

View File

@@ -1122,21 +1122,31 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
 #endif
 }
 
-static void __bch2_journal_next_entry(struct journal *j)
+static struct journal_entry_pin_list *
+__journal_entry_new(struct journal *j, int count)
 {
-	struct journal_entry_pin_list pin_list, *p;
-	struct journal_buf *buf;
+	struct journal_entry_pin_list *p = fifo_push_ref(&j->pin);
 
 	/*
 	 * The fifo_push() needs to happen at the same time as j->seq is
 	 * incremented for last_seq() to be calculated correctly
 	 */
 	atomic64_inc(&j->seq);
-	BUG_ON(!fifo_push(&j->pin, pin_list));
-	p = &fifo_peek_back(&j->pin);
+
+	BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq));
 
 	INIT_LIST_HEAD(&p->list);
-	atomic_set(&p->count, 1);
+	atomic_set(&p->count, count);
+
+	return p;
+}
+
+static void __bch2_journal_next_entry(struct journal *j)
+{
+	struct journal_entry_pin_list *p;
+	struct journal_buf *buf;
+
+	p = __journal_entry_new(j, 1);
 
 	if (test_bit(JOURNAL_REPLAY_DONE, &j->flags)) {
 		smp_wmb();
@@ -1149,8 +1159,6 @@ static void __bch2_journal_next_entry(struct journal *j)
 	memset(buf->data, 0, sizeof(*buf->data));
 	buf->data->seq	= cpu_to_le64(atomic64_read(&j->seq));
 	buf->data->u64s	= 0;
-
-	BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq));
 }
 
 static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
@@ -1423,16 +1431,8 @@ void bch2_journal_start(struct bch_fs *c)
 
 	set_bit(JOURNAL_STARTED, &j->flags);
 
-	while (atomic64_read(&j->seq) < new_seq) {
-		struct journal_entry_pin_list pin_list, *p;
-
-		BUG_ON(!fifo_push(&j->pin, pin_list));
-		p = &fifo_peek_back(&j->pin);
-
-		INIT_LIST_HEAD(&p->list);
-		atomic_set(&p->count, 0);
-
-		atomic64_inc(&j->seq);
-	}
+	while (atomic64_read(&j->seq) < new_seq)
+		__journal_entry_new(j, 0);
 
 	/*
 	 * journal_buf_switch() only inits the next journal entry when it
@@ -1494,8 +1494,11 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 					   BTREE_INSERT_JOURNAL_REPLAY);
 		bch2_disk_reservation_put(c, &disk_res);
 
-		if (ret)
+		if (ret) {
+			bch_err(c, "journal replay: error %d while replaying key",
+				ret);
 			goto err;
+		}
 
 		cond_resched();
 		keys++;
@@ -1517,8 +1520,10 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 		 * entry on disk, if we crash before writing the next journal entry:
 		 */
 		ret = bch2_journal_meta(&c->journal);
-		if (ret)
+		if (ret) {
+			bch_err(c, "journal replay: error %d flushing journal", ret);
 			goto err;
+		}
 	}
 
 	bch_info(c, "journal replay done, %i keys in %i entries, seq %llu",
@@ -1526,11 +1531,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
 
 	bch2_journal_set_replay_done(&c->journal);
 err:
-	if (ret)
-		bch_err(c, "journal replay error: %d", ret);
-
 	bch2_journal_entries_free(list);
-
 	return ret;
 }
@@ -2372,7 +2373,7 @@ retry:
 	switch (journal_buf_switch(j, false)) {
 	case JOURNAL_ENTRY_ERROR:
 		spin_unlock(&j->lock);
-		return -EIO;
+		return -EROFS;
 	case JOURNAL_ENTRY_INUSE:
 		/* haven't finished writing out the previous one: */
 		spin_unlock(&j->lock);

View File

@@ -72,7 +72,7 @@ const struct bch_option bch2_opt_table[] = {
 #undef BCH_OPT
 };
 
-static enum bch_opt_id bch2_opt_lookup(const char *name)
+static int bch2_opt_lookup(const char *name)
 {
 	const struct bch_option *i;
@@ -209,7 +209,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
 enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val,
 				     u64 *res)
 {
-	enum bch_opt_id id = bch2_opt_lookup(name);
+	int id = bch2_opt_lookup(name);
 	int ret;
 
 	if (id < 0)
@@ -225,7 +225,7 @@ enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val,
 ssize_t bch2_opt_show(struct bch_opts *opts, const char *name,
 		      char *buf, size_t size)
 {
-	enum bch_opt_id id = bch2_opt_lookup(name);
+	int id = bch2_opt_lookup(name);
 	const struct bch_option *opt;
 	u64 v;
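
Note: bch2_opt_lookup() — and, likely for the same reason, btree_lock_want() earlier in this commit — now returns int instead of an enum: the callers test id < 0 for the not-found case, and an enum with only non-negative enumerators may have an unsigned underlying type, which would make that comparison vacuous. A toy illustration:

#include <stdio.h>

enum opt_id { OPT_FOO, OPT_BAR, OPT_NR };

static int opt_lookup(const char *name)
{
	if (name[0] == 'f')
		return OPT_FOO;
	if (name[0] == 'b')
		return OPT_BAR;
	return -1;	/* negative sentinel is well-defined in an int */
}

int main(void)
{
	int id = opt_lookup("xyz");	/* int, not enum opt_id */

	if (id < 0)	/* meaningful: id really is signed */
		printf("unknown option\n");
	return 0;
}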

View File

@@ -25,7 +25,7 @@ bch2_hash_info_init(struct bch_fs *c,
 	/* XXX ick */
 	struct bch_hash_info info = {
 		.type = (bi->i_flags >> INODE_STR_HASH_OFFSET) &
-			~(~0 << INODE_STR_HASH_BITS)
+			~(~0U << INODE_STR_HASH_BITS)
 	};
 
 	switch (info.type) {

View File

@@ -768,6 +768,15 @@ static const char *__bch2_fs_start(struct bch_fs *c)
 		if (ret)
 			goto err;
 
+		for_each_rw_member(ca, c, i)
+			if (ca->need_prio_write) {
+				ret = bch2_prio_write(ca);
+				if (ret) {
+					percpu_ref_put(&ca->io_ref);
+					goto err;
+				}
+			}
+
 		bch_verbose(c, "fsck done");
 	} else {
 		struct bch_inode_unpacked inode;
@@ -1092,6 +1101,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
 	spin_lock_init(&ca->freelist_lock);
 	spin_lock_init(&ca->prio_buckets_lock);
 	mutex_init(&ca->heap_lock);
+	mutex_init(&ca->prio_write_lock);
 	bch2_dev_moving_gc_init(ca);
 
 	INIT_WORK(&ca->io_error_work, bch2_nonfatal_io_error_work);
@@ -1265,6 +1275,15 @@ bool bch2_fs_may_start(struct bch_fs *c, int flags)
 	return true;
 }
 
+/*
+ * Note: this function is also used by the error paths - when a particular
+ * device sees an error, we call it to determine whether we can just set the
+ * device RO, or - if this function returns false - we'll set the whole
+ * filesystem RO:
+ *
+ * XXX: maybe we should be more explicit about whether we're changing state
+ * because we got an error or what have you?
+ */
 bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 			    enum bch_member_state new_state, int flags)
 {
@@ -1273,6 +1292,16 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 	if (new_state == BCH_MEMBER_STATE_RW)
 		return true;
 
+	if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+		return true;
+
+	/*
+	 * If the device is already offline - whatever is going on with it
+	 * can't possibly make the FS need to go RO:
+	 */
+	if (!bch2_dev_is_online(ca))
+		return true;
+
 	if (ca->mi.has_data &&
 	    !(flags & BCH_FORCE_IF_DATA_DEGRADED))
 		return false;

View File

@@ -32,6 +32,11 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter)
 	return ca;
 }
 
+static inline bool bch2_dev_is_online(struct bch_dev *ca)
+{
+	return !percpu_ref_is_zero(&ca->io_ref);
+}
+
 #define __for_each_member_device(ca, c, iter)				\
 	for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter))); (iter)++)

View File

@@ -85,7 +85,7 @@ ssize_t bch2_hprint(char *buf, s64 v)
 	int u, t = 0;
 
 	for (u = 0; v >= 1024 || v <= -1024; u++) {
-		t = v & ~(~0 << 10);
+		t = v & ~(~0U << 10);
 		v >>= 10;
 	}
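
Note: here and in the str_hash.h hunk earlier, the mask is now built from ~0U rather than ~0: ~0 is a signed int with value -1, and left-shifting a negative value is undefined behavior in C, while the unsigned form is fully defined. A small self-contained comparison:

#include <stdio.h>

int main(void)
{
	long long v = 3000;
	unsigned mask = ~(~0U << 10);	/* low 10 bits: 0x3ff */

	/* the signed spelling, ~(~0 << 10), left-shifts -1, which is UB */
	printf("mask  = %#x\n", mask);
	printf("v & m = %lld\n", v & mask);	/* 3000 & 1023 = 952 */
	return 0;
}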