Update bcachefs sources to b1a4dc53be bcachefs: Set lost+found mode to 0700

Kent Overstreet 2019-11-09 22:49:03 -05:00
parent f597e81aab
commit 1f7098c222
21 changed files with 426 additions and 392 deletions

View File

@ -1 +1 @@
9e76e8d98c52c128641b0f916a1990a37d60d22e
b1a4dc53be10a4c3132fccaaf604d73861a52d2d

View File

@ -1058,26 +1058,20 @@ int __bch2_bkey_cmp_packed(const struct bkey_packed *l,
const struct bkey_packed *r,
const struct btree *b)
{
int packed = bkey_lr_packed(l, r);
struct bkey unpacked;
if (likely(packed == BKEY_PACKED_BOTH))
if (likely(bkey_packed(l) && bkey_packed(r)))
return __bch2_bkey_cmp_packed_format_checked(l, r, b);
switch (packed) {
case BKEY_PACKED_NONE:
return bkey_cmp(((struct bkey *) l)->p,
((struct bkey *) r)->p);
case BKEY_PACKED_LEFT:
return __bch2_bkey_cmp_left_packed_format_checked(b,
(struct bkey_packed *) l,
&((struct bkey *) r)->p);
case BKEY_PACKED_RIGHT:
return -__bch2_bkey_cmp_left_packed_format_checked(b,
(struct bkey_packed *) r,
&((struct bkey *) l)->p);
default:
unreachable();
if (bkey_packed(l)) {
__bkey_unpack_key_format_checked(b, &unpacked, l);
l = (void*) &unpacked;
} else if (bkey_packed(r)) {
__bkey_unpack_key_format_checked(b, &unpacked, r);
r = (void*) &unpacked;
}
return bkey_cmp(((struct bkey *) l)->p, ((struct bkey *) r)->p);
}
__pure __flatten

View File

@ -418,7 +418,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
struct bkey_packed *prev = NULL, *k_packed;
struct bkey_s k;
struct btree_nr_keys nr;
BKEY_PADDED(k) tmp;
struct bkey unpacked;
memset(&nr, 0, sizeof(nr));
@ -426,11 +426,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
if (filter_whiteouts && bkey_whiteout(k_packed))
continue;
EBUG_ON(bkeyp_val_u64s(&src->format, k_packed) >
BKEY_EXTENT_VAL_U64s_MAX);
bch2_bkey_unpack(src, &tmp.k, k_packed);
k = bkey_i_to_s(&tmp.k);
k = __bkey_disassemble(src, k_packed, &unpacked);
if (filter_whiteouts &&
bch2_bkey_normalize(c, k))

View File

@ -294,38 +294,23 @@ static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
/* Auxiliary search trees */
#define BFLOAT_FAILED_UNPACKED (U8_MAX - 0)
#define BFLOAT_FAILED_PREV (U8_MAX - 1)
#define BFLOAT_FAILED_OVERFLOW (U8_MAX - 2)
#define BFLOAT_FAILED (U8_MAX - 2)
#define KEY_WORDS BITS_TO_LONGS(1 << BKEY_EXPONENT_BITS)
#define BFLOAT_FAILED_UNPACKED U8_MAX
#define BFLOAT_FAILED U8_MAX
struct bkey_float {
u8 exponent;
u8 key_offset;
union {
u32 mantissa32;
struct {
u16 mantissa16;
u16 _pad;
};
};
} __packed;
#define BFLOAT_32BIT_NR 32U
u16 mantissa;
};
#define BKEY_MANTISSA_BITS 16
static unsigned bkey_float_byte_offset(unsigned idx)
{
int d = (idx - BFLOAT_32BIT_NR) << 1;
d &= ~(d >> 31);
return idx * 6 - d;
return idx * sizeof(struct bkey_float);
}
struct ro_aux_tree {
struct bkey_float _d[0];
struct bkey_float f[0];
};
struct rw_aux_tree {
@ -380,8 +365,8 @@ static unsigned bset_aux_tree_buf_end(const struct bset_tree *t)
return t->aux_data_offset;
case BSET_RO_AUX_TREE:
return t->aux_data_offset +
DIV_ROUND_UP(bkey_float_byte_offset(t->size) +
sizeof(u8) * t->size, 8);
DIV_ROUND_UP(t->size * sizeof(struct bkey_float) +
t->size * sizeof(u8), 8);
case BSET_RW_AUX_TREE:
return t->aux_data_offset +
DIV_ROUND_UP(sizeof(struct rw_aux_tree) * t->size, 8);
@ -420,17 +405,11 @@ static u8 *ro_aux_tree_prev(const struct btree *b,
return __aux_tree_base(b, t) + bkey_float_byte_offset(t->size);
}
static struct bkey_float *bkey_float_get(struct ro_aux_tree *b,
unsigned idx)
{
return (void *) b + bkey_float_byte_offset(idx);
}
static struct bkey_float *bkey_float(const struct btree *b,
const struct bset_tree *t,
unsigned idx)
{
return bkey_float_get(ro_aux_tree_base(b, t), idx);
return ro_aux_tree_base(b, t)->f + idx;
}
static void bset_aux_tree_verify(struct btree *b)
@ -669,21 +648,6 @@ static unsigned rw_aux_tree_bsearch(struct btree *b,
return idx;
}
static inline unsigned bfloat_mantissa(const struct bkey_float *f,
unsigned idx)
{
return idx < BFLOAT_32BIT_NR ? f->mantissa32 : f->mantissa16;
}
static inline void bfloat_mantissa_set(struct bkey_float *f,
unsigned idx, unsigned mantissa)
{
if (idx < BFLOAT_32BIT_NR)
f->mantissa32 = mantissa;
else
f->mantissa16 = mantissa;
}
static inline unsigned bkey_mantissa(const struct bkey_packed *k,
const struct bkey_float *f,
unsigned idx)
@ -703,9 +667,9 @@ static inline unsigned bkey_mantissa(const struct bkey_packed *k,
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
v >>= f->exponent & 7;
#else
v >>= 64 - (f->exponent & 7) - (idx < BFLOAT_32BIT_NR ? 32 : 16);
v >>= 64 - (f->exponent & 7) - BKEY_MANTISSA_BITS;
#endif
return idx < BFLOAT_32BIT_NR ? (u32) v : (u16) v;
return (u16) v;
}
static void make_bfloat(struct btree *b, struct bset_tree *t,
@ -715,14 +679,10 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
{
struct bkey_float *f = bkey_float(b, t, j);
struct bkey_packed *m = tree_to_bkey(b, t, j);
struct bkey_packed *p = tree_to_prev_bkey(b, t, j);
struct bkey_packed *l, *r;
unsigned bits = j < BFLOAT_32BIT_NR ? 32 : 16;
unsigned mantissa;
int shift, exponent, high_bit;
EBUG_ON(bkey_next(p) != m);
if (is_power_of_2(j)) {
l = min_key;
@ -764,8 +724,7 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
* the original key.
*/
if (!bkey_packed(l) || !bkey_packed(r) ||
!bkey_packed(p) || !bkey_packed(m) ||
if (!bkey_packed(l) || !bkey_packed(r) || !bkey_packed(m) ||
!b->nr_key_bits) {
f->exponent = BFLOAT_FAILED_UNPACKED;
return;
@ -782,8 +741,8 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
* of the key: we handle this later:
*/
high_bit = max(bch2_bkey_greatest_differing_bit(b, l, r),
min_t(unsigned, bits, b->nr_key_bits) - 1);
exponent = high_bit - (bits - 1);
min_t(unsigned, BKEY_MANTISSA_BITS, b->nr_key_bits) - 1);
exponent = high_bit - (BKEY_MANTISSA_BITS - 1);
/*
* Then we calculate the actual shift value, from the start of the key
@ -792,12 +751,12 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
shift = (int) (b->format.key_u64s * 64 - b->nr_key_bits) + exponent;
EBUG_ON(shift + bits > b->format.key_u64s * 64);
EBUG_ON(shift + BKEY_MANTISSA_BITS > b->format.key_u64s * 64);
#else
shift = high_bit_offset +
b->nr_key_bits -
exponent -
bits;
BKEY_MANTISSA_BITS;
EBUG_ON(shift < KEY_PACKED_BITS_START);
#endif
@ -813,37 +772,7 @@ static void make_bfloat(struct btree *b, struct bset_tree *t,
if (exponent < 0)
mantissa |= ~(~0U << -exponent);
bfloat_mantissa_set(f, j, mantissa);
/*
* The bfloat must be able to tell its key apart from the previous key -
* if its key and the previous key don't differ in the required bits,
* flag as failed - unless the keys are actually equal, in which case
* we aren't required to return a specific one:
*/
if (exponent > 0 &&
bfloat_mantissa(f, j) == bkey_mantissa(p, f, j) &&
bkey_cmp_packed(b, p, m)) {
f->exponent = BFLOAT_FAILED_PREV;
return;
}
/*
* f->mantissa must compare >= the original key - for transitivity with
* the comparison in bset_search_tree. If we're dropping set bits,
* increment it:
*/
if (exponent > (int) bch2_bkey_ffs(b, m)) {
if (j < BFLOAT_32BIT_NR
? f->mantissa32 == U32_MAX
: f->mantissa16 == U16_MAX)
f->exponent = BFLOAT_FAILED_OVERFLOW;
if (j < BFLOAT_32BIT_NR)
f->mantissa32++;
else
f->mantissa16++;
}
f->mantissa = mantissa;
}
/* bytes remaining - only valid for last bset: */
@ -856,14 +785,8 @@ static unsigned __bset_tree_capacity(struct btree *b, struct bset_tree *t)
static unsigned bset_ro_tree_capacity(struct btree *b, struct bset_tree *t)
{
unsigned bytes = __bset_tree_capacity(b, t);
if (bytes < 7 * BFLOAT_32BIT_NR)
return bytes / 7;
bytes -= 7 * BFLOAT_32BIT_NR;
return BFLOAT_32BIT_NR + bytes / 5;
return __bset_tree_capacity(b, t) /
(sizeof(struct bkey_float) + sizeof(u8));
}
static unsigned bset_rw_tree_capacity(struct btree *b, struct bset_tree *t)
@ -1333,14 +1256,38 @@ static struct bkey_packed *bset_search_write_set(const struct btree *b,
return rw_aux_to_bkey(b, t, l);
}
noinline
static int bset_search_tree_slowpath(const struct btree *b,
struct bset_tree *t, struct bpos *search,
const struct bkey_packed *packed_search,
unsigned n)
static inline void prefetch_four_cachelines(void *p)
{
return bkey_cmp_p_or_unp(b, tree_to_bkey(b, t, n),
packed_search, search) < 0;
#ifdef CONFIG_X86_64
asm(".intel_syntax noprefix;"
"prefetcht0 [%0 - 127 + 64 * 0];"
"prefetcht0 [%0 - 127 + 64 * 1];"
"prefetcht0 [%0 - 127 + 64 * 2];"
"prefetcht0 [%0 - 127 + 64 * 3];"
".att_syntax prefix;"
:
: "r" (p + 127));
#else
prefetch(p + L1_CACHE_BYTES * 0);
prefetch(p + L1_CACHE_BYTES * 1);
prefetch(p + L1_CACHE_BYTES * 2);
prefetch(p + L1_CACHE_BYTES * 3);
#endif
}
static inline bool bkey_mantissa_bits_dropped(const struct btree *b,
const struct bkey_float *f,
unsigned idx)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
unsigned key_bits_start = b->format.key_u64s * 64 - b->nr_key_bits;
return f->exponent > key_bits_start;
#else
unsigned key_bits_end = high_bit_offset + b->nr_key_bits;
return f->exponent + BKEY_MANTISSA_BITS < key_bits_end;
#endif
}
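A toy illustration (not bcachefs code) of why the slowpath above is needed when two mantissas compare equal but bits were dropped: with 16-bit mantissas, keys that differ only below the mantissa window produce identical mantissas and cannot be ordered by the float alone, which is exactly the l == r && bkey_mantissa_bits_dropped() case that falls back to a full key comparison in bset_search_tree():

static void mantissa_collision_example(void)
{
	u32 a = 0x1234ABCD, b = 0x12345678;	/* differ only below bit 16 */
	u16 ma = a >> 16, mb = b >> 16;		/* both 0x1234 */

	/*
	 * Equal mantissas, unequal keys: the 16-bit bkey_float cannot order
	 * these two, so the search must compare the real keys instead.
	 */
	BUG_ON(ma != mb);
	BUG_ON(a == b);
}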
__flatten
@ -1350,44 +1297,37 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
const struct bkey_packed *packed_search)
{
struct ro_aux_tree *base = ro_aux_tree_base(b, t);
struct bkey_float *f = bkey_float_get(base, 1);
void *p;
unsigned inorder, n = 1;
struct bkey_float *f;
struct bkey_packed *k;
unsigned inorder, n = 1, l, r;
int cmp;
while (1) {
if (likely(n << 4 < t->size)) {
p = bkey_float_get(base, n << 4);
prefetch(p);
} else if (n << 3 < t->size) {
inorder = __eytzinger1_to_inorder(n, t->size, t->extra);
p = bset_cacheline(b, t, inorder);
#ifdef CONFIG_X86_64
asm(".intel_syntax noprefix;"
"prefetcht0 [%0 - 127 + 64 * 0];"
"prefetcht0 [%0 - 127 + 64 * 1];"
"prefetcht0 [%0 - 127 + 64 * 2];"
"prefetcht0 [%0 - 127 + 64 * 3];"
".att_syntax prefix;"
:
: "r" (p + 127));
#else
prefetch(p + L1_CACHE_BYTES * 0);
prefetch(p + L1_CACHE_BYTES * 1);
prefetch(p + L1_CACHE_BYTES * 2);
prefetch(p + L1_CACHE_BYTES * 3);
#endif
} else if (n >= t->size)
break;
do {
if (likely(n << 4 < t->size))
prefetch(&base->f[n << 4]);
f = bkey_float_get(base, n);
f = &base->f[n];
if (packed_search &&
likely(f->exponent < BFLOAT_FAILED))
n = n * 2 + (bfloat_mantissa(f, n) <
bkey_mantissa(packed_search, f, n));
else
n = n * 2 + bset_search_tree_slowpath(b, t,
search, packed_search, n);
if (!unlikely(packed_search))
goto slowpath;
if (unlikely(f->exponent >= BFLOAT_FAILED))
goto slowpath;
l = f->mantissa;
r = bkey_mantissa(packed_search, f, n);
if (unlikely(l == r) && bkey_mantissa_bits_dropped(b, f, n))
goto slowpath;
n = n * 2 + (l < r);
continue;
slowpath:
k = tree_to_bkey(b, t, n);
cmp = bkey_cmp_p_or_unp(b, k, packed_search, search);
if (!cmp)
return k;
n = n * 2 + (cmp < 0);
} while (n < t->size);
inorder = __eytzinger1_to_inorder(n >> 1, t->size, t->extra);
@ -1396,29 +1336,23 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
* n would have been the node we recursed to - the low bit tells us if
* we recursed left or recursed right.
*/
if (n & 1) {
return cacheline_to_bkey(b, t, inorder, f->key_offset);
} else {
if (--inorder) {
n = eytzinger1_prev(n >> 1, t->size);
f = bkey_float_get(base, n);
return cacheline_to_bkey(b, t, inorder, f->key_offset);
} else
if (likely(!(n & 1))) {
--inorder;
if (unlikely(!inorder))
return btree_bkey_first(b, t);
f = &base->f[eytzinger1_prev(n >> 1, t->size)];
}
return cacheline_to_bkey(b, t, inorder, f->key_offset);
}
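For reference, the descent above walks an implicit binary tree stored in Eytzinger (breadth-first) order: node n's children live at 2n and 2n + 1, so each step is just n = n * 2 + (mantissa comparison), with full key comparisons only on the slowpath. A standalone sketch of the same layout and descent over a plain int array (illustrative only, not bcachefs code):

/*
 * Lower-bound search over a 1-based array t[1..nr] stored in Eytzinger
 * order: children of node k are at 2k and 2k + 1.
 */
static unsigned eytzinger_lower_bound(const int *t, unsigned nr, int x)
{
	unsigned k = 1;

	while (k <= nr)
		k = k * 2 + (t[k] < x);

	/*
	 * Every "went right" step left a 1 in the low bits; stripping the
	 * trailing 1s plus one more bit recovers the last node where we went
	 * left, i.e. the first element >= x (0 if there is none).
	 */
	return k >> (__builtin_ctz(~k) + 1);
}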
/*
* Returns the first key greater than or equal to @search
*/
__always_inline __flatten
static struct bkey_packed *bch2_bset_search(struct btree *b,
static __always_inline __flatten
struct bkey_packed *__bch2_bset_search(struct btree *b,
struct bset_tree *t,
struct bpos *search,
struct bkey_packed *packed_search,
const struct bkey_packed *lossy_packed_search)
{
struct bkey_packed *m;
/*
* First, we search for a cacheline, then lastly we do a linear search
@ -1437,11 +1371,9 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
switch (bset_aux_tree_type(t)) {
case BSET_NO_AUX_TREE:
m = btree_bkey_first(b, t);
break;
return btree_bkey_first(b, t);
case BSET_RW_AUX_TREE:
m = bset_search_write_set(b, t, search, lossy_packed_search);
break;
return bset_search_write_set(b, t, search, lossy_packed_search);
case BSET_RO_AUX_TREE:
/*
* Each node in the auxiliary search tree covers a certain range
@ -1453,10 +1385,20 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
if (bkey_cmp(*search, t->max_key) > 0)
return btree_bkey_last(b, t);
m = bset_search_tree(b, t, search, lossy_packed_search);
break;
return bset_search_tree(b, t, search, lossy_packed_search);
default:
unreachable();
}
}
static __always_inline __flatten
struct bkey_packed *bch2_bset_search_linear(struct btree *b,
struct bset_tree *t,
struct bpos *search,
struct bkey_packed *packed_search,
const struct bkey_packed *lossy_packed_search,
struct bkey_packed *m)
{
if (lossy_packed_search)
while (m != btree_bkey_last(b, t) &&
bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
@ -1479,6 +1421,23 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
return m;
}
/*
* Returns the first key greater than or equal to @search
*/
static __always_inline __flatten
struct bkey_packed *bch2_bset_search(struct btree *b,
struct bset_tree *t,
struct bpos *search,
struct bkey_packed *packed_search,
const struct bkey_packed *lossy_packed_search)
{
struct bkey_packed *m = __bch2_bset_search(b, t, search,
lossy_packed_search);
return bch2_bset_search_linear(b, t, search,
packed_search, lossy_packed_search, m);
}
/* Btree node iterator */
static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
@ -1569,9 +1528,10 @@ __flatten
void bch2_btree_node_iter_init(struct btree_node_iter *iter,
struct btree *b, struct bpos *search)
{
struct bset_tree *t;
struct bkey_packed p, *packed_search = NULL;
struct btree_node_iter_set *pos = iter->data;
struct bkey_packed *k[MAX_BSETS];
unsigned i;
EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0);
bset_aux_tree_verify(b);
@ -1590,14 +1550,20 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
return;
}
for_each_bset(b, t) {
struct bkey_packed *k = bch2_bset_search(b, t, search,
packed_search, &p);
for (i = 0; i < b->nsets; i++) {
k[i] = __bch2_bset_search(b, b->set + i, search, &p);
prefetch_four_cachelines(k[i]);
}
for (i = 0; i < b->nsets; i++) {
struct bset_tree *t = b->set + i;
struct bkey_packed *end = btree_bkey_last(b, t);
if (k != end)
k[i] = bch2_bset_search_linear(b, t, search,
packed_search, &p, k[i]);
if (k[i] != end)
*pos++ = (struct btree_node_iter_set) {
__btree_node_key_to_offset(b, k),
__btree_node_key_to_offset(b, k[i]),
__btree_node_key_to_offset(b, end)
};
}
@ -1794,17 +1760,9 @@ void bch2_btree_keys_stats(struct btree *b, struct bset_stats *stats)
stats->floats += t->size - 1;
for (j = 1; j < t->size; j++)
switch (bkey_float(b, t, j)->exponent) {
case BFLOAT_FAILED_UNPACKED:
stats->failed_unpacked++;
break;
case BFLOAT_FAILED_PREV:
stats->failed_prev++;
break;
case BFLOAT_FAILED_OVERFLOW:
stats->failed_overflow++;
break;
}
stats->failed +=
bkey_float(b, t, j)->exponent ==
BFLOAT_FAILED;
}
}
}
@ -1813,9 +1771,7 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
struct bkey_packed *k)
{
struct bset_tree *t = bch2_bkey_to_bset(b, k);
struct bkey_packed *l, *r, *p;
struct bkey uk, up;
char buf1[200], buf2[200];
struct bkey uk;
unsigned j, inorder;
if (out->pos != out->end)
@ -1833,7 +1789,7 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
return;
switch (bkey_float(b, t, j)->exponent) {
case BFLOAT_FAILED_UNPACKED:
case BFLOAT_FAILED:
uk = bkey_unpack_key(b, k);
pr_buf(out,
" failed unpacked at depth %u\n"
@ -1841,41 +1797,5 @@ void bch2_bfloat_to_text(struct printbuf *out, struct btree *b,
ilog2(j),
uk.p.inode, uk.p.offset);
break;
case BFLOAT_FAILED_PREV:
p = tree_to_prev_bkey(b, t, j);
l = is_power_of_2(j)
? btree_bkey_first(b, t)
: tree_to_prev_bkey(b, t, j >> ffs(j));
r = is_power_of_2(j + 1)
? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
: tree_to_bkey(b, t, j >> (ffz(j) + 1));
up = bkey_unpack_key(b, p);
uk = bkey_unpack_key(b, k);
bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
pr_buf(out,
" failed prev at depth %u\n"
"\tkey starts at bit %u but first differing bit at %u\n"
"\t%llu:%llu\n"
"\t%llu:%llu\n"
"\t%s\n"
"\t%s\n",
ilog2(j),
bch2_bkey_greatest_differing_bit(b, l, r),
bch2_bkey_greatest_differing_bit(b, p, k),
uk.p.inode, uk.p.offset,
up.p.inode, up.p.offset,
buf1, buf2);
break;
case BFLOAT_FAILED_OVERFLOW:
uk = bkey_unpack_key(b, k);
pr_buf(out,
" failed overflow at depth %u\n"
"\t%llu:%llu\n",
ilog2(j),
uk.p.inode, uk.p.offset);
break;
}
}

View File

@ -582,9 +582,7 @@ struct bset_stats {
} sets[BSET_TREE_NR_TYPES];
size_t floats;
size_t failed_unpacked;
size_t failed_prev;
size_t failed_overflow;
size_t failed;
};
void bch2_btree_keys_stats(struct btree *, struct bset_stats *);

View File

@ -909,9 +909,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
" nr packed keys %u\n"
" nr unpacked keys %u\n"
" floats %zu\n"
" failed unpacked %zu\n"
" failed prev %zu\n"
" failed overflow %zu\n",
" failed unpacked %zu\n",
f->key_u64s,
f->bits_per_field[0],
f->bits_per_field[1],
@ -928,7 +926,5 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
b->nr.packed_keys,
b->nr.unpacked_keys,
stats.floats,
stats.failed_unpacked,
stats.failed_prev,
stats.failed_overflow);
stats.failed);
}

View File

@ -1096,7 +1096,12 @@ static int btree_iter_traverse_one(struct btree_iter *iter)
if (unlikely(iter->level >= BTREE_MAX_DEPTH))
return 0;
if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
/*
* if we need interior nodes locked, call btree_iter_relock() to make
* sure we walk back up enough that we lock them:
*/
if (iter->uptodate == BTREE_ITER_NEED_RELOCK ||
iter->locks_want > 1)
bch2_btree_iter_relock(iter, false);
if (iter->uptodate < BTREE_ITER_NEED_RELOCK)

View File

@ -1464,7 +1464,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc *a;
unsigned old;
u16 *dst_sectors;
bool overflow;
int ret;
@ -1519,22 +1519,24 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
goto out;
}
if (!p.ptr.cached) {
old = u.dirty_sectors;
overflow = checked_add(u.dirty_sectors, sectors);
} else {
old = u.cached_sectors;
overflow = checked_add(u.cached_sectors, sectors);
dst_sectors = !p.ptr.cached
? &u.dirty_sectors
: &u.cached_sectors;
overflow = checked_add(*dst_sectors, sectors);
if (overflow) {
bch2_fs_inconsistent(c,
"bucket sector count overflow: %u + %lli > U16_MAX",
*dst_sectors, sectors);
/* return an error indicating that we need full fsck */
ret = -EIO;
goto out;
}
u.data_type = u.dirty_sectors || u.cached_sectors
? data_type : 0;
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
old, sectors);
BUG_ON(overflow);
a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
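checked_add() is not shown in this hunk; for context, a sketch of an overflow-detecting add with the contract the new error path assumes - add into a 16-bit sector counter and report whether the result would exceed U16_MAX, so the caller can flag the filesystem inconsistent and return -EIO instead of hitting the old BUG_ON(). This helper is illustrative only; the real macro lives in buckets.h and may differ in detail:

static bool sector_count_checked_add(u16 *dst, s64 sectors)
{
	s64 sum = (s64) *dst + sectors;

	if (sum < 0 || sum > U16_MAX)
		return true;	/* caller flags the fs inconsistent, returns -EIO */

	*dst = sum;
	return false;
}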

View File

@ -135,17 +135,16 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
return ret;
}
void bch2_increment_clock(struct bch_fs *c, unsigned sectors, int rw)
void __bch2_increment_clock(struct io_clock *clock)
{
struct io_clock *clock = &c->io_clock[rw];
struct io_timer *timer;
unsigned long now;
unsigned sectors;
/* Buffer up one megabyte worth of IO in the percpu counter */
preempt_disable();
if (likely(this_cpu_add_return(*clock->pcpu_buf, sectors) <
IO_CLOCK_PCPU_SECTORS)) {
if (this_cpu_read(*clock->pcpu_buf) < IO_CLOCK_PCPU_SECTORS) {
preempt_enable();
return;
}

View File

@ -6,7 +6,18 @@ void bch2_io_timer_add(struct io_clock *, struct io_timer *);
void bch2_io_timer_del(struct io_clock *, struct io_timer *);
void bch2_kthread_io_clock_wait(struct io_clock *, unsigned long,
unsigned long);
void bch2_increment_clock(struct bch_fs *, unsigned, int);
void __bch2_increment_clock(struct io_clock *);
static inline void bch2_increment_clock(struct bch_fs *c, unsigned sectors,
int rw)
{
struct io_clock *clock = &c->io_clock[rw];
if (unlikely(this_cpu_add_return(*clock->pcpu_buf, sectors) >=
IO_CLOCK_PCPU_SECTORS))
__bch2_increment_clock(clock);
}
void bch2_io_clock_schedule_timeout(struct io_clock *, unsigned long);
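The fast path now stays entirely percpu: bch2_increment_clock() is inline and only bumps *clock->pcpu_buf, and only once roughly a megabyte of IO (IO_CLOCK_PCPU_SECTORS) has accumulated does it call out of line into __bch2_increment_clock() to advance the shared io clock and handle any expired timers. A standalone user-space sketch of the same buffer-then-flush pattern (illustrative names and threshold, not part of this commit):

#include <stdatomic.h>
#include <stdio.h>

#define FLUSH_THRESHOLD	2048u	/* 2048 512-byte sectors = 1MB */

static atomic_ulong shared_clock;		/* the expensive, shared counter */
static _Thread_local unsigned long local_buf;	/* cheap per-thread buffer */

static void clock_flush(void)
{
	atomic_fetch_add(&shared_clock, local_buf);
	local_buf = 0;
}

static void clock_add(unsigned sectors)
{
	local_buf += sectors;
	if (local_buf >= FLUSH_THRESHOLD)	/* rare slow path */
		clock_flush();
}

int main(void)
{
	for (int i = 0; i < 10000; i++)
		clock_add(8);			/* e.g. one 4KiB read */
	clock_flush();
	printf("%lu sectors accounted\n", atomic_load(&shared_clock));
	return 0;
}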

View File

@ -64,7 +64,7 @@ void bch2_io_error(struct bch_dev *ca)
enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
const char *fmt, ...)
{
struct fsck_err_state *s;
struct fsck_err_state *s = NULL;
va_list args;
bool fix = false, print = true, suppressing = false;
char _buf[sizeof(s->buf)], *buf = _buf;
@ -99,8 +99,13 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
found:
list_move(&s->list, &c->fsck_errors);
s->nr++;
suppressing = s->nr == FSCK_ERR_RATELIMIT_NR;
print = s->nr <= FSCK_ERR_RATELIMIT_NR;
if (c->opts.ratelimit_errors &&
s->nr >= FSCK_ERR_RATELIMIT_NR) {
if (s->nr == FSCK_ERR_RATELIMIT_NR)
suppressing = true;
else
print = false;
}
buf = s->buf;
print:
va_start(args, fmt);
@ -156,7 +161,7 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
mutex_lock(&c->fsck_error_lock);
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
if (s->nr > FSCK_ERR_RATELIMIT_NR)
if (s->ratelimited)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
list_del(&s->list);

View File

@ -114,6 +114,7 @@ struct fsck_err_state {
struct list_head list;
const char *fmt;
u64 nr;
bool ratelimited;
char buf[512];
};

View File

@ -1218,7 +1218,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
struct bkey_i whiteout = *insert;
struct bkey_packed *_k;
struct bkey unpacked;
BKEY_PADDED(k) tmp;
EBUG_ON(iter->level);
EBUG_ON(!insert->k.size);
@ -1292,25 +1291,23 @@ next:
bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
if (update_btree) {
bkey_copy(&tmp.k, insert);
if (deleting)
tmp.k.k.type = KEY_TYPE_discard;
insert->k.type = KEY_TYPE_discard;
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
extent_bset_insert(c, iter, &tmp.k);
extent_bset_insert(c, iter, insert);
}
if (update_journal) {
bkey_copy(&tmp.k, !deleting ? insert : &whiteout);
struct bkey_i *k = !deleting ? insert : &whiteout;
if (deleting)
tmp.k.k.type = KEY_TYPE_discard;
k->k.type = KEY_TYPE_discard;
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
bch2_btree_journal_key(trans, iter, &tmp.k);
bch2_btree_journal_key(trans, iter, k);
}
bch2_cut_front(insert->k.p, insert);
@ -1390,16 +1387,18 @@ static unsigned bch2_crc_field_size_max[] = {
};
static void bch2_extent_crc_pack(union bch_extent_crc *dst,
struct bch_extent_crc_unpacked src)
struct bch_extent_crc_unpacked src,
enum bch_extent_entry_type type)
{
#define set_common_fields(_dst, _src) \
_dst.type = 1 << type; \
_dst.csum_type = _src.csum_type, \
_dst.compression_type = _src.compression_type, \
_dst._compressed_size = _src.compressed_size - 1, \
_dst._uncompressed_size = _src.uncompressed_size - 1, \
_dst.offset = _src.offset
switch (extent_entry_type(to_entry(dst))) {
switch (type) {
case BCH_EXTENT_ENTRY_crc32:
set_common_fields(dst->crc32, src);
dst->crc32.csum = *((__le32 *) &src.csum.lo);
@ -1426,23 +1425,24 @@ void bch2_extent_crc_append(struct bkey_i *k,
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
union bch_extent_crc *crc = (void *) ptrs.end;
enum bch_extent_entry_type type;
if (bch_crc_bytes[new.csum_type] <= 4 &&
new.uncompressed_size - 1 <= CRC32_SIZE_MAX &&
new.nonce <= CRC32_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc32;
type = BCH_EXTENT_ENTRY_crc32;
else if (bch_crc_bytes[new.csum_type] <= 10 &&
new.uncompressed_size - 1 <= CRC64_SIZE_MAX &&
new.nonce <= CRC64_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc64;
type = BCH_EXTENT_ENTRY_crc64;
else if (bch_crc_bytes[new.csum_type] <= 16 &&
new.uncompressed_size - 1 <= CRC128_SIZE_MAX &&
new.nonce <= CRC128_NONCE_MAX)
crc->type = 1 << BCH_EXTENT_ENTRY_crc128;
type = BCH_EXTENT_ENTRY_crc128;
else
BUG();
bch2_extent_crc_pack(crc, new);
bch2_extent_crc_pack(crc, new, type);
k->k.u64s += extent_entry_u64s(ptrs.end);
@ -1645,7 +1645,8 @@ enum merge_result bch2_extent_merge(struct bch_fs *c,
crc_l.uncompressed_size += crc_r.uncompressed_size;
crc_l.compressed_size += crc_r.compressed_size;
bch2_extent_crc_pack(entry_to_crc(en_l), crc_l);
bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
extent_entry_type(en_l));
}
bch2_key_resize(l.k, l.k->size + r.k->size);

View File

@ -507,12 +507,25 @@ static void bch2_set_page_dirty(struct bch_fs *c,
__set_page_dirty_nobuffers(page);
}
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
struct file *file = vmf->vma->vm_file;
struct bch_inode_info *inode = file_bch_inode(file);
int ret;
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
ret = filemap_fault(vmf);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
return ret;
}
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *file = vmf->vma->vm_file;
struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = inode->v.i_mapping;
struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation res;
unsigned len;
@ -530,8 +543,7 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
* a write_invalidate_inode_pages_range() that works without dropping
* page lock before invalidating page
*/
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_get(&mapping->add_lock);
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
lock_page(page);
isize = i_size_read(&inode->v);
@ -551,14 +563,13 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
}
bch2_set_page_dirty(c, inode, page, &res, 0, len);
bch2_page_reservation_put(c, inode, &res);
wait_for_stable_page(page);
out:
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_put(&mapping->add_lock);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
sb_end_pagefault(inode->v.i_sb);
bch2_page_reservation_put(c, inode, &res);
return ret;
}
@ -888,8 +899,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_SLOTS);
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_get(&mapping->add_lock);
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
while ((page = readpage_iter_next(&readpages_iter))) {
pgoff_t index = readpages_iter.offset + readpages_iter.idx;
@ -912,8 +922,7 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
&readpages_iter);
}
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_put(&mapping->add_lock);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
bch2_trans_exit(&trans);
kfree(readpages_iter.pages);
@ -1294,8 +1303,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
bch2_page_reservation_init(c, inode, res);
*fsdata = res;
/* Not strictly necessary - same reason as mkwrite(): */
pagecache_add_get(&mapping->add_lock);
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
@ -1347,7 +1355,7 @@ err:
put_page(page);
*pagep = NULL;
err_unlock:
pagecache_add_put(&mapping->add_lock);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
kfree(res);
*fsdata = NULL;
return ret;
@ -1391,7 +1399,7 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
unlock_page(page);
put_page(page);
pagecache_add_put(&mapping->add_lock);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
bch2_page_reservation_put(c, inode, res);
kfree(res);
@ -1549,7 +1557,7 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
ssize_t written = 0;
int ret = 0;
pagecache_add_get(&mapping->add_lock);
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
do {
unsigned offset = pos & (PAGE_SIZE - 1);
@ -1606,7 +1614,7 @@ again:
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(iter));
pagecache_add_put(&mapping->add_lock);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
return written ? written : ret;
}
@ -1730,6 +1738,43 @@ start:
}
}
ssize_t bch2_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = file->f_mapping;
size_t count = iov_iter_count(iter);
ssize_t ret;
if (!count)
return 0; /* skip atime */
if (iocb->ki_flags & IOCB_DIRECT) {
struct blk_plug plug;
ret = filemap_write_and_wait_range(mapping,
iocb->ki_pos,
iocb->ki_pos + count - 1);
if (ret < 0)
return ret;
file_accessed(file);
blk_start_plug(&plug);
ret = bch2_direct_IO_read(iocb, iter);
blk_finish_plug(&plug);
if (ret >= 0)
iocb->ki_pos += ret;
} else {
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
ret = generic_file_read_iter(iocb, iter);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
}
return ret;
}
/* O_DIRECT writes */
static long bch2_dio_write_loop(struct dio_write *dio)
@ -1744,34 +1789,23 @@ static long bch2_dio_write_loop(struct dio_write *dio)
struct bio_vec *bv;
unsigned unaligned;
u64 new_i_size;
loff_t offset;
bool sync;
long ret;
if (dio->loop)
goto loop;
/* Write and invalidate pagecache range that we're writing to: */
offset = req->ki_pos + (dio->op.written << 9);
ret = write_invalidate_inode_pages_range(mapping,
offset,
offset + iov_iter_count(&dio->iter) - 1);
if (unlikely(ret))
goto err;
while (1) {
offset = req->ki_pos + (dio->op.written << 9);
BUG_ON(current->pagecache_lock);
current->pagecache_lock = &mapping->add_lock;
if (kthread)
use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
ret = bio_iov_iter_get_pages(bio, &dio->iter);
current->faults_disabled_mapping = NULL;
if (kthread)
unuse_mm(dio->mm);
current->pagecache_lock = NULL;
if (unlikely(ret < 0))
goto err;
@ -1791,14 +1825,8 @@ static long bch2_dio_write_loop(struct dio_write *dio)
goto err;
}
/* gup might have faulted pages back in: */
ret = write_invalidate_inode_pages_range(mapping,
offset,
offset + bio->bi_iter.bi_size - 1);
if (unlikely(ret))
goto err;
dio->op.pos = POS(inode->v.i_ino, offset >> 9);
dio->op.pos = POS(inode->v.i_ino,
(req->ki_pos >> 9) + dio->op.written);
task_io_account_write(bio->bi_iter.bi_size);
@ -1850,7 +1878,7 @@ loop:
ret = dio->op.error ?: ((long) dio->op.written << 9);
err:
__pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res);
@ -1916,7 +1944,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
goto err;
inode_dio_begin(&inode->v);
__pagecache_block_get(&mapping->add_lock);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
extending = req->ki_pos + iter->count > inode->v.i_size;
if (!extending) {
@ -1964,6 +1992,12 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->op.opts.data_replicas))
goto err_put_bio;
ret = write_invalidate_inode_pages_range(mapping,
req->ki_pos,
req->ki_pos + iter->count - 1);
if (unlikely(ret))
goto err_put_bio;
ret = bch2_dio_write_loop(dio);
err:
if (locked)
@ -1972,7 +2006,7 @@ err:
req->ki_pos += ret;
return ret;
err_put_bio:
__pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res);
bio_put(bio);
@ -1980,21 +2014,6 @@ err_put_bio:
goto err;
}
ssize_t bch2_direct_IO(struct kiocb *req, struct iov_iter *iter)
{
struct blk_plug plug;
ssize_t ret;
if (iov_iter_rw(iter) == WRITE)
return -EINVAL;
blk_start_plug(&plug);
ret = bch2_direct_IO_read(req, iter);
blk_finish_plug(&plug);
return ret;
}
ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@ -2236,7 +2255,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
int ret = 0;
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
/*
* fetch current on disk i_size: inode is locked, i_size can only
@ -2307,7 +2326,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
err:
pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
return ret;
}
@ -2316,14 +2335,13 @@ err:
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
u64 discard_start = round_up(offset, block_bytes(c)) >> 9;
u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9;
int ret = 0;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
ret = __bch2_truncate_page(inode,
offset >> PAGE_SHIFT,
@ -2352,7 +2370,7 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
err:
pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
@ -2383,7 +2401,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
*/
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (insert) {
ret = -EFBIG;
@ -2570,7 +2588,7 @@ bkey_err:
}
err:
bch2_trans_exit(&trans);
pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
}
@ -2594,7 +2612,7 @@ static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
ret = inode_newsize_ok(&inode->v, end);
@ -2737,7 +2755,7 @@ bkey_err:
}
err:
bch2_trans_exit(&trans);
pagecache_block_put(&mapping->add_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
}
@ -2813,8 +2831,8 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
struct bch_inode_info *dst = file_bch_inode(file_dst);
struct bch_fs *c = src->v.i_sb->s_fs_info;
s64 i_sectors_delta = 0;
u64 aligned_len;
loff_t ret = 0;
loff_t aligned_len;
if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
return -EINVAL;
@ -2830,26 +2848,23 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
abs(pos_src - pos_dst) < len)
return -EINVAL;
bch2_lock_inodes(INODE_LOCK, src, dst);
bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
file_update_time(file_dst);
inode_dio_wait(&src->v);
inode_dio_wait(&dst->v);
__pagecache_block_get(&src->v.i_mapping->add_lock);
__pagecache_block_get(&dst->v.i_mapping->add_lock);
ret = generic_remap_file_range_prep(file_src, pos_src,
file_dst, pos_dst,
&len, remap_flags);
if (ret < 0 || len == 0)
goto err;
aligned_len = round_up(len, block_bytes(c));
aligned_len = round_up((u64) len, block_bytes(c));
ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
pos_dst, pos_dst + aligned_len);
pos_dst, pos_dst + len - 1);
if (ret)
goto err;
@ -2864,24 +2879,20 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
if (ret < 0)
goto err;
ret <<= 9;
/*
* due to alignment, we might have remapped slightly more than requested
*/
ret = min(ret, len);
ret = min((u64) ret << 9, (u64) len);
/* XXX get a quota reservation */
i_sectors_acct(c, dst, NULL, i_sectors_delta);
spin_lock(&dst->v.i_lock);
if (pos_dst + len > dst->v.i_size)
i_size_write(&dst->v, pos_dst + len);
if (pos_dst + ret > dst->v.i_size)
i_size_write(&dst->v, pos_dst + ret);
spin_unlock(&dst->v.i_lock);
err:
__pagecache_block_put(&dst->v.i_mapping->add_lock);
__pagecache_block_put(&src->v.i_mapping->add_lock);
bch2_unlock_inodes(INODE_LOCK, src, dst);
bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
return ret;
}

View File

@ -27,8 +27,7 @@ int bch2_write_begin(struct file *, struct address_space *, loff_t,
int bch2_write_end(struct file *, struct address_space *, loff_t,
unsigned, unsigned, struct page *, void *);
ssize_t bch2_direct_IO(struct kiocb *, struct iov_iter *);
ssize_t bch2_read_iter(struct kiocb *, struct iov_iter *);
ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
int bch2_fsync(struct file *, loff_t, loff_t, int);
@ -41,6 +40,7 @@ loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
loff_t bch2_llseek(struct file *, loff_t, int);
vm_fault_t bch2_page_fault(struct vm_fault *);
vm_fault_t bch2_page_mkwrite(struct vm_fault *);
void bch2_invalidatepage(struct page *, unsigned int, unsigned int);
int bch2_releasepage(struct page *, gfp_t);

View File

@ -49,6 +49,53 @@ static void journal_seq_copy(struct bch_inode_info *dst,
} while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old);
}
static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
{
BUG_ON(atomic_long_read(&lock->v) == 0);
if (atomic_long_sub_return_release(i, &lock->v) == 0)
wake_up_all(&lock->wait);
}
static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
{
long v = atomic_long_read(&lock->v), old;
do {
old = v;
if (i > 0 ? v < 0 : v > 0)
return false;
} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
old, old + i)) != old);
return true;
}
static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
{
wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
}
void bch2_pagecache_add_put(struct pagecache_lock *lock)
{
__pagecache_lock_put(lock, 1);
}
void bch2_pagecache_add_get(struct pagecache_lock *lock)
{
__pagecache_lock_get(lock, 1);
}
void bch2_pagecache_block_put(struct pagecache_lock *lock)
{
__pagecache_lock_put(lock, -1);
}
void bch2_pagecache_block_get(struct pagecache_lock *lock)
{
__pagecache_lock_get(lock, -1);
}
void bch2_inode_update_after_write(struct bch_fs *c,
struct bch_inode_info *inode,
struct bch_inode_unpacked *bi,
@ -706,10 +753,15 @@ static int bch2_getattr(const struct path *path, struct kstat *stat,
if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
stat->attributes |= STATX_ATTR_IMMUTABLE;
stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
stat->attributes |= STATX_ATTR_APPEND;
stat->attributes_mask |= STATX_ATTR_APPEND;
if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= STATX_ATTR_NODUMP;
return 0;
}
@ -872,7 +924,7 @@ retry:
}
static const struct vm_operations_struct bch_vm_ops = {
.fault = filemap_fault,
.fault = bch2_page_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = bch2_page_mkwrite,
};
@ -906,7 +958,7 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
static const struct file_operations bch_file_operations = {
.llseek = bch2_llseek,
.read_iter = generic_file_read_iter,
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
.open = generic_file_open,
@ -994,7 +1046,7 @@ static const struct address_space_operations bch_address_space_operations = {
.write_end = bch2_write_end,
.invalidatepage = bch2_invalidatepage,
.releasepage = bch2_releasepage,
.direct_IO = bch2_direct_IO,
.direct_IO = noop_direct_IO,
#ifdef CONFIG_MIGRATION
.migratepage = bch2_migrate_page,
#endif
@ -1090,6 +1142,7 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
inode_init_once(&inode->v);
mutex_init(&inode->ei_update_lock);
pagecache_lock_init(&inode->ei_pagecache_lock);
mutex_init(&inode->ei_quota_lock);
inode->ei_journal_seq = 0;

View File

@ -10,6 +10,26 @@
#include <linux/seqlock.h>
#include <linux/stat.h>
/*
* Two-state lock - can be taken for add or block - both states are shared,
* like the read side of an rwsem, but conflict with the other state:
*/
struct pagecache_lock {
atomic_long_t v;
wait_queue_head_t wait;
};
static inline void pagecache_lock_init(struct pagecache_lock *lock)
{
atomic_long_set(&lock->v, 0);
init_waitqueue_head(&lock->wait);
}
void bch2_pagecache_add_put(struct pagecache_lock *);
void bch2_pagecache_add_get(struct pagecache_lock *);
void bch2_pagecache_block_put(struct pagecache_lock *);
void bch2_pagecache_block_get(struct pagecache_lock *);
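A minimal usage sketch (illustrative, not part of this commit), following the callers added elsewhere in this series: buffered-IO paths (read_iter, readpages, write_begin, page faults) take the add side, while paths that must keep new pages out of the pagecache (truncate, fpunch, fallocate, O_DIRECT writes, remap_file_range) take the block side. Each side is shared with itself and excludes the other:

static inline void example_buffered_io(struct pagecache_lock *lock)
{
	bch2_pagecache_add_get(lock);
	/* ... safe to add and dirty pagecache pages here ... */
	bch2_pagecache_add_put(lock);
}

static inline void example_pagecache_exclusion(struct pagecache_lock *lock)
{
	bch2_pagecache_block_get(lock);
	/* ... no add-side user can populate the pagecache while this is held ... */
	bch2_pagecache_block_put(lock);
}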
struct bch_inode_info {
struct inode v;
@ -18,6 +38,8 @@ struct bch_inode_info {
u64 ei_quota_reserved;
unsigned long ei_last_dirtied;
struct pagecache_lock ei_pagecache_lock;
struct mutex ei_quota_lock;
struct bch_qid ei_qid;
@ -37,7 +59,8 @@ static inline int ptrcmp(void *l, void *r)
enum bch_inode_lock_op {
INODE_LOCK = (1U << 0),
INODE_UPDATE_LOCK = (1U << 1),
INODE_PAGECACHE_BLOCK = (1U << 1),
INODE_UPDATE_LOCK = (1U << 2),
};
#define bch2_lock_inodes(_locks, ...) \
@ -49,9 +72,11 @@ do { \
\
for (i = 1; i < ARRAY_SIZE(a); i++) \
if (a[i] != a[i - 1]) { \
if (_locks & INODE_LOCK) \
if ((_locks) & INODE_LOCK) \
down_write_nested(&a[i]->v.i_rwsem, i); \
if (_locks & INODE_UPDATE_LOCK) \
if ((_locks) & INODE_PAGECACHE_BLOCK) \
bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\
if ((_locks) & INODE_UPDATE_LOCK) \
mutex_lock_nested(&a[i]->ei_update_lock, i);\
} \
} while (0)
@ -65,9 +90,11 @@ do { \
\
for (i = 1; i < ARRAY_SIZE(a); i++) \
if (a[i] != a[i - 1]) { \
if (_locks & INODE_LOCK) \
if ((_locks) & INODE_LOCK) \
up_write(&a[i]->v.i_rwsem); \
if (_locks & INODE_UPDATE_LOCK) \
if ((_locks) & INODE_PAGECACHE_BLOCK) \
bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\
if ((_locks) & INODE_UPDATE_LOCK) \
mutex_unlock(&a[i]->ei_update_lock); \
} \
} while (0)

View File

@ -797,7 +797,7 @@ create_lostfound:
bch2_create_trans(&trans,
BCACHEFS_ROOT_INO, root_inode,
lostfound_inode, &lostfound,
0, 0, S_IFDIR|0755, 0, NULL, NULL));
0, 0, S_IFDIR|0700, 0, NULL, NULL));
if (ret)
bch_err(c, "error creating lost+found: %i", ret);

View File

@ -1270,7 +1270,6 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
closure_return_with_destructor(cl, promote_done);
}
noinline
static struct promote_op *__promote_alloc(struct bch_fs *c,
enum btree_id btree_id,
struct bpos pos,
@ -1344,7 +1343,8 @@ err:
return NULL;
}
static inline struct promote_op *promote_alloc(struct bch_fs *c,
noinline
static struct promote_op *promote_alloc(struct bch_fs *c,
struct bvec_iter iter,
struct bkey_s_c k,
struct extent_ptr_decoded *pick,
@ -1908,7 +1908,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
flags |= BCH_READ_MUST_BOUNCE;
BUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
if (pick.crc.compression_type != BCH_COMPRESSION_NONE ||
(pick.crc.csum_type != BCH_CSUM_NONE &&
@ -1920,8 +1920,9 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
bounce = true;
}
promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
&rbio, &bounce, &read_full);
if (orig->opts.promote_target)
promote = promote_alloc(c, iter, k, &pick, orig->opts, flags,
&rbio, &bounce, &read_full);
if (!read_full) {
EBUG_ON(pick.crc.compression_type);
@ -1949,7 +1950,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
* data in the write path, but we're not going to use it all
* here:
*/
BUG_ON(rbio->bio.bi_iter.bi_size <
EBUG_ON(rbio->bio.bi_iter.bi_size <
pick.crc.compressed_size << 9);
rbio->bio.bi_iter.bi_size =
pick.crc.compressed_size << 9;
@ -1982,10 +1983,10 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
noclone:
rbio = orig;
rbio->bio.bi_iter = iter;
BUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
}
BUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
rbio->c = c;
rbio->submit_time = local_clock();
@ -2001,6 +2002,7 @@ noclone:
rbio->hole = 0;
rbio->retry = 0;
rbio->context = 0;
/* XXX: only initialize this if needed */
rbio->devs_have = bch2_bkey_devs(k);
rbio->pick = pick;
rbio->pos = pos;
@ -2017,11 +2019,11 @@ noclone:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
percpu_down_read(&c->mark_lock);
rcu_read_lock();
bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
percpu_up_read(&c->mark_lock);
rcu_read_unlock();
if (likely(!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT)))) {
if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
bio_inc_remaining(&orig->bio);
trace_read_split(&orig->bio);
}

View File

@ -68,6 +68,12 @@ enum opt_type {
* - helptext
*/
#ifdef __KERNEL__
#define RATELIMIT_ERRORS true
#else
#define RATELIMIT_ERRORS false
#endif
#define BCH_OPTS() \
x(block_size, u16, \
OPT_FORMAT, \
@ -227,6 +233,11 @@ enum opt_type {
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Fix errors during fsck without asking") \
x(ratelimit_errors, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, RATELIMIT_ERRORS, \
NULL, "Ratelimit error messages during fsck") \
x(nochanges, u8, \
OPT_MOUNT, \
OPT_BOOL(), \

View File

@ -290,10 +290,12 @@ err:
ret2 = PTR_ERR_OR_ZERO(inode_iter);
if (!ret2 &&
inode_u.bi_size < new_i_size)
inode_u.bi_size < new_i_size) {
inode_u.bi_size = new_i_size;
ret2 = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, journal_seq,
BTREE_INSERT_ATOMIC);
}
} while (ret2 == -EINTR);
ret = bch2_trans_exit(&trans) ?: ret;