Update bcachefs sources to b964c6cba8 bcachefs: Change lockrestart_do() to always call bch2_trans_begin()

This commit is contained in:
Kent Overstreet 2021-07-28 17:26:38 -04:00
parent 308d7adcc5
commit f06b01e9ea
30 changed files with 442 additions and 663 deletions

View File

@ -1 +1 @@
0a9be96b5087967ac57007a131614ab6b4cadce4 b964c6cba873eb5d2ebd0174876b664730e69a73

View File

@ -1,319 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
####
# kbuild: Generic definitions
# Convenient variables
# Characters that are awkward to write literally in Make syntax are bound to
# named variables so they can be used safely inside $(subst)/$(call) arguments.
comma := ,
quote := "
squote := '
empty :=
# A single literal space: Make strips leading/trailing whitespace in
# assignments, so it has to be constructed from two empty expansions.
space := $(empty) $(empty)
# Placeholder used to round-trip spaces through space-separated word lists
# (see cmd-check below, which escapes spaces before comparing commands).
space_escape := _-_SPACE_-_
# '#' must be escaped here or it would start a comment at assignment time.
pound := \#
###
# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
dot-target = $(dir $@).$(notdir $@)
###
# The temporary file to save gcc -MMD generated dependencies must not
# contain a comma
depfile = $(subst $(comma),_,$(dot-target).d)
###
# filename of target with directory and extension stripped
basetarget = $(basename $(notdir $@))
###
# real prerequisites without phony targets
real-prereqs = $(filter-out $(PHONY), $^)
###
# Escape single quote for use in echo statements
escsq = $(subst $(squote),'\$(squote)',$1)
###
# Quote a string to pass it to C files. foo => '"foo"'
stringify = $(squote)$(quote)$1$(quote)$(squote)
###
# Easy method for doing a status message
# $(quiet) selects which variant the final assignment picks up:
#   quiet = 'quiet_'  (normal build)  -> echo   (print the status message)
#   quiet = 'silent_' (make -s)       -> :      (print nothing)
#   quiet = ''        (verbose build) -> the initial ':' below, since the
#                                        full command line is echoed anyway
kecho := :
quiet_kecho := echo
silent_kecho := :
kecho := $($(quiet)kecho)
###
# filechk is used to check if the content of a generated file is updated.
# Sample usage:
#
# filechk_sample = echo $(KERNELRELEASE)
# version.h: FORCE
# $(call filechk,sample)
#
# The rule defined shall write to stdout the content of the new file.
# The existing file will be compared with the new one.
# - If no file exist it is created
# - If the content differ the new file is used
# - If they are equal no change, and no timestamp update
#
# Implementation note: the new content is written to a .tmp file first and
# only moved over the target when it differs ('cmp -s'), so an unchanged
# target keeps its timestamp and its dependents are not rebuilt; the EXIT
# trap removes the .tmp file if the generator fails part-way.
define filechk
$(Q)set -e; \
mkdir -p $(dir $@); \
trap "rm -f $(dot-target).tmp" EXIT; \
{ $(filechk_$(1)); } > $(dot-target).tmp; \
if [ ! -r $@ ] || ! cmp -s $@ $(dot-target).tmp; then \
$(kecho) ' UPD $@'; \
mv -f $(dot-target).tmp $@; \
fi
endef
######
# gcc support functions
# See documentation in Documentation/kbuild/makefiles.rst
# cc-cross-prefix
# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
# Return first <prefix> where a <prefix>gcc is found in PATH.
# If no gcc found in PATH with listed prefixes return nothing
#
# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it
# would try to directly execute the shell builtin 'command'. This workaround
# should be kept for a long time since this issue was fixed only after the
# GNU Make 4.2.1 release.
cc-cross-prefix = $(firstword $(foreach c, $(1), \
$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
# output directory for tests below
# '$$$$' reaches the shell as '$$' (the shell PID), so each concurrent make
# invocation gets its own scratch directory.
TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
# try-run
# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
# Exit code chooses option. "$$TMP" serves as a temporary file and is
# automatically cleaned up.
# NOTE(review): TMPO is set below but nothing in this file references $$TMPO;
# presumably kept for callers whose test command needs an object-file path —
# confirm before removing (the newer Makefile.compiler drops it).
try-run = $(shell set -e; \
TMP=$(TMPOUT)/tmp; \
TMPO=$(TMPOUT)/tmp.o; \
mkdir -p $(TMPOUT); \
trap "rm -rf $(TMPOUT)" EXIT; \
if ($(1)) >/dev/null 2>&1; \
then echo "$(2)"; \
else echo "$(3)"; \
fi)
# as-option
# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
as-option = $(call try-run,\
$(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
# as-instr
# Usage: cflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
__cc-option = $(call try-run,\
$(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
# cc-option
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
cc-option = $(call __cc-option, $(CC),\
$(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2))
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
# cc-disable-warning
# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
# -Werror makes an unrecognized -W<flag> fail the probe; only two arguments
# are passed to try-run, so an unsupported warning expands to nothing.
cc-disable-warning = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# cc-ifversion
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
# ld-ifversion
# Usage: $(call ld-ifversion, -ge, 22252, y)
ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))
######
###
# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.build obj=
# Usage:
# $(Q)$(MAKE) $(build)=dir
build := -f $(srctree)/scripts/Makefile.build obj
###
# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.dtbinst obj=
# Usage:
# $(Q)$(MAKE) $(dtbinst)=dir
dtbinst := -f $(srctree)/scripts/Makefile.dtbinst obj
###
# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=
# Usage:
# $(Q)$(MAKE) $(clean)=dir
clean := -f $(srctree)/scripts/Makefile.clean obj
# echo command.
# Short version is used, if $(quiet) equals `quiet_', otherwise full one.
# $(echo-why) appends the V=2 "why rebuilt" note (empty unless enabled below).
echo-cmd = $(if $($(quiet)cmd_$(1)),\
echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';)
# printing commands
# Print the (possibly abbreviated) command line, then run the real cmd_<name>.
cmd = @set -e; $(echo-cmd) $(cmd_$(1))
###
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
# if_changed_dep - as if_changed, but uses fixdep to reveal dependencies
# including used config symbols
# if_changed_rule - as if_changed but execute rule instead
# See Documentation/kbuild/makefiles.rst for more info
ifneq ($(KBUILD_NOCMDDEP),1)
# Check if both commands are the same including their order. Result is empty
# string if equal. User may override this check using make KBUILD_NOCMDDEP=1
# Spaces are escaped first so that filter-out compares each saved command
# ($(cmd_$@), loaded from the .cmd file) against the current one as a single
# word rather than word-by-word.
cmd-check = $(filter-out $(subst $(space),$(space_escape),$(strip $(cmd_$@))), \
$(subst $(space),$(space_escape),$(strip $(cmd_$1))))
else
# KBUILD_NOCMDDEP=1: only rebuild when no command was recorded at all.
cmd-check = $(if $(strip $(cmd_$@)),,1)
endif
# Replace >$< with >$$< to preserve $ when reloading the .cmd file
# (needed for make)
# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
# (needed for make)
# Replace >'< with >'\''< to be able to enclose the whole string in '...'
# (needed for the shell)
make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
# Find any prerequisites that are newer than target or that do not exist.
# (This is not true for now; $? should contain any non-existent prerequisites,
# but it does not work as expected when .SECONDARY is present. This seems a bug
# of GNU Make.)
# PHONY targets skipped in both cases.
newer-prereqs = $(filter-out $(PHONY),$?)
# Execute command if command has changed or prerequisite(s) are updated.
# On execution, the command line is also saved to dir/.target.cmd so that
# cmd-check can detect changes on the next build; otherwise do nothing (@:).
if_changed = $(if $(newer-prereqs)$(cmd-check), \
$(cmd); \
printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
# Execute the command and also postprocess generated .d dependencies file.
if_changed_dep = $(if $(newer-prereqs)$(cmd-check),$(cmd_and_fixdep),@:)
# Run the command, then let fixdep merge the compiler-generated $(depfile)
# and the saved command line into the .cmd file; the raw .d file is removed.
cmd_and_fixdep = \
$(cmd); \
scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\
rm -f $(depfile)
# Usage: $(call if_changed_rule,foo)
# Will check if $(cmd_foo) or any of the prerequisites changed,
# and if so will execute $(rule_foo).
if_changed_rule = $(if $(newer-prereqs)$(cmd-check),$(rule_$(1)),@:)
###
# why - tell why a target got built
# enabled by make V=2
# Output (listed in the order they are checked):
# (1) - due to target is PHONY
# (2) - due to target missing
# (3) - due to: file1.h file2.h
# (4) - due to command line change
# (5) - due to missing .cmd file
# (6) - due to target not in $(targets)
# (1) PHONY targets are always built
# (2) No target, so we better build it
# (3) Prerequisite is newer than target
# (4) The command line stored in the file named dir/.target.cmd
# differed from actual command line. This happens when compiler
# options change
# (5) No dir/.target.cmd file (used to store command line)
# (6) No dir/.target.cmd file and target not listed in $(targets)
# This is a good hint that there is a bug in the kbuild file
ifeq ($(KBUILD_VERBOSE),2)
why = \
$(if $(filter $@, $(PHONY)),- due to target is PHONY, \
$(if $(wildcard $@), \
$(if $(newer-prereqs),- due to: $(newer-prereqs), \
$(if $(cmd-check), \
$(if $(cmd_$@),- due to command line change, \
$(if $(filter $@, $(targets)), \
- due to missing .cmd file, \
- due to $(notdir $@) not in $$(targets) \
) \
) \
) \
), \
- due to target missing \
) \
)
echo-why = $(call escsq, $(strip $(why)))
endif
###############################################################################
#
# When a Kconfig string contains a filename, it is suitable for
# passing to shell commands. It is surrounded by double-quotes, and
# any double-quotes or backslashes within it are escaped by
# backslashes.
#
# This is no use for dependencies or $(wildcard). We need to strip the
# surrounding quotes and the escaping from quotes and backslashes, and
# we *do* need to escape any spaces in the string. So, for example:
#
# Usage: $(eval $(call config_filename,FOO))
#
# Defines FOO_FILENAME based on the contents of the CONFIG_FOO option,
# transformed as described above to be suitable for use within the
# makefile.
#
# Also, if the filename is a relative filename and exists in the source
# tree but not the build tree, define FOO_SRCPREFIX as $(srctree)/ to
# be prefixed to *both* command invocation and dependencies.
#
# Note: We also print the filenames in the quiet_cmd_foo text, and
# perhaps ought to have a version specially escaped for that purpose.
# But it's only cosmetic, and $(patsubst "%",%,$(CONFIG_FOO)) is good
# enough. It'll strip the quotes in the common case where there's no
# space and it's a simple filename, and it'll retain the quotes when
# there's a space. There are some esoteric cases in which it'll print
# the wrong thing, but we don't really care. The actual dependencies
# and commands *do* get it right, with various combinations of single
# and double quotes, backslashes and spaces in the filenames.
#
###############################################################################
#
# The body below is expanded via $(eval $(call ...)), hence the doubled '$$':
# the conditionals run when the result is re-parsed, not here. The first
# ifneq tests for an absolute path (patsubst /%,% changes absolute paths
# only) and deliberately has an empty then-branch — absolute paths never
# need the $(srctree)/ prefix.
define config_filename
ifneq ($$(CONFIG_$(1)),"")
$(1)_FILENAME := $$(subst \\,\,$$(subst \$$(quote),$$(quote),$$(subst $$(space_escape),\$$(space),$$(patsubst "%",%,$$(subst $$(space),$$(space_escape),$$(CONFIG_$(1)))))))
ifneq ($$(patsubst /%,%,$$(firstword $$($(1)_FILENAME))),$$(firstword $$($(1)_FILENAME)))
else
ifeq ($$(wildcard $$($(1)_FILENAME)),)
ifneq ($$(wildcard $$(srctree)/$$($(1)_FILENAME)),)
$(1)_SRCPREFIX := $(srctree)/
endif
endif
endif
endif
endef
#
###############################################################################
# delete partially updated (i.e. corrupted) files on error
.DELETE_ON_ERROR:
# do not delete intermediate files automatically
.SECONDARY:

View File

@ -22,7 +22,7 @@ LDFLAGS+=$(CFLAGS) $(EXTRA_LDFLAGS)
VERSION?=$(shell git describe --dirty=+ 2>/dev/null || echo v0.1-nogit) VERSION?=$(shell git describe --dirty=+ 2>/dev/null || echo v0.1-nogit)
include Kbuild.include include Makefile.compiler
CFLAGS+=$(call cc-disable-warning, unused-but-set-variable) CFLAGS+=$(call cc-disable-warning, unused-but-set-variable)
CFLAGS+=$(call cc-disable-warning, stringop-overflow) CFLAGS+=$(call cc-disable-warning, stringop-overflow)
@ -178,8 +178,8 @@ update-bcachefs-sources:
git add include/linux/list_nulls.h git add include/linux/list_nulls.h
cp $(LINUX_DIR)/include/linux/poison.h include/linux/ cp $(LINUX_DIR)/include/linux/poison.h include/linux/
git add include/linux/poison.h git add include/linux/poison.h
cp $(LINUX_DIR)/scripts/Kbuild.include ./ cp $(LINUX_DIR)/scripts/Makefile.compiler ./
git add Kbuild.include git add Makefile.compiler
$(RM) libbcachefs/*.mod.c $(RM) libbcachefs/*.mod.c
git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
git add .bcachefs_revision git add .bcachefs_revision

74
Makefile.compiler Normal file
View File

@ -0,0 +1,74 @@
# SPDX-License-Identifier: GPL-2.0-only
# cc-cross-prefix
# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
# Return first <prefix> where a <prefix>gcc is found in PATH.
# If no gcc found in PATH with listed prefixes return nothing
#
# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it
# would try to directly execute the shell builtin 'command'. This workaround
# should be kept for a long time since this issue was fixed only after the
# GNU Make 4.2.1 release.
cc-cross-prefix = $(firstword $(foreach c, $(1), \
$(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
# output directory for tests below
# '$$$$' reaches the shell as '$$' (the shell PID), so each concurrent make
# invocation gets its own scratch directory.
TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
# try-run
# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
# Exit code chooses option. "$$TMP" serves as a temporary file and is
# automatically cleaned up.
# (Unlike the old Kbuild.include version, no TMPO object path is exported.)
try-run = $(shell set -e; \
TMP=$(TMPOUT)/tmp; \
mkdir -p $(TMPOUT); \
trap "rm -rf $(TMPOUT)" EXIT; \
if ($(1)) >/dev/null 2>&1; \
then echo "$(2)"; \
else echo "$(3)"; \
fi)
# as-option
# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
as-option = $(call try-run,\
$(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
# as-instr
# Usage: cflags-y += $(call as-instr,instr,option1,option2)
as-instr = $(call try-run,\
printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# __cc-option
# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
__cc-option = $(call try-run,\
$(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
# cc-option
# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
cc-option = $(call __cc-option, $(CC),\
$(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2))
# cc-option-yn
# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
cc-option-yn = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
# cc-disable-warning
# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
# -Werror makes an unrecognized -W<flag> fail the probe; only two arguments
# are passed to try-run, so an unsupported warning expands to nothing.
cc-disable-warning = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
# cc-ifversion
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
# ld-option
# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
# ld-ifversion
# Usage: $(call ld-ifversion, -ge, 22252, y)
ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))

View File

@ -775,14 +775,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
TRACE_EVENT(iter_traverse, TRACE_EVENT(iter_traverse,
TP_PROTO(unsigned long trans_ip, TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip, unsigned long caller_ip,
bool key_cache,
enum btree_id btree_id, enum btree_id btree_id,
struct bpos *pos, struct bpos *pos,
int ret), int ret),
TP_ARGS(trans_ip, caller_ip, btree_id, pos, ret), TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, ret),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long, trans_ip ) __field(unsigned long, trans_ip )
__field(unsigned long, caller_ip ) __field(unsigned long, caller_ip )
__field(u8, key_cache )
__field(u8, btree_id ) __field(u8, btree_id )
__field(u64, pos_inode ) __field(u64, pos_inode )
__field(u64, pos_offset ) __field(u64, pos_offset )
@ -793,6 +795,7 @@ TRACE_EVENT(iter_traverse,
TP_fast_assign( TP_fast_assign(
__entry->trans_ip = trans_ip; __entry->trans_ip = trans_ip;
__entry->caller_ip = caller_ip; __entry->caller_ip = caller_ip;
__entry->key_cache = key_cache;
__entry->btree_id = btree_id; __entry->btree_id = btree_id;
__entry->pos_inode = pos->inode; __entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset; __entry->pos_offset = pos->offset;
@ -800,9 +803,10 @@ TRACE_EVENT(iter_traverse,
__entry->ret = ret; __entry->ret = ret;
), ),
TP_printk("%ps %pS pos %u %llu:%llu:%u ret %i", TP_printk("%ps %pS key cache %u btree %u %llu:%llu:%u ret %i",
(void *) __entry->trans_ip, (void *) __entry->trans_ip,
(void *) __entry->caller_ip, (void *) __entry->caller_ip,
__entry->key_cache,
__entry->btree_id, __entry->btree_id,
__entry->pos_inode, __entry->pos_inode,
__entry->pos_offset, __entry->pos_offset,
@ -953,15 +957,17 @@ TRACE_EVENT(trans_restart_mem_realloced,
DECLARE_EVENT_CLASS(node_lock_fail, DECLARE_EVENT_CLASS(node_lock_fail,
TP_PROTO(unsigned long trans_ip, TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip, unsigned long caller_ip,
bool key_cache,
enum btree_id btree_id, enum btree_id btree_id,
struct bpos *pos, struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq), unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(trans_ip, caller_ip, btree_id, pos, TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq), level, iter_seq, node, node_seq),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(unsigned long, trans_ip ) __field(unsigned long, trans_ip )
__field(unsigned long, caller_ip ) __field(unsigned long, caller_ip )
__field(u8, key_cache )
__field(u8, btree_id ) __field(u8, btree_id )
__field(u64, pos_inode ) __field(u64, pos_inode )
__field(u64, pos_offset ) __field(u64, pos_offset )
@ -975,6 +981,7 @@ DECLARE_EVENT_CLASS(node_lock_fail,
TP_fast_assign( TP_fast_assign(
__entry->trans_ip = trans_ip; __entry->trans_ip = trans_ip;
__entry->caller_ip = caller_ip; __entry->caller_ip = caller_ip;
__entry->key_cache = key_cache;
__entry->btree_id = btree_id; __entry->btree_id = btree_id;
__entry->pos_inode = pos->inode; __entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset; __entry->pos_offset = pos->offset;
@ -985,9 +992,10 @@ DECLARE_EVENT_CLASS(node_lock_fail,
__entry->node_seq = node_seq; __entry->node_seq = node_seq;
), ),
TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u", TP_printk("%ps %pS key cache %u btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u",
(void *) __entry->trans_ip, (void *) __entry->trans_ip,
(void *) __entry->caller_ip, (void *) __entry->caller_ip,
__entry->key_cache,
__entry->btree_id, __entry->btree_id,
__entry->pos_inode, __entry->pos_inode,
__entry->pos_offset, __entry->pos_offset,
@ -999,20 +1007,22 @@ DECLARE_EVENT_CLASS(node_lock_fail,
DEFINE_EVENT(node_lock_fail, node_upgrade_fail, DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
TP_PROTO(unsigned long trans_ip, TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip, unsigned long caller_ip,
bool key_cache,
enum btree_id btree_id, enum btree_id btree_id,
struct bpos *pos, struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq), unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(trans_ip, caller_ip, btree_id, pos, TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq) level, iter_seq, node, node_seq)
); );
DEFINE_EVENT(node_lock_fail, node_relock_fail, DEFINE_EVENT(node_lock_fail, node_relock_fail,
TP_PROTO(unsigned long trans_ip, TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip, unsigned long caller_ip,
bool key_cache,
enum btree_id btree_id, enum btree_id btree_id,
struct bpos *pos, struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq), unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
TP_ARGS(trans_ip, caller_ip, btree_id, pos, TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq) level, iter_seq, node, node_seq)
); );

View File

@ -221,6 +221,8 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c_xattr xattr; struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL; struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
retry: retry:
@ -239,7 +241,14 @@ retry:
goto out; goto out;
} }
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret) {
acl = ERR_PTR(ret);
goto out;
}
xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(xattr_val(xattr.v), acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len)); le16_to_cpu(xattr.v->x_val_len));
@ -325,8 +334,7 @@ retry:
ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq, &inode->ei_journal_seq, 0);
BTREE_INSERT_NOUNLOCK);
btree_err: btree_err:
bch2_trans_iter_put(&trans, inode_iter); bch2_trans_iter_put(&trans, inode_iter);
@ -356,6 +364,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
struct bkey_s_c_xattr xattr; struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new; struct bkey_i_xattr *new;
struct posix_acl *acl; struct posix_acl *acl;
struct bkey_s_c k;
int ret; int ret;
iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
@ -366,7 +375,11 @@ int bch2_acl_chmod(struct btree_trans *trans,
if (ret) if (ret)
return ret == -ENOENT ? 0 : ret; return ret == -ENOENT ? 0 : ret;
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); k = bch2_btree_iter_peek_slot(iter);
xattr = bkey_s_c_to_xattr(k);
if (ret)
goto err;
acl = bch2_acl_from_disk(xattr_val(xattr.v), acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len)); le16_to_cpu(xattr.v->x_val_len));
ret = PTR_ERR_OR_ZERO(acl); ret = PTR_ERR_OR_ZERO(acl);

View File

@ -374,7 +374,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags)
percpu_ref_put(&ca->ref); percpu_ref_put(&ca->ref);
goto err; goto err;
} }
bch2_btree_iter_next_slot(iter); bch2_btree_iter_advance(iter);
} }
} }
err: err:

View File

@ -269,7 +269,7 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
{ {
bch2_bkey_to_text(out, k.k); bch2_bkey_to_text(out, k.k);
if (k.k) { if (bkey_val_bytes(k.k)) {
pr_buf(out, ": "); pr_buf(out, ": ");
bch2_val_to_text(out, c, k); bch2_val_to_text(out, c, k);
} }

View File

@ -648,8 +648,10 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* Parent node must be locked, else we could read in a btree node that's * Parent node must be locked, else we could read in a btree node that's
* been freed: * been freed:
*/ */
if (iter && !bch2_btree_node_relock(iter, level + 1)) if (iter && !bch2_btree_node_relock(iter, level + 1)) {
btree_trans_restart(iter->trans);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
}
b = bch2_btree_node_mem_alloc(c); b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b)) if (IS_ERR(b))
@ -686,18 +688,17 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
if (!sync) if (!sync)
return NULL; return NULL;
/*
* XXX: this will probably always fail because btree_iter_relock()
* currently fails for iterators that aren't pointed at a valid btree
* node
*/
if (iter && if (iter &&
(!bch2_trans_relock(iter->trans) || (!bch2_trans_relock(iter->trans) ||
!bch2_btree_iter_relock(iter, _THIS_IP_))) !bch2_btree_iter_relock_intent(iter))) {
BUG_ON(!iter->trans->restarted);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
}
if (!six_relock_type(&b->c.lock, lock_type, seq)) if (!six_relock_type(&b->c.lock, lock_type, seq)) {
btree_trans_restart(iter->trans);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
}
return b; return b;
} }
@ -718,6 +719,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
return; return;
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key)); bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key));
bch2_bpos_to_text(&PBUF(buf2), b->data->min_key);
bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); bch2_bpos_to_text(&PBUF(buf3), b->data->max_key);
bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n" bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n"
@ -752,20 +754,23 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b)
* The btree node will have either a read or a write lock held, depending on * The btree node will have either a read or a write lock held, depending on
* the @write parameter. * the @write parameter.
*/ */
struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter,
const struct bkey_i *k, unsigned level, const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type, enum six_lock_type lock_type,
unsigned long trace_ip) unsigned long trace_ip)
{ {
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
struct btree *b; struct btree *b;
struct bset_tree *t; struct bset_tree *t;
EBUG_ON(level >= BTREE_MAX_DEPTH); EBUG_ON(level >= BTREE_MAX_DEPTH);
b = btree_node_mem_ptr(k); if (c->opts.btree_node_mem_ptr_optimization) {
if (b) b = btree_node_mem_ptr(k);
goto lock_node; if (b)
goto lock_node;
}
retry: retry:
b = btree_cache_find(bc, k); b = btree_cache_find(bc, k);
if (unlikely(!b)) { if (unlikely(!b)) {
@ -818,7 +823,7 @@ lock_node:
if (!btree_node_lock(b, k->k.p, level, iter, lock_type, if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
lock_node_check_fn, (void *) k, trace_ip)) { lock_node_check_fn, (void *) k, trace_ip)) {
if (b->hash_val != btree_ptr_hash_val(k)) if (!trans->restarted)
goto retry; goto retry;
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
} }
@ -830,10 +835,11 @@ lock_node:
if (bch2_btree_node_relock(iter, level + 1)) if (bch2_btree_node_relock(iter, level + 1))
goto retry; goto retry;
trace_trans_restart_btree_node_reused(iter->trans->ip, trace_trans_restart_btree_node_reused(trans->ip,
trace_ip, trace_ip,
iter->btree_id, iter->btree_id,
&iter->real_pos); &iter->real_pos);
btree_trans_restart(trans);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
} }
} }
@ -842,19 +848,20 @@ lock_node:
u32 seq = b->c.lock.state.seq; u32 seq = b->c.lock.state.seq;
six_unlock_type(&b->c.lock, lock_type); six_unlock_type(&b->c.lock, lock_type);
bch2_trans_unlock(iter->trans); bch2_trans_unlock(trans);
bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_read(b);
/* /*
* XXX: check if this always fails - btree_iter_relock() * should_be_locked is not set on this iterator yet, so we need
* currently fails for iterators that aren't pointed at a valid * to relock it specifically:
* btree node
*/ */
if (iter && if (iter &&
(!bch2_trans_relock(iter->trans) || (!bch2_trans_relock(trans) ||
!bch2_btree_iter_relock(iter, _THIS_IP_))) !bch2_btree_iter_relock_intent(iter))) {
BUG_ON(!trans->restarted);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
}
if (!six_relock_type(&b->c.lock, lock_type, seq)) if (!six_relock_type(&b->c.lock, lock_type, seq))
goto retry; goto retry;
@ -899,9 +906,11 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
EBUG_ON(level >= BTREE_MAX_DEPTH); EBUG_ON(level >= BTREE_MAX_DEPTH);
b = btree_node_mem_ptr(k); if (c->opts.btree_node_mem_ptr_optimization) {
if (b) b = btree_node_mem_ptr(k);
goto lock_node; if (b)
goto lock_node;
}
retry: retry:
b = btree_cache_find(bc, k); b = btree_cache_find(bc, k);
if (unlikely(!b)) { if (unlikely(!b)) {
@ -966,9 +975,9 @@ out:
return b; return b;
} }
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, int bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k, const struct bkey_i *k,
enum btree_id btree_id, unsigned level) enum btree_id btree_id, unsigned level)
{ {
struct btree_cache *bc = &c->btree_cache; struct btree_cache *bc = &c->btree_cache;
struct btree *b; struct btree *b;
@ -978,9 +987,10 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
b = btree_cache_find(bc, k); b = btree_cache_find(bc, k);
if (b) if (b)
return; return 0;
bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); b = bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
return PTR_ERR_OR_ZERO(b);
} }
void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)

View File

@ -22,15 +22,15 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *,
const struct bkey_i *, unsigned, const struct bkey_i *, unsigned,
enum six_lock_type, unsigned long); enum six_lock_type, unsigned long);
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
enum btree_id, unsigned, bool); enum btree_id, unsigned, bool);
void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, int bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
const struct bkey_i *, enum btree_id, unsigned); const struct bkey_i *, enum btree_id, unsigned);
void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);

View File

@ -189,6 +189,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
(upgrade (upgrade
? trace_node_upgrade_fail ? trace_node_upgrade_fail
: trace_node_relock_fail)(iter->trans->ip, trace_ip, : trace_node_relock_fail)(iter->trans->ip, trace_ip,
btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos, iter->btree_id, &iter->real_pos,
l, iter->l[l].lock_seq, l, iter->l[l].lock_seq,
is_btree_node(iter, l) is_btree_node(iter, l)
@ -197,7 +198,6 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
is_btree_node(iter, l) is_btree_node(iter, l)
? iter->l[l].b->c.lock.state.seq ? iter->l[l].b->c.lock.state.seq
: 0); : 0);
fail_idx = l; fail_idx = l;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
} }
@ -309,7 +309,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
} }
if (unlikely(deadlock_iter)) { if (unlikely(deadlock_iter)) {
trace_trans_restart_would_deadlock(iter->trans->ip, ip, trace_trans_restart_would_deadlock(trans->ip, ip,
trans->in_traverse_all, reason, trans->in_traverse_all, reason,
deadlock_iter->btree_id, deadlock_iter->btree_id,
btree_iter_type(deadlock_iter), btree_iter_type(deadlock_iter),
@ -317,6 +317,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
iter->btree_id, iter->btree_id,
btree_iter_type(iter), btree_iter_type(iter),
&pos); &pos);
btree_trans_restart(trans);
return false; return false;
} }
@ -375,10 +376,44 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
#endif #endif
/*
* Only for btree_cache.c - only relocks intent locks
*/
bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
{
unsigned l;
for (l = iter->level;
l < iter->locks_want && btree_iter_node(iter, l);
l++) {
if (!bch2_btree_node_relock(iter, l)) {
trace_node_relock_fail(iter->trans->ip, _RET_IP_,
btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos,
l, iter->l[l].lock_seq,
is_btree_node(iter, l)
? 0
: (unsigned long) iter->l[l].b,
is_btree_node(iter, l)
? iter->l[l].b->c.lock.state.seq
: 0);
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
btree_trans_restart(iter->trans);
return false;
}
}
return true;
}
__flatten __flatten
bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip) bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
{ {
return btree_iter_get_locks(iter, false, trace_ip); bool ret = btree_iter_get_locks(iter, false, trace_ip);
if (!ret)
btree_trans_restart(iter->trans);
return ret;
} }
bool __bch2_btree_iter_upgrade(struct btree_iter *iter, bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
@ -421,6 +456,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
btree_iter_get_locks(linked, true, _THIS_IP_); btree_iter_get_locks(linked, true, _THIS_IP_);
} }
if (iter->should_be_locked)
btree_trans_restart(iter->trans);
return false; return false;
} }
@ -459,8 +496,7 @@ void bch2_trans_downgrade(struct btree_trans *trans)
/* Btree transaction locking: */ /* Btree transaction locking: */
static inline bool btree_iter_should_be_locked(struct btree_trans *trans, static inline bool btree_iter_should_be_locked(struct btree_iter *iter)
struct btree_iter *iter)
{ {
return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
iter->should_be_locked; iter->should_be_locked;
@ -470,11 +506,15 @@ bool bch2_trans_relock(struct btree_trans *trans)
{ {
struct btree_iter *iter; struct btree_iter *iter;
if (unlikely(trans->restarted))
return false;
trans_for_each_iter(trans, iter) trans_for_each_iter(trans, iter)
if (!bch2_btree_iter_relock(iter, _RET_IP_) && if (btree_iter_should_be_locked(iter) &&
btree_iter_should_be_locked(trans, iter)) { !bch2_btree_iter_relock(iter, _RET_IP_)) {
trace_trans_restart_relock(trans->ip, _RET_IP_, trace_trans_restart_relock(trans->ip, _RET_IP_,
iter->btree_id, &iter->real_pos); iter->btree_id, &iter->real_pos);
BUG_ON(!trans->restarted);
return false; return false;
} }
return true; return true;
@ -1055,11 +1095,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
return b == *rootp ? 0 : -1; return b == *rootp ? 0 : -1;
} }
static inline int btree_iter_lock_root(struct btree_iter *iter, static inline int btree_iter_lock_root(struct btree_trans *trans,
struct btree_iter *iter,
unsigned depth_want, unsigned depth_want,
unsigned long trace_ip) unsigned long trace_ip)
{ {
struct bch_fs *c = iter->trans->c; struct bch_fs *c = trans->c;
struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
enum six_lock_type lock_type; enum six_lock_type lock_type;
unsigned i; unsigned i;
@ -1087,8 +1128,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level, if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level,
iter, lock_type, iter, lock_type,
lock_root_check_fn, rootp, lock_root_check_fn, rootp,
trace_ip))) trace_ip))) {
return -EINTR; if (trans->restarted)
return -EINTR;
continue;
}
if (likely(b == READ_ONCE(*rootp) && if (likely(b == READ_ONCE(*rootp) &&
b->c.level == iter->level && b->c.level == iter->level &&
@ -1109,7 +1153,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
} }
noinline noinline
static void btree_iter_prefetch(struct btree_iter *iter) static int btree_iter_prefetch(struct btree_iter *iter)
{ {
struct bch_fs *c = iter->trans->c; struct bch_fs *c = iter->trans->c;
struct btree_iter_level *l = &iter->l[iter->level]; struct btree_iter_level *l = &iter->l[iter->level];
@ -1120,10 +1164,11 @@ static void btree_iter_prefetch(struct btree_iter *iter)
? (iter->level > 1 ? 0 : 2) ? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16); : (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level); bool was_locked = btree_node_locked(iter, iter->level);
int ret = 0;
bch2_bkey_buf_init(&tmp); bch2_bkey_buf_init(&tmp);
while (nr) { while (nr && !ret) {
if (!bch2_btree_node_relock(iter, iter->level)) if (!bch2_btree_node_relock(iter, iter->level))
break; break;
@ -1133,14 +1178,15 @@ static void btree_iter_prefetch(struct btree_iter *iter)
break; break;
bch2_bkey_buf_unpack(&tmp, c, l->b, k); bch2_bkey_buf_unpack(&tmp, c, l->b, k);
bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
iter->level - 1); iter->level - 1);
} }
if (!was_locked) if (!was_locked)
btree_node_unlock(iter, iter->level); btree_node_unlock(iter, iter->level);
bch2_bkey_buf_exit(&tmp, c); bch2_bkey_buf_exit(&tmp, c);
return ret;
} }
static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
@ -1164,10 +1210,11 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
btree_node_unlock(iter, plevel); btree_node_unlock(iter, plevel);
} }
static __always_inline int btree_iter_down(struct btree_iter *iter, static __always_inline int btree_iter_down(struct btree_trans *trans,
struct btree_iter *iter,
unsigned long trace_ip) unsigned long trace_ip)
{ {
struct bch_fs *c = iter->trans->c; struct bch_fs *c = trans->c;
struct btree_iter_level *l = &iter->l[iter->level]; struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b; struct btree *b;
unsigned level = iter->level - 1; unsigned level = iter->level - 1;
@ -1181,7 +1228,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b)); bch2_btree_node_iter_peek(&l->iter, l->b));
b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip); b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b); ret = PTR_ERR_OR_ZERO(b);
if (unlikely(ret)) if (unlikely(ret))
goto err; goto err;
@ -1194,7 +1241,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
btree_node_mem_ptr_set(iter, level + 1, b); btree_node_mem_ptr_set(iter, level + 1, b);
if (iter->flags & BTREE_ITER_PREFETCH) if (iter->flags & BTREE_ITER_PREFETCH)
btree_iter_prefetch(iter); ret = btree_iter_prefetch(iter);
if (btree_node_read_locked(iter, level + 1)) if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1); btree_node_unlock(iter, level + 1);
@ -1215,25 +1262,19 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
struct btree_iter *iter; struct btree_iter *iter;
u8 sorted[BTREE_ITER_MAX]; u8 sorted[BTREE_ITER_MAX];
int i, nr_sorted = 0; int i, nr_sorted = 0;
bool relock_fail;
if (trans->in_traverse_all) if (trans->in_traverse_all)
return -EINTR; return -EINTR;
trans->in_traverse_all = true; trans->in_traverse_all = true;
retry_all: retry_all:
trans->restarted = false;
nr_sorted = 0; nr_sorted = 0;
relock_fail = false;
trans_for_each_iter(trans, iter) { trans_for_each_iter(trans, iter) {
if (!bch2_btree_iter_relock(iter, _THIS_IP_))
relock_fail = true;
sorted[nr_sorted++] = iter->idx; sorted[nr_sorted++] = iter->idx;
} iter->should_be_locked = false;
if (!relock_fail) {
trans->in_traverse_all = false;
return 0;
} }
#define btree_iter_cmp_by_idx(_l, _r) \ #define btree_iter_cmp_by_idx(_l, _r) \
@ -1289,15 +1330,6 @@ retry_all:
if (ret) if (ret)
goto retry_all; goto retry_all;
} }
if (hweight64(trans->iters_live) > 1)
ret = -EINTR;
else
trans_for_each_iter(trans, iter)
if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
ret = -EINTR;
break;
}
out: out:
bch2_btree_cache_cannibalize_unlock(c); bch2_btree_cache_cannibalize_unlock(c);
@ -1307,7 +1339,7 @@ out:
return ret; return ret;
} }
int bch2_btree_iter_traverse_all(struct btree_trans *trans) static int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{ {
return __btree_iter_traverse_all(trans, 0, _RET_IP_); return __btree_iter_traverse_all(trans, 0, _RET_IP_);
} }
@ -1353,9 +1385,19 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
static int btree_iter_traverse_one(struct btree_iter *iter, static int btree_iter_traverse_one(struct btree_iter *iter,
unsigned long trace_ip) unsigned long trace_ip)
{ {
struct btree_trans *trans = iter->trans;
unsigned l, depth_want = iter->level; unsigned l, depth_want = iter->level;
int ret = 0; int ret = 0;
/*
* Ensure we obey iter->should_be_locked: if it's set, we can't unlock
* and re-traverse the iterator without a transaction restart:
*/
if (iter->should_be_locked) {
ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR;
goto out;
}
if (btree_iter_type(iter) == BTREE_ITER_CACHED) { if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
ret = bch2_btree_iter_traverse_cached(iter); ret = bch2_btree_iter_traverse_cached(iter);
goto out; goto out;
@ -1385,8 +1427,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
*/ */
while (iter->level > depth_want) { while (iter->level > depth_want) {
ret = btree_iter_node(iter, iter->level) ret = btree_iter_node(iter, iter->level)
? btree_iter_down(iter, trace_ip) ? btree_iter_down(trans, iter, trace_ip)
: btree_iter_lock_root(iter, depth_want, trace_ip); : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
if (unlikely(ret)) { if (unlikely(ret)) {
if (ret == 1) { if (ret == 1) {
/* /*
@ -1414,7 +1456,9 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
iter->uptodate = BTREE_ITER_NEED_PEEK; iter->uptodate = BTREE_ITER_NEED_PEEK;
out: out:
trace_iter_traverse(iter->trans->ip, trace_ip, BUG_ON((ret == -EINTR) != !!trans->restarted);
trace_iter_traverse(trans->ip, trace_ip,
btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos, ret); iter->btree_id, &iter->real_pos, ret);
bch2_btree_iter_verify(iter); bch2_btree_iter_verify(iter);
return ret; return ret;
@ -1427,8 +1471,10 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
ret = bch2_trans_cond_resched(trans) ?: ret = bch2_trans_cond_resched(trans) ?:
btree_iter_traverse_one(iter, _RET_IP_); btree_iter_traverse_one(iter, _RET_IP_);
if (unlikely(ret)) if (unlikely(ret) && hweight64(trans->iters_linked) == 1) {
ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
BUG_ON(ret == -EINTR);
}
return ret; return ret;
} }
@ -1559,6 +1605,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
int cmp = bpos_cmp(new_pos, iter->real_pos); int cmp = bpos_cmp(new_pos, iter->real_pos);
unsigned l = iter->level; unsigned l = iter->level;
EBUG_ON(iter->trans->restarted);
if (!cmp) if (!cmp)
goto out; goto out;
@ -2118,6 +2166,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
struct btree_iter *iter, *best = NULL; struct btree_iter *iter, *best = NULL;
struct bpos real_pos, pos_min = POS_MIN; struct bpos real_pos, pos_min = POS_MIN;
EBUG_ON(trans->restarted);
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) && btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS) && !(flags & BTREE_ITER_NOT_EXTENTS) &&
@ -2282,6 +2332,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (old_bytes) { if (old_bytes) {
trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
btree_trans_restart(trans);
return ERR_PTR(-EINTR); return ERR_PTR(-EINTR);
} }
} }
@ -2307,33 +2358,26 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans)
} }
/** /**
* bch2_trans_reset() - reset a transaction after a interrupted attempt * bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset * @trans: transaction to reset
* @flags: transaction reset flags.
* *
* While iterating over nodes or updating nodes a attempt to lock a btree * While iterating over nodes or updating nodes a attempt to lock a btree
* node may return EINTR when the trylock fails. When this occurs * node may return EINTR when the trylock fails. When this occurs
* bch2_trans_reset() or bch2_trans_begin() should be called and the * bch2_trans_begin() should be called and the transaction retried.
* transaction retried.
*
* Transaction reset flags include:
*
* - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the
* transaction.
* - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters.
*/ */
void bch2_trans_reset(struct btree_trans *trans, unsigned flags) void bch2_trans_begin(struct btree_trans *trans)
{ {
struct btree_iter *iter; struct btree_iter *iter;
trans_for_each_iter(trans, iter) { trans_for_each_iter(trans, iter)
iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT|
BTREE_ITER_SET_POS_AFTER_COMMIT); BTREE_ITER_SET_POS_AFTER_COMMIT);
iter->should_be_locked = false;
}
/*
* XXX: we shouldn't be doing this if the transaction was restarted, but
* currently we still overflow transaction iterators if we do that
* */
bch2_trans_unlink_iters(trans); bch2_trans_unlink_iters(trans);
trans->iters_touched &= trans->iters_live; trans->iters_touched &= trans->iters_live;
trans->extra_journal_res = 0; trans->extra_journal_res = 0;
@ -2351,12 +2395,12 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
(void *) &trans->fs_usage_deltas->memset_start); (void *) &trans->fs_usage_deltas->memset_start);
} }
if (!(flags & TRANS_RESET_NOUNLOCK)) bch2_trans_cond_resched(trans);
bch2_trans_cond_resched(trans);
if (!(flags & TRANS_RESET_NOTRAVERSE) && if (trans->restarted)
trans->iters_linked)
bch2_btree_iter_traverse_all(trans); bch2_btree_iter_traverse_all(trans);
trans->restarted = false;
} }
static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c) static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)

View File

@ -111,11 +111,20 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_packed *, struct btree_node_iter *, struct bkey_packed *,
unsigned, unsigned); unsigned, unsigned);
bool bch2_btree_iter_relock_intent(struct btree_iter *);
bool bch2_btree_iter_relock(struct btree_iter *, unsigned long); bool bch2_btree_iter_relock(struct btree_iter *, unsigned long);
bool bch2_trans_relock(struct btree_trans *); bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *);
__always_inline
static inline int btree_trans_restart(struct btree_trans *trans)
{
trans->restarted = true;
bch2_trans_unlock(trans);
return -EINTR;
}
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
@ -147,8 +156,6 @@ void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
int __must_check bch2_btree_iter_traverse(struct btree_iter *); int __must_check bch2_btree_iter_traverse(struct btree_iter *);
int bch2_btree_iter_traverse_all(struct btree_trans *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *);
@ -316,22 +323,7 @@ static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btr
trans->iters_touched &= ~(1ULL << iter->idx); trans->iters_touched &= ~(1ULL << iter->idx);
} }
#define TRANS_RESET_NOTRAVERSE (1 << 0) void bch2_trans_begin(struct btree_trans *);
#define TRANS_RESET_NOUNLOCK (1 << 1)
void bch2_trans_reset(struct btree_trans *, unsigned);
/**
* bch2_trans_begin() - ensure lock consistency of transaction on retry
* @trans: transaction to prepare
*
* Ensure lock ordering is correct before potentially retrying a transaction
* after a failed trylock.
*/
static inline void bch2_trans_begin(struct btree_trans *trans)
{
return bch2_trans_reset(trans, 0);
}
void *bch2_trans_kmalloc(struct btree_trans *, size_t); void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);

View File

@ -214,7 +214,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
if (!bch2_btree_node_relock(ck_iter, 0)) { if (!bch2_btree_node_relock(ck_iter, 0)) {
trace_transaction_restart_ip(trans->ip, _THIS_IP_); trace_transaction_restart_ip(trans->ip, _THIS_IP_);
ret = -EINTR; ret = btree_trans_restart(trans);
goto err; goto err;
} }
@ -233,6 +233,10 @@ static int btree_key_cache_fill(struct btree_trans *trans,
} }
} }
/*
* XXX: not allowed to be holding read locks when we take a write lock,
* currently
*/
bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
if (new_k) { if (new_k) {
kfree(ck->k); kfree(ck->k);
@ -299,10 +303,8 @@ retry:
if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
bkey_cached_check_fn, iter, _THIS_IP_)) { bkey_cached_check_fn, iter, _THIS_IP_)) {
if (ck->key.btree_id != iter->btree_id || if (!trans->restarted)
bpos_cmp(ck->key.pos, iter->pos)) {
goto retry; goto retry;
}
trace_transaction_restart_ip(trans->ip, _THIS_IP_); trace_transaction_restart_ip(trans->ip, _THIS_IP_);
ret = -EINTR; ret = -EINTR;
@ -322,10 +324,10 @@ retry:
iter->l[0].b = (void *) ck; iter->l[0].b = (void *) ck;
fill: fill:
if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
if (!btree_node_intent_locked(iter, 0)) if (!iter->locks_want &&
bch2_btree_iter_upgrade(iter, 1); !!__bch2_btree_iter_upgrade(iter, 1)) {
if (!btree_node_intent_locked(iter, 0)) {
trace_transaction_restart_ip(trans->ip, _THIS_IP_); trace_transaction_restart_ip(trans->ip, _THIS_IP_);
BUG_ON(!trans->restarted);
ret = -EINTR; ret = -EINTR;
goto err; goto err;
} }
@ -340,13 +342,14 @@ fill:
iter->uptodate = BTREE_ITER_NEED_PEEK; iter->uptodate = BTREE_ITER_NEED_PEEK;
if (!(iter->flags & BTREE_ITER_INTENT)) if ((iter->flags & BTREE_ITER_INTENT) &&
bch2_btree_iter_downgrade(iter); !bch2_btree_iter_upgrade(iter, 1)) {
else if (!iter->locks_want) { BUG_ON(!trans->restarted);
if (!__bch2_btree_iter_upgrade(iter, 1)) ret = -EINTR;
ret = -EINTR;
} }
BUG_ON(!ret && !btree_node_locked(iter, 0));
return ret; return ret;
err: err:
if (ret != -EINTR) { if (ret != -EINTR) {
@ -377,10 +380,9 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_CACHED_NOFILL| BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_CACHED_NOCREATE| BTREE_ITER_CACHED_NOCREATE|
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
retry:
ret = bch2_btree_iter_traverse(c_iter); ret = bch2_btree_iter_traverse(c_iter);
if (ret) if (ret)
goto err; goto out;
ck = (void *) c_iter->l[0].b; ck = (void *) c_iter->l[0].b;
if (!ck || if (!ck ||
@ -399,9 +401,10 @@ retry:
* to be using alloc reserves: * to be using alloc reserves:
* */ * */
ret = bch2_btree_iter_traverse(b_iter) ?: ret = bch2_btree_iter_traverse(b_iter) ?:
bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?: bch2_trans_update(trans, b_iter, ck->k,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL, bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE| BTREE_INSERT_USE_RESERVE|
@ -409,15 +412,10 @@ retry:
? BTREE_INSERT_JOURNAL_RESERVED ? BTREE_INSERT_JOURNAL_RESERVED
: 0)| : 0)|
commit_flags); commit_flags);
err:
if (ret == -EINTR)
goto retry;
if (ret == -EAGAIN)
goto out;
if (ret) { if (ret) {
bch2_fs_fatal_err_on(!bch2_journal_error(j), c, bch2_fs_fatal_err_on(ret != -EINTR &&
ret != -EAGAIN &&
!bch2_journal_error(j), c,
"error flushing key cache: %i", ret); "error flushing key cache: %i", ret);
goto out; goto out;
} }
@ -465,7 +463,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck = struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal); container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key; struct bkey_cached_key key;
struct btree_trans trans;
int ret = 0; int ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@ -480,10 +477,9 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
} }
six_unlock_read(&ck->c.lock); six_unlock_read(&ck->c.lock);
bch2_trans_init(&trans, c, 0, 0); ret = bch2_trans_do(c, NULL, NULL, 0,
ret = btree_key_cache_flush_pos(&trans, key, seq, btree_key_cache_flush_pos(&trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false); BTREE_INSERT_JOURNAL_RECLAIM, false));
bch2_trans_exit(&trans);
unlock: unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);

View File

@ -380,9 +380,10 @@ struct btree_trans {
int srcu_idx; int srcu_idx;
u8 nr_updates; u8 nr_updates;
unsigned used_mempool:1; bool used_mempool:1;
unsigned error:1; bool error:1;
unsigned in_traverse_all:1; bool in_traverse_all:1;
bool restarted:1;
/* /*
* For when bch2_trans_update notices we'll be splitting a compressed * For when bch2_trans_update notices we'll be splitting a compressed
* extent: * extent:

View File

@ -15,7 +15,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
enum btree_insert_flags { enum btree_insert_flags {
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW, __BTREE_INSERT_LAZY_RW,
@ -29,11 +28,6 @@ enum btree_insert_flags {
__BCH_HASH_SET_MUST_REPLACE, __BCH_HASH_SET_MUST_REPLACE,
}; };
/*
* Don't drop locks _after_ successfully updating btree:
*/
#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK)
/* Don't check for -ENOSPC: */ /* Don't check for -ENOSPC: */
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
@ -110,12 +104,10 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
({ \ ({ \
int _ret; \ int _ret; \
\ \
while (1) { \ do { \
bch2_trans_begin(_trans); \
_ret = (_do); \ _ret = (_do); \
if (_ret != -EINTR) \ } while (_ret == -EINTR); \
break; \
bch2_trans_reset(_trans, 0); \
} \
\ \
_ret; \ _ret; \
}) })

View File

@ -960,9 +960,6 @@ retry:
if (flags & BTREE_INSERT_GC_LOCK_HELD) if (flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock); lockdep_assert_held(&c->gc_lock);
else if (!down_read_trylock(&c->gc_lock)) { else if (!down_read_trylock(&c->gc_lock)) {
if (flags & BTREE_INSERT_NOUNLOCK)
return ERR_PTR(-EINTR);
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
down_read(&c->gc_lock); down_read(&c->gc_lock);
if (!bch2_trans_relock(trans)) { if (!bch2_trans_relock(trans)) {
@ -1005,20 +1002,11 @@ retry:
BTREE_UPDATE_JOURNAL_RES, BTREE_UPDATE_JOURNAL_RES,
journal_flags|JOURNAL_RES_GET_NONBLOCK); journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret == -EAGAIN) { if (ret == -EAGAIN) {
/*
* this would be cleaner if bch2_journal_preres_get() took a
* closure argument
*/
if (flags & BTREE_INSERT_NOUNLOCK) {
trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_);
ret = -EINTR;
goto err;
}
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
if (flags & BTREE_INSERT_JOURNAL_RECLAIM) { if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
bch2_btree_update_free(as); bch2_btree_update_free(as);
btree_trans_restart(trans);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
@ -1043,8 +1031,7 @@ retry:
if (ret) if (ret)
goto err; goto err;
ret = bch2_btree_reserve_get(as, nr_nodes, flags, ret = bch2_btree_reserve_get(as, nr_nodes, flags, &cl);
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (ret) if (ret)
goto err; goto err;
@ -1057,8 +1044,6 @@ err:
bch2_btree_update_free(as); bch2_btree_update_free(as);
if (ret == -EAGAIN) { if (ret == -EAGAIN) {
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
closure_sync(&cl); closure_sync(&cl);
ret = -EINTR; ret = -EINTR;
@ -1593,12 +1578,12 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
size_t sib_u64s; size_t sib_u64s;
int ret = 0, ret2 = 0; int ret = 0, ret2 = 0;
BUG_ON(!btree_node_locked(iter, level));
retry: retry:
ret = bch2_btree_iter_traverse(iter); ret = bch2_btree_iter_traverse(iter);
if (ret) if (ret)
goto err; return ret;
BUG_ON(!iter->should_be_locked);
BUG_ON(!btree_node_locked(iter, level)); BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b; b = iter->l[level].b;
@ -1751,13 +1736,6 @@ err:
if (ret == -EINTR && bch2_trans_relock(trans)) if (ret == -EINTR && bch2_trans_relock(trans))
goto retry; goto retry;
if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
ret2 = ret;
ret = bch2_btree_iter_traverse_all(trans);
if (!ret)
goto retry;
}
goto out; goto out;
} }
@ -1949,8 +1927,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM| BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED| BTREE_INSERT_JOURNAL_RESERVED);
BTREE_INSERT_NOUNLOCK);
if (ret) if (ret)
goto err; goto err;

View File

@ -348,11 +348,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
} }
} }
static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
{
__bch2_btree_iter_unlock(iter);
}
static noinline void bch2_trans_mark_gc(struct btree_trans *trans) static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
@ -384,6 +379,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (race_fault()) { if (race_fault()) {
trace_trans_restart_fault_inject(trans->ip, trace_ip); trace_trans_restart_fault_inject(trans->ip, trace_ip);
trans->restarted = true;
return -EINTR; return -EINTR;
} }
@ -520,10 +516,11 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree
u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0; u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
} }
return u64s_delta <= 0 if (u64s_delta > 0)
? (bch2_foreground_maybe_merge(trans, iter, iter->level, return 0;
trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
: 0; return bch2_foreground_maybe_merge(trans, iter,
iter->level, trans->flags);
} }
/* /*
@ -580,20 +577,15 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
* or anything else that might call bch2_trans_relock(), since that * or anything else that might call bch2_trans_relock(), since that
* would just retake the read locks: * would just retake the read locks:
*/ */
trans_for_each_iter(trans, iter) { trans_for_each_iter(trans, iter)
if (iter->nodes_locked != iter->nodes_intent_locked) { if (iter->nodes_locked != iter->nodes_intent_locked &&
if (btree_iter_keep(trans, iter)) { !bch2_btree_iter_upgrade(iter, 1)) {
if (!bch2_btree_iter_upgrade(iter, 1)) { trace_trans_restart_upgrade(trans->ip, trace_ip,
trace_trans_restart_upgrade(trans->ip, trace_ip, iter->btree_id,
iter->btree_id, &iter->real_pos);
&iter->real_pos); trans->restarted = true;
return -EINTR; return -EINTR;
}
} else {
bch2_btree_iter_unlock_noinline(iter);
}
} }
}
trans_for_each_update(trans, i) { trans_for_each_update(trans, i) {
const char *invalid = bch2_bkey_invalid(c, const char *invalid = bch2_bkey_invalid(c,
@ -655,56 +647,24 @@ int bch2_trans_commit_error(struct btree_trans *trans,
int ret, unsigned long trace_ip) int ret, unsigned long trace_ip)
{ {
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
unsigned flags = trans->flags;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
* update; if we haven't done anything yet it doesn't apply
*/
flags &= ~BTREE_INSERT_NOUNLOCK;
switch (ret) { switch (ret) {
case BTREE_INSERT_BTREE_NODE_FULL: case BTREE_INSERT_BTREE_NODE_FULL:
ret = bch2_btree_split_leaf(trans, i->iter, flags); ret = bch2_btree_split_leaf(trans, i->iter, trans->flags);
/*
* if the split succeeded without dropping locks the insert will
* still be atomic (what the caller peeked() and is overwriting
* won't have changed)
*/
#if 0
/*
* XXX:
* split -> btree node merging (of parent node) might still drop
* locks when we're not passing it BTREE_INSERT_NOUNLOCK
*
* we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
* will inhibit merging - but we don't have a reliable way yet
* (do we?) of checking if we dropped locks in this path
*/
if (!ret) if (!ret)
goto retry; return 0;
#endif
/* if (ret == -EINTR)
* don't care if we got ENOSPC because we told split it
* couldn't block:
*/
if (!ret ||
ret == -EINTR ||
(flags & BTREE_INSERT_NOUNLOCK)) {
trace_trans_restart_btree_node_split(trans->ip, trace_ip, trace_trans_restart_btree_node_split(trans->ip, trace_ip,
i->iter->btree_id, i->iter->btree_id,
&i->iter->real_pos); &i->iter->real_pos);
ret = -EINTR;
}
break; break;
case BTREE_INSERT_NEED_MARK_REPLICAS: case BTREE_INSERT_NEED_MARK_REPLICAS:
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas); ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
if (ret) if (ret)
return ret; break;
if (bch2_trans_relock(trans)) if (bch2_trans_relock(trans))
return 0; return 0;
@ -716,12 +676,15 @@ int bch2_trans_commit_error(struct btree_trans *trans,
bch2_trans_unlock(trans); bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
return -EAGAIN; trans->restarted = true;
ret = -EAGAIN;
break;
}
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret) if (ret)
return ret; break;
if (bch2_trans_relock(trans)) if (bch2_trans_relock(trans))
return 0; return 0;
@ -737,7 +700,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
wait_event_freezable(c->journal.reclaim_wait, wait_event_freezable(c->journal.reclaim_wait,
(ret = journal_reclaim_wait_done(c))); (ret = journal_reclaim_wait_done(c)));
if (ret < 0) if (ret < 0)
return ret; break;
if (bch2_trans_relock(trans)) if (bch2_trans_relock(trans))
return 0; return 0;
@ -750,7 +713,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
break; break;
} }
BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL)); BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted);
BUG_ON(ret == -ENOSPC && (trans->flags & BTREE_INSERT_NOFAIL));
return ret; return ret;
} }
@ -839,8 +803,10 @@ static int extent_handle_overwrites(struct btree_trans *trans,
BTREE_ITER_NOT_EXTENTS| BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT); BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(update_iter); ret = bch2_btree_iter_traverse(update_iter);
if (ret) if (ret) {
bch2_trans_iter_put(trans, update_iter);
goto out; goto out;
}
bch2_trans_update(trans, update_iter, update, bch2_trans_update(trans, update_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
@ -887,7 +853,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
struct btree_insert_entry *i = NULL; struct btree_insert_entry *i = NULL;
struct btree_iter *iter; struct btree_iter *iter;
bool trans_trigger_run; bool trans_trigger_run;
unsigned u64s, reset_flags = 0; unsigned u64s;
int ret = 0; int ret = 0;
if (!trans->nr_updates && if (!trans->nr_updates &&
@ -944,18 +910,13 @@ int __bch2_trans_commit(struct btree_trans *trans)
} while (trans_trigger_run); } while (trans_trigger_run);
trans_for_each_update(trans, i) { trans_for_each_update(trans, i) {
ret = bch2_btree_iter_traverse(i->iter); BUG_ON(!i->iter->should_be_locked);
if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip, _RET_IP_,
i->iter->btree_id,
&i->iter->pos);
goto out;
}
if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) { if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
trace_trans_restart_upgrade(trans->ip, _RET_IP_, trace_trans_restart_upgrade(trans->ip, _RET_IP_,
i->iter->btree_id, i->iter->btree_id,
&i->iter->pos); &i->iter->pos);
trans->restarted = true;
ret = -EINTR; ret = -EINTR;
goto out; goto out;
} }
@ -978,6 +939,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto err; goto err;
} }
retry: retry:
BUG_ON(trans->restarted);
memset(&trans->journal_res, 0, sizeof(trans->journal_res)); memset(&trans->journal_res, 0, sizeof(trans->journal_res));
ret = do_bch2_trans_commit(trans, &i, _RET_IP_); ret = do_bch2_trans_commit(trans, &i, _RET_IP_);
@ -998,11 +960,18 @@ out:
if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&trans->c->writes); percpu_ref_put(&trans->c->writes);
out_reset: out_reset:
if (!ret) trans->extra_journal_res = 0;
reset_flags |= TRANS_RESET_NOTRAVERSE; trans->nr_updates = 0;
if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK)) trans->hooks = NULL;
reset_flags |= TRANS_RESET_NOUNLOCK; trans->extra_journal_entries = NULL;
bch2_trans_reset(trans, reset_flags); trans->extra_journal_entry_u64s = 0;
if (trans->fs_usage_deltas) {
trans->fs_usage_deltas->used = 0;
memset(&trans->fs_usage_deltas->memset_start, 0,
(void *) &trans->fs_usage_deltas->memset_end -
(void *) &trans->fs_usage_deltas->memset_start);
}
return ret; return ret;
err: err:
@ -1050,7 +1019,11 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p, n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p,
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS); BTREE_ITER_NOT_EXTENTS);
ret = bch2_btree_iter_traverse(n.iter);
bch2_trans_iter_put(trans, n.iter); bch2_trans_iter_put(trans, n.iter);
if (ret)
return ret;
} }
BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS); BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);

View File

@ -210,6 +210,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
goto out; goto out;
old_dst = bch2_btree_iter_peek_slot(dst_iter); old_dst = bch2_btree_iter_peek_slot(dst_iter);
ret = bkey_err(old_dst);
if (ret)
goto out;
if (mode != BCH_RENAME) if (mode != BCH_RENAME)
*dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum);
@ -225,6 +228,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
goto out; goto out;
old_src = bch2_btree_iter_peek_slot(src_iter); old_src = bch2_btree_iter_peek_slot(src_iter);
ret = bkey_err(old_src);
if (ret)
goto out;
*src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum);
/* Create new dst key: */ /* Create new dst key: */
@ -329,20 +336,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c k; struct bkey_s_c k;
u64 inum = 0; u64 inum = 0;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
iter = __bch2_dirent_lookup_trans(&trans, dir_inum, iter = __bch2_dirent_lookup_trans(&trans, dir_inum,
hash_info, name, 0); hash_info, name, 0);
if (IS_ERR(iter)) { ret = PTR_ERR_OR_ZERO(iter);
BUG_ON(PTR_ERR(iter) == -EINTR); if (ret)
goto out; goto out;
}
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto out;
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
bch2_trans_iter_put(&trans, iter); bch2_trans_iter_put(&trans, iter);
out: out:
BUG_ON(ret == -EINTR);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return inum; return inum;
} }

View File

@ -168,6 +168,10 @@ int bch2_unlink_trans(struct btree_trans *trans,
goto err; goto err;
k = bch2_btree_iter_peek_slot(dirent_iter); k = bch2_btree_iter_peek_slot(dirent_iter);
ret = bkey_err(k);
if (ret)
goto err;
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);

View File

@ -801,6 +801,8 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk); bch2_bkey_buf_init(&sk);
retry: retry:
bch2_trans_begin(trans);
while (1) { while (1) {
struct bkey_s_c k; struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent; unsigned bytes, sectors, offset_into_extent;
@ -2301,8 +2303,6 @@ int bch2_truncate(struct user_namespace *mnt_userns,
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping; struct address_space *mapping = inode->v.i_mapping;
struct bch_inode_unpacked inode_u; struct bch_inode_unpacked inode_u;
struct btree_trans trans;
struct btree_iter *iter;
u64 new_i_size = iattr->ia_size; u64 new_i_size = iattr->ia_size;
s64 i_sectors_delta = 0; s64 i_sectors_delta = 0;
int ret = 0; int ret = 0;
@ -2323,16 +2323,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
inode_dio_wait(&inode->v); inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock); bch2_pagecache_block_get(&inode->ei_pagecache_lock);
/* ret = bch2_inode_find_by_inum(c, inode->v.i_ino, &inode_u);
* fetch current on disk i_size: inode is locked, i_size can only
* increase underneath us:
*/
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
ret = PTR_ERR_OR_ZERO(iter);
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
if (ret) if (ret)
goto err; goto err;
@ -2557,6 +2548,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
struct bpos atomic_end; struct bpos atomic_end;
unsigned trigger_flags = 0; unsigned trigger_flags = 0;
bch2_trans_begin(&trans);
k = insert k = insert
? bch2_btree_iter_peek_prev(src) ? bch2_btree_iter_peek_prev(src)
: bch2_btree_iter_peek(src); : bch2_btree_iter_peek(src);
@ -2684,13 +2677,13 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
/* already reserved */ /* already reserved */
if (k.k->type == KEY_TYPE_reservation && if (k.k->type == KEY_TYPE_reservation &&
bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
bch2_btree_iter_next_slot(iter); bch2_btree_iter_advance(iter);
continue; continue;
} }
if (bkey_extent_is_data(k.k) && if (bkey_extent_is_data(k.k) &&
!(mode & FALLOC_FL_ZERO_RANGE)) { !(mode & FALLOC_FL_ZERO_RANGE)) {
bch2_btree_iter_next_slot(iter); bch2_btree_iter_advance(iter);
continue; continue;
} }

View File

@ -156,7 +156,6 @@ retry:
bch2_inode_write(&trans, iter, &inode_u) ?: bch2_inode_write(&trans, iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq, &inode->ei_journal_seq,
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL); BTREE_INSERT_NOFAIL);
/* /*
@ -294,8 +293,7 @@ retry:
if (unlikely(ret)) if (unlikely(ret))
goto err_before_quota; goto err_before_quota;
ret = bch2_trans_commit(&trans, NULL, &journal_seq, ret = bch2_trans_commit(&trans, NULL, &journal_seq, 0);
BTREE_INSERT_NOUNLOCK);
if (unlikely(ret)) { if (unlikely(ret)) {
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN); KEY_TYPE_QUOTA_WARN);
@ -416,8 +414,7 @@ static int __bch2_link(struct bch_fs *c,
mutex_lock(&inode->ei_update_lock); mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024); bch2_trans_init(&trans, c, 4, 1024);
ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0,
BTREE_INSERT_NOUNLOCK,
bch2_link_trans(&trans, bch2_link_trans(&trans,
dir->v.i_ino, dir->v.i_ino,
inode->v.i_ino, &dir_u, &inode_u, inode->v.i_ino, &dir_u, &inode_u,
@ -469,7 +466,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
bch2_trans_init(&trans, c, 4, 1024); bch2_trans_init(&trans, c, 4, 1024);
ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL, BTREE_INSERT_NOFAIL,
bch2_unlink_trans(&trans, bch2_unlink_trans(&trans,
dir->v.i_ino, &dir_u, dir->v.i_ino, &dir_u,
@ -590,8 +586,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns,
goto err; goto err;
} }
ret = __bch2_trans_do(&trans, NULL, &journal_seq, ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0,
BTREE_INSERT_NOUNLOCK,
bch2_rename_trans(&trans, bch2_rename_trans(&trans,
src_dir->v.i_ino, &src_dir_u, src_dir->v.i_ino, &src_dir_u,
dst_dir->v.i_ino, &dst_dir_u, dst_dir->v.i_ino, &dst_dir_u,
@ -734,7 +729,6 @@ retry:
ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq, &inode->ei_journal_seq,
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL); BTREE_INSERT_NOFAIL);
btree_err: btree_err:
bch2_trans_iter_put(&trans, inode_iter); bch2_trans_iter_put(&trans, inode_iter);
@ -909,6 +903,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
POS(ei->v.i_ino, start >> 9), 0); POS(ei->v.i_ino, start >> 9), 0);
retry: retry:
bch2_trans_begin(&trans);
while ((k = bch2_btree_iter_peek(iter)).k && while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) && !(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) { bkey_cmp(iter->pos, end) < 0) {

View File

@ -727,7 +727,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
k = bch2_btree_iter_peek(iter); k = bch2_btree_iter_peek(iter);
if (!k.k) if (!k.k)
return 1; return 0;
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
@ -803,8 +803,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
ret = __write_inode(trans, &target, target_snapshot) ?: ret = __write_inode(trans, &target, target_snapshot) ?:
bch2_trans_commit(trans, NULL, NULL, bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
BTREE_INSERT_LAZY_RW| BTREE_INSERT_LAZY_RW);
BTREE_INSERT_NOUNLOCK);
if (ret) if (ret)
return ret; return ret;
return -EINTR; return -EINTR;
@ -904,19 +903,12 @@ static int check_dirents(struct bch_fs *c)
BTREE_ITER_INTENT| BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH); BTREE_ITER_PREFETCH);
while (1) { do {
ret = lockrestart_do(&trans, ret = lockrestart_do(&trans,
check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs)); check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
if (ret == 1) {
/* at end */
ret = 0;
break;
}
if (ret) if (ret)
break; break;
} while (bch2_btree_iter_advance(iter));
bch2_btree_iter_advance(iter);
}
bch2_trans_iter_put(&trans, iter); bch2_trans_iter_put(&trans, iter);
return bch2_trans_exit(&trans) ?: ret; return bch2_trans_exit(&trans) ?: ret;

View File

@ -519,7 +519,7 @@ again:
if (k.k->p.snapshot == snapshot && if (k.k->p.snapshot == snapshot &&
k.k->type != KEY_TYPE_inode && k.k->type != KEY_TYPE_inode &&
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) { !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
bch2_btree_iter_next(iter); bch2_btree_iter_advance(iter);
continue; continue;
} }

View File

@ -235,8 +235,12 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
* writing to, because i_size could be up to one block * writing to, because i_size could be up to one block
* less: * less:
*/ */
if (!bkey_cmp(old.k->p, new->k.p)) if (!bkey_cmp(old.k->p, new->k.p)) {
old = bch2_btree_iter_next(iter); old = bch2_btree_iter_next(iter);
ret = bkey_err(old);
if (ret)
break;
}
if (old.k && !bkey_err(old) && if (old.k && !bkey_err(old) &&
old.k->p.inode == extent_iter->pos.inode && old.k->p.inode == extent_iter->pos.inode &&
@ -362,14 +366,13 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k; struct bkey_s_c k;
int ret = 0, ret2 = 0; int ret = 0, ret2 = 0;
while ((k = bch2_btree_iter_peek(iter)).k && while ((bch2_trans_begin(trans),
(k = bch2_btree_iter_peek(iter)).k) &&
bkey_cmp(iter->pos, end) < 0) { bkey_cmp(iter->pos, end) < 0) {
struct disk_reservation disk_res = struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0); bch2_disk_reservation_init(c, 0);
struct bkey_i delete; struct bkey_i delete;
bch2_trans_begin(trans);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) if (ret)
goto btree_err; goto btree_err;
@ -2270,12 +2273,13 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
bch2_bkey_buf_init(&sk); bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0); bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
POS(inode, bvec_iter.bi_sector), POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS); BTREE_ITER_SLOTS);
retry:
bch2_trans_begin(&trans);
while (1) { while (1) {
unsigned bytes, sectors, offset_into_extent; unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents; enum btree_id data_btree = BTREE_ID_extents;
@ -2331,19 +2335,20 @@ retry:
swap(bvec_iter.bi_size, bytes); swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
} }
bch2_trans_iter_put(&trans, iter);
if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
goto retry; goto retry;
bch2_trans_iter_put(&trans, iter);
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&sk, c);
if (ret) { if (ret) {
bch_err_inum_ratelimited(c, inode, bch_err_inum_ratelimited(c, inode,
"read error %i from btree lookup", ret); "read error %i from btree lookup", ret);
rbio->bio.bi_status = BLK_STS_IOERR; rbio->bio.bi_status = BLK_STS_IOERR;
bch2_rbio_done(rbio); bch2_rbio_done(rbio);
} }
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&sk, c);
} }
void bch2_fs_io_exit(struct bch_fs *c) void bch2_fs_io_exit(struct bch_fs *c)

View File

@ -84,7 +84,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bool extending = false, should_check_enospc; bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0; s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_reset(&trans, 0); bch2_trans_begin(&trans);
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k); ret = bkey_err(k);
@ -191,7 +191,7 @@ nomatch:
} }
atomic_long_inc(&c->extent_migrate_raced); atomic_long_inc(&c->extent_migrate_raced);
trace_move_race(&new->k); trace_move_race(&new->k);
bch2_btree_iter_next_slot(iter); bch2_btree_iter_advance(iter);
goto next; goto next;
} }
out: out:
@ -597,6 +597,8 @@ static int __bch2_move_data(struct bch_fs *c,
} }
} while (delay); } while (delay);
bch2_trans_begin(&trans);
k = bch2_btree_iter_peek(iter); k = bch2_btree_iter_peek(iter);
stats->pos = iter->pos; stats->pos = iter->pos;
@ -652,8 +654,7 @@ static int __bch2_move_data(struct bch_fs *c,
data_cmd, data_opts); data_cmd, data_opts);
if (ret2) { if (ret2) {
if (ret2 == -EINTR) { if (ret2 == -EINTR) {
bch2_trans_reset(&trans, 0); bch2_trans_begin(&trans);
bch2_trans_cond_resched(&trans);
continue; continue;
} }

View File

@ -178,6 +178,11 @@ enum opt_type {
OPT_BOOL(), \ OPT_BOOL(), \
BCH_SB_INODES_USE_KEY_CACHE, true, \ BCH_SB_INODES_USE_KEY_CACHE, true, \
NULL, "Use the btree key cache for the inodes btree") \ NULL, "Use the btree key cache for the inodes btree") \
x(btree_node_mem_ptr_optimization, u8, \
OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
NO_SB_OPT, true, \
NULL, "Stash pointer to in memory btree node in btree ptr")\
x(gc_reserve_percent, u8, \ x(gc_reserve_percent, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(5, 21), \ OPT_UINT(5, 21), \

View File

@ -760,7 +760,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i); bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));

View File

@ -192,8 +192,9 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
return k; return k;
} }
bch2_btree_iter_set_pos(iter, end); if (bkey_cmp(iter->pos, end) >= 0)
return bkey_s_c_null; bch2_btree_iter_set_pos(iter, end);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
} }
s64 bch2_remap_range(struct bch_fs *c, s64 bch2_remap_range(struct bch_fs *c,
@ -304,12 +305,12 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_done = dst_iter->pos.offset - dst_start.offset; dst_done = dst_iter->pos.offset - dst_start.offset;
new_i_size = min(dst_iter->pos.offset << 9, new_i_size); new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
bch2_trans_begin(&trans);
do { do {
struct bch_inode_unpacked inode_u; struct bch_inode_unpacked inode_u;
struct btree_iter *inode_iter; struct btree_iter *inode_iter;
bch2_trans_begin(&trans);
inode_iter = bch2_inode_peek(&trans, &inode_u, inode_iter = bch2_inode_peek(&trans, &inode_u,
dst_start.inode, BTREE_ITER_INTENT); dst_start.inode, BTREE_ITER_INTENT);
ret2 = PTR_ERR_OR_ZERO(inode_iter); ret2 = PTR_ERR_OR_ZERO(inode_iter);

View File

@ -209,7 +209,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
iter = bch2_trans_copy_iter(trans, start); iter = bch2_trans_copy_iter(trans, start);
bch2_btree_iter_next_slot(iter); bch2_btree_iter_advance(iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
if (k.k->type != desc.key_type && if (k.k->type != desc.key_type &&

View File

@ -118,18 +118,16 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
le16_to_cpu(xattr.v->x_val_len)); le16_to_cpu(xattr.v->x_val_len));
} }
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode,
const char *name, void *buffer, size_t size, int type) const char *name, void *buffer, size_t size, int type)
{ {
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
struct btree_trans trans;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c_xattr xattr; struct bkey_s_c_xattr xattr;
struct bkey_s_c k;
int ret; int ret;
bch2_trans_init(&trans, c, 0, 0); iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash,
iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash,
inode->v.i_ino, inode->v.i_ino,
&X_SEARCH(type, name, strlen(name)), &X_SEARCH(type, name, strlen(name)),
0); 0);
@ -137,7 +135,12 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
if (ret) if (ret)
goto err; goto err;
xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
xattr = bkey_s_c_to_xattr(k);
ret = le16_to_cpu(xattr.v->x_val_len); ret = le16_to_cpu(xattr.v->x_val_len);
if (buffer) { if (buffer) {
if (ret > size) if (ret > size)
@ -145,14 +148,18 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
else else
memcpy(buffer, xattr_val(xattr.v), ret); memcpy(buffer, xattr_val(xattr.v), ret);
} }
bch2_trans_iter_put(&trans, iter); bch2_trans_iter_put(trans, iter);
err: err:
bch2_trans_exit(&trans);
BUG_ON(ret == -EINTR);
return ret == -ENOENT ? -ENODATA : ret; return ret == -ENOENT ? -ENODATA : ret;
} }
int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
const char *name, void *buffer, size_t size, int type)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_xattr_get_trans(&trans, inode, name, buffer, size, type));
}
int bch2_xattr_set(struct btree_trans *trans, u64 inum, int bch2_xattr_set(struct btree_trans *trans, u64 inum,
const struct bch_hash_info *hash_info, const struct bch_hash_info *hash_info,
const char *name, const void *value, size_t size, const char *name, const void *value, size_t size,