blob: 8c34a7cba44d95e2105ec0033860a723974e1395 [file] [log] [blame]
From 524a4ff4a341f7701b6065a3faa1d091591e45fd Mon Sep 17 00:00:00 2001
From: Dharmendra Singh <dsingh@ddn.com>
Date: Mon, 23 Oct 2023 20:30:29 +0200
Subject: [PATCH] BACKPORT: FROMLIST: fuse: introduce atomic open
This adds full atomic open support, to avoid lookup before open/create.
If the implementation (fuse server/daemon) does not support atomic open
it falls back to non-atomic open.
Co-developed-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Dharmendra Singh <dsingh@ddn.com>
Signed-off-by: Horst Birthelmer <hbirthelmer@ddn.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dharmendra Singh <dsingh@ddn.com>
Cc: linux-fsdevel@vger.kernel.org
(am from https://patchwork.kernel.org/patch/13433386/)
(also found at https://lore.kernel.org/r/20231023183035.11035-3-bschubert@ddn.com)
Downstream changes:
fs/fuse/dir.c
Fix quoted string across lines in pr_debug()
include/uapi/linux/fuse.h
Change opcode for atomic open
UPSTREAM-TASK=b:308366794
BUG=b:307851138
TEST=run virtio-fs with atomic open implemented crosvm chromium:5009218
TEST=run vm.ManyFiles.virtiofs -var "vm.ManyFiles.kernelPath=${KERNELPATH}"
Cq-Depend: chromium:4988544
Change-Id: I88b76900e1d3fcd709dae7212b7f9b27970e403c
Signed-off-by: Yuan Yao <yuanyaogoog@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4988749
Reviewed-by: Takaya Saeki <takayas@chromium.org>
Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org>
---
fs/fuse/dir.c | 214 +++++++++++++++++++++++++++++++++++++-
fs/fuse/fuse_i.h | 3 +
include/uapi/linux/fuse.h | 8 ++
3 files changed, 224 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ecf86b41c600b4de01efede83722395b088d09a6..fb08b538682eb95ad31e2558239f0ffa85167e3d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -718,7 +718,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry,
static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, dev_t);
-static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct file *file, unsigned flags,
umode_t mode)
{
@@ -765,6 +765,218 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
return finish_no_open(file, res);
}
+/*
+ * Look up (and, with O_CREAT, create) @entry in @dir and open it using a
+ * single FUSE_OPEN_ATOMIC request.
+ *
+ * Falls back to fuse_create_open() when:
+ *  - @entry already has an inode,
+ *  - O_CREAT is requested with a @mode that is not S_IFREG,
+ *  - the server answers -ENOSYS (remembered in fc->no_open_atomic so later
+ *    opens skip this path) or -ELOOP,
+ *  - a parallel lookup of the same name completed first.
+ *
+ * Returns the result of finish_open() or of the fallback, or a negative
+ * errno on failure.
+ */
+static int _fuse_atomic_open(struct inode *dir, struct dentry *entry,
+			     struct file *file, unsigned int flags,
+			     umode_t mode)
+{
+	int err;
+	struct inode *inode;
+	FUSE_ARGS(args);
+	struct fuse_mount *fm = get_fuse_mount(dir);
+	struct fuse_conn *fc = fm->fc;
+	struct fuse_forget_link *forget;
+	struct fuse_create_in inarg;
+	struct fuse_open_out outopen;
+	struct fuse_entry_out outentry;
+	struct fuse_inode *fi;
+	struct fuse_file *ff;
+	struct dentry *switched_entry = NULL, *alias = NULL;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+
+	/* Expect a negative dentry */
+	if (unlikely(d_inode(entry)))
+		goto fallback;
+
+	/* Userspace expects S_IFREG in create mode */
+	if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG)
+		goto fallback;
+
+	/* Both allocations below fail with -ENOMEM */
+	err = -ENOMEM;
+	forget = fuse_alloc_forget();
+	if (!forget)
+		goto out_err;
+
+	ff = fuse_file_alloc(fm);
+	if (!ff)
+		goto out_put_forget_req;
+
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+
+	flags &= ~O_NOCTTY;
+	memset(&inarg, 0, sizeof(inarg));
+	memset(&outentry, 0, sizeof(outentry));
+	inarg.flags = flags;
+	inarg.mode = mode;
+	inarg.umask = current_umask();
+
+	if (fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+	}
+
+	/* Request: create-in header + NUL-terminated name */
+	args.opcode = FUSE_OPEN_ATOMIC;
+	args.nodeid = get_node_id(dir);
+	args.in_numargs = 2;
+	args.in_args[0].size = sizeof(inarg);
+	args.in_args[0].value = &inarg;
+	args.in_args[1].size = entry->d_name.len + 1;
+	args.in_args[1].value = entry->d_name.name;
+	/* Reply: entry attributes followed by the open result */
+	args.out_numargs = 2;
+	args.out_args[0].size = sizeof(outentry);
+	args.out_args[0].value = &outentry;
+	args.out_args[1].size = sizeof(outopen);
+	args.out_args[1].value = &outopen;
+
+	if (flags & O_CREAT) {
+		err = get_create_ext(&args, dir, entry, mode);
+		if (err)
+			goto out_free_ff;
+	}
+
+	err = fuse_simple_request(fm, &args);
+	free_ext_value(&args);
+	if (err == -ENOSYS || err == -ELOOP) {
+		/* Only -ENOSYS disables atomic open for the connection */
+		if (unlikely(err == -ENOSYS))
+			fc->no_open_atomic = 1;
+		goto free_and_fallback;
+	}
+
+	/* A successful reply without a node id means "no such entry" */
+	if (!err && !outentry.nodeid)
+		err = -ENOENT;
+
+	if (err)
+		goto out_free_ff;
+
+	err = -EIO;
+	if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr))
+		goto out_free_ff;
+
+	ff->fh = outopen.fh;
+	ff->nodeid = outentry.nodeid;
+	ff->open_flags = outopen.open_flags;
+	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
+			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
+	if (!inode) {
+		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
+		fuse_sync_release(NULL, ff, flags);
+		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	/*
+	 * From here on the server holds an open handle and we hold an inode
+	 * reference: failure paths must release both, not merely free ff.
+	 */
+
+	/* Prevent a racing/parallel lookup on a negative hashed dentry */
+	if (!(flags & O_CREAT) && !d_in_lookup(entry)) {
+		d_drop(entry);
+		switched_entry = d_alloc_parallel(entry->d_parent,
+						  &entry->d_name, &wq);
+		if (IS_ERR(switched_entry)) {
+			err = PTR_ERR(switched_entry);
+			switched_entry = NULL;
+			goto out_release_inode;
+		}
+
+		if (unlikely(!d_in_lookup(switched_entry))) {
+			/* Somebody else finished the lookup: fall back */
+			dput(switched_entry);
+			switched_entry = NULL;
+			goto release_and_fallback;
+		}
+
+		entry = switched_entry;
+	}
+
+	if (d_really_is_negative(entry)) {
+		d_drop(entry);
+		/*
+		 * NOTE(review): when d_exact_alias() returns a dentry, the
+		 * reference obtained from fuse_iget() does not appear to be
+		 * consumed or dropped anywhere below - confirm.
+		 */
+		alias = d_exact_alias(entry, inode);
+		if (!alias) {
+			alias = d_splice_alias(inode, entry);
+			if (IS_ERR(alias)) {
+				/*
+				 * Close the file in user space, but do not unlink it,
+				 * if it was created - with network file systems other
+				 * clients might have already accessed it.
+				 *
+				 * NOTE(review): d_splice_alias() presumably drops
+				 * the inode reference on failure; confirm that
+				 * touching fi here is still safe.
+				 */
+				fi = get_fuse_inode(inode);
+				fuse_sync_release(fi, ff, flags);
+				fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
+				err = PTR_ERR(alias);
+				goto out_err;
+			}
+		}
+
+		if (alias)
+			entry = alias;
+	}
+
+	fuse_change_entry_timeout(entry, &outentry);
+
+	/* File was indeed created */
+	if (outopen.open_flags & FOPEN_FILE_CREATED) {
+		if (!(flags & O_CREAT)) {
+			pr_debug("Server side bug, ignoring.\n");
+			pr_debug("FOPEN_FILE_CREATED set without O_CREAT.\n");
+		} else {
+			/* This should be always set when the file is created */
+			fuse_dir_changed(dir);
+			file->f_mode |= FMODE_CREATED;
+		}
+	}
+
+	if (S_ISDIR(mode))
+		ff->open_flags &= ~FOPEN_DIRECT_IO;
+	err = finish_open(file, entry, generic_file_open);
+	if (err) {
+		fi = get_fuse_inode(inode);
+		fuse_sync_release(fi, ff, flags);
+	} else {
+		file->private_data = ff;
+		fuse_finish_open(inode, file);
+	}
+
+	/* The preallocated forget link was not needed */
+	kfree(forget);
+
+	if (switched_entry) {
+		d_lookup_done(switched_entry);
+		dput(switched_entry);
+	}
+
+	dput(alias);
+
+	return err;
+
+out_release_inode:
+	/* Opened on the server and inode referenced: undo both */
+	fi = get_fuse_inode(inode);
+	fuse_sync_release(fi, ff, flags);
+	iput(inode);
+	goto out_put_forget_req;
+out_free_ff:
+	fuse_file_free(ff);
+out_put_forget_req:
+	kfree(forget);
+out_err:
+	if (switched_entry) {
+		d_lookup_done(switched_entry);
+		dput(switched_entry);
+	}
+
+	return err;
+
+release_and_fallback:
+	/* Same as out_release_inode, but retry through the non-atomic path */
+	fi = get_fuse_inode(inode);
+	fuse_sync_release(fi, ff, flags);
+	iput(inode);
+	kfree(forget);
+	goto fallback;
+free_and_fallback:
+	fuse_file_free(ff);
+	kfree(forget);
+fallback:
+	return fuse_create_open(dir, entry, file, flags, mode);
+}
+
+/*
+ * Open dispatcher: use the single-request atomic open unless the connection
+ * has been flagged as not supporting it, in which case go straight to
+ * fuse_create_open().
+ */
+static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+			    struct file *file, unsigned int flags,
+			    umode_t mode)
+{
+	/* no_open_atomic is set by _fuse_atomic_open() after an -ENOSYS reply */
+	if (get_fuse_conn(dir)->no_open_atomic)
+		return fuse_create_open(dir, entry, file, flags, mode);
+
+	return _fuse_atomic_open(dir, entry, file, flags, mode);
+}
+
/*
* Code shared between mknod, mkdir, symlink and link
*/
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d56f3e7adc48f1f3a8d829ef308393dfdeac2492..09c167926f6aa88423372baea1b12920ea2bee90 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -691,6 +691,9 @@ struct fuse_conn {
/** Is open/release not implemented by fs? */
unsigned no_open:1;
+ /** Is open atomic not implemented by fs? */
+ unsigned no_open_atomic:1;
+
/** Is opendir/releasedir not implemented by fs? */
unsigned no_opendir:1;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index be9b809f1c9111c815b6df1843a43139b78f18a1..26cd2b628c5ac917fe3ceb4a5a66a3cbf09fae83 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -353,6 +353,7 @@ struct fuse_file_lock {
* FOPEN_STREAM: the file is stream-like (no file position at all)
* FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
* FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
+ * FOPEN_FILE_CREATED: the file was indeed created
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
@@ -361,6 +362,7 @@ struct fuse_file_lock {
#define FOPEN_STREAM (1 << 4)
#define FOPEN_NOFLUSH (1 << 5)
#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6)
+#define FOPEN_FILE_CREATED (1 << 7)
/**
* INIT request/reply flags
@@ -627,6 +629,12 @@ enum fuse_opcode {
CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */
FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */
+ /*
+ * TODO(b/310102543): Switch to the upstream opcode value once the
+ * atomic open kernel patch has been merged upstream.
+ */
+ FUSE_OPEN_ATOMIC = 0xfffffffe, /* u32::MAX - 1 */
+
/* Chrome OS extensions */
FUSE_CHROMEOS_TMPFILE = 0xffffffff, /* u32::MAX */
};
--
2.43.0.rc2.451.g8631bc7472-goog