| From 524a4ff4a341f7701b6065a3faa1d091591e45fd Mon Sep 17 00:00:00 2001 |
| From: Dharmendra Singh <dsingh@ddn.com> |
| Date: Mon, 23 Oct 2023 20:30:29 +0200 |
| Subject: [PATCH] BACKPORT: FROMLIST: fuse: introduce atomic open |
| |
| This adds full atomic open support, to avoid lookup before open/create. |
| If the implementation (fuse server/daemon) does not support atomic open, |
| the kernel falls back to non-atomic open. |
| |
| Co-developed-by: Bernd Schubert <bschubert@ddn.com> |
| Signed-off-by: Bernd Schubert <bschubert@ddn.com> |
| Signed-off-by: Dharmendra Singh <dsingh@ddn.com> |
| Signed-off-by: Horst Birthelmer <hbirthelmer@ddn.com> |
| Cc: Miklos Szeredi <miklos@szeredi.hu> |
| Cc: Christian Brauner <brauner@kernel.org> |
| Cc: Al Viro <viro@zeniv.linux.org.uk> |
| Cc: Dharmendra Singh <dsingh@ddn.com> |
| Cc: linux-fsdevel@vger.kernel.org |
| |
| (am from https://patchwork.kernel.org/patch/13433386/) |
| (also found at https://lore.kernel.org/r/20231023183035.11035-3-bschubert@ddn.com) |
| |
| Downstream changes: |
| fs/fuse/dir.c |
| Fix quoted string across lines in pr_debug() |
| include/uapi/linux/fuse.h |
| Change opcode for atomic open |
| |
| UPSTREAM-TASK=b:308366794 |
| BUG=b:307851138 |
| TEST=run virtio-fs with atomic open implemented crosvm chromium:5009218 |
| TEST=run vm.ManyFiles.virtiofs -var "vm.ManyFiles.kernelPath=${KERNELPATH}" |
| |
| Cq-Depend: chromium:4988544 |
| Change-Id: I88b76900e1d3fcd709dae7212b7f9b27970e403c |
| Signed-off-by: Yuan Yao <yuanyaogoog@chromium.org> |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4988749 |
| Reviewed-by: Takaya Saeki <takayas@chromium.org> |
| Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org> |
| --- |
| fs/fuse/dir.c | 214 +++++++++++++++++++++++++++++++++++++- |
| fs/fuse/fuse_i.h | 3 + |
| include/uapi/linux/fuse.h | 8 ++ |
| 3 files changed, 224 insertions(+), 1 deletion(-) |
| |
| diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c |
| index ecf86b41c600b4de01efede83722395b088d09a6..fb08b538682eb95ad31e2558239f0ffa85167e3d 100644 |
| --- a/fs/fuse/dir.c |
| +++ b/fs/fuse/dir.c |
| @@ -718,7 +718,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, |
| |
| static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, |
| umode_t, dev_t); |
| -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, |
| +static int fuse_create_open(struct inode *dir, struct dentry *entry, |
| struct file *file, unsigned flags, |
| umode_t mode) |
| { |
| @@ -765,6 +765,218 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, |
| return finish_no_open(file, res); |
| } |
| |
| +/* |
| + * Look up, optionally create and open @entry with one FUSE_OPEN_ATOMIC |
| + * request; falls back to fuse_create_open() when that path cannot be used. |
| + */ |
| +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, |
| + struct file *file, unsigned int flags, |
| + umode_t mode) |
| +{ |
| + int err; |
| + struct inode *inode; |
| + FUSE_ARGS(args); |
| + struct fuse_mount *fm = get_fuse_mount(dir); |
| + struct fuse_conn *fc = fm->fc; |
| + struct fuse_forget_link *forget; |
| + struct fuse_create_in inarg; |
| + struct fuse_open_out outopen; |
| + struct fuse_entry_out outentry; |
| + struct fuse_file *ff; |
| + struct dentry *switched_entry = NULL, *alias = NULL; |
| + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + |
| + /* Expect a negative dentry */ |
| + if (unlikely(d_inode(entry))) |
| + goto fallback; |
| + |
| + /* Userspace expects S_IFREG in create mode */ |
| + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) |
| + goto fallback; |
| + |
| + forget = fuse_alloc_forget(); |
| + err = -ENOMEM; |
| + if (!forget) |
| + goto out_err; |
| + |
| + ff = fuse_file_alloc(fm); |
| + if (!ff) |
| + goto out_put_forget_req; |
| + |
| + if (!fc->dont_mask) |
| + mode &= ~current_umask(); |
| + |
| + flags &= ~O_NOCTTY; |
| + memset(&inarg, 0, sizeof(inarg)); |
| + memset(&outentry, 0, sizeof(outentry)); |
| + inarg.flags = flags; |
| + inarg.mode = mode; |
| + inarg.umask = current_umask(); |
| + |
| + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && |
| + !(flags & O_EXCL) && !capable(CAP_FSETID)) |
| + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; |
| + |
| + args.opcode = FUSE_OPEN_ATOMIC; |
| + args.nodeid = get_node_id(dir); |
| + args.in_numargs = 2; |
| + args.in_args[0].size = sizeof(inarg); |
| + args.in_args[0].value = &inarg; |
| + args.in_args[1].size = entry->d_name.len + 1; |
| + args.in_args[1].value = entry->d_name.name; |
| + args.out_numargs = 2; |
| + args.out_args[0].size = sizeof(outentry); |
| + args.out_args[0].value = &outentry; |
| + args.out_args[1].size = sizeof(outopen); |
| + args.out_args[1].value = &outopen; |
| + |
| + if (flags & O_CREAT) { |
| + err = get_create_ext(&args, dir, entry, mode); |
| + if (err) |
| + goto out_free_ff; |
| + } |
| + |
| + err = fuse_simple_request(fm, &args); |
| + free_ext_value(&args); |
| + if (err == -ENOSYS || err == -ELOOP) { |
| + if (unlikely(err == -ENOSYS)) |
| + fc->no_open_atomic = 1; |
| + goto free_and_fallback; |
| + } |
| + |
| + if (!err && !outentry.nodeid) |
| + err = -ENOENT; |
| + |
| + if (err) |
| + goto out_free_ff; |
| + |
| + err = -EIO; |
| + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) |
| + goto out_free_ff; |
| + |
| + ff->fh = outopen.fh; |
| + ff->nodeid = outentry.nodeid; |
| + ff->open_flags = outopen.open_flags; |
| + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, |
| + &outentry.attr, ATTR_TIMEOUT(&outentry), 0); |
| + if (!inode) { |
| + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); |
| + fuse_sync_release(NULL, ff, flags); |
| + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); |
| + err = -ENOMEM; |
| + goto out_err; |
| + } |
| + |
| + /* prevent racing/parallel lookup on a negative hashed dentry */ |
| + if (!(flags & O_CREAT) && !d_in_lookup(entry)) { |
| + d_drop(entry); |
| + switched_entry = d_alloc_parallel(entry->d_parent, |
| + &entry->d_name, &wq); |
| + if (IS_ERR(switched_entry) || |
| + unlikely(!d_in_lookup(switched_entry))) { |
| + /* The server opened the file and we hold an inode ref: undo both */ |
| + fuse_sync_release(get_fuse_inode(inode), ff, flags); |
| + iput(inode); |
| + kfree(forget); |
| + if (IS_ERR(switched_entry)) |
| + return PTR_ERR(switched_entry); |
| + /* not in lookup, i.e. raced with another lookup: fall back */ |
| + dput(switched_entry); |
| + goto fallback; |
| + } |
| + |
| + entry = switched_entry; |
| + } |
| + |
| + if (d_really_is_negative(entry)) { |
| + d_drop(entry); |
| + /* NOTE(review): iget ref seems leaked if an alias exists - confirm */ |
| + alias = d_exact_alias(entry, inode); |
| + if (!alias) { |
| + alias = d_splice_alias(inode, entry); |
| + if (IS_ERR(alias)) { |
| + /* |
| + * Close the file in user space, but do not unlink it, |
| + * if it was created - with network file systems other |
| + * clients might have already accessed it. |
| + */ |
| + fuse_sync_release(get_fuse_inode(inode), ff, flags); |
| + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); |
| + err = PTR_ERR(alias); |
| + goto out_err; |
| + } |
| + } |
| + |
| + if (alias) |
| + entry = alias; |
| + } |
| + |
| + fuse_change_entry_timeout(entry, &outentry); |
| + |
| + /* File was indeed created */ |
| + if (outopen.open_flags & FOPEN_FILE_CREATED) { |
| + if (!(flags & O_CREAT)) { |
| + pr_debug("Server side bug, ignoring.\n"); |
| + pr_debug("FOPEN_FILE_CREATED set without O_CREAT.\n"); |
| + } else { |
| + /* This should be always set when the file is created */ |
| + fuse_dir_changed(dir); |
| + file->f_mode |= FMODE_CREATED; |
| + } |
| + } |
| + |
| + if (S_ISDIR(mode)) |
| + ff->open_flags &= ~FOPEN_DIRECT_IO; |
| + err = finish_open(file, entry, generic_file_open); |
| + if (err) { |
| + fuse_sync_release(get_fuse_inode(inode), ff, flags); |
| + } else { |
| + file->private_data = ff; |
| + fuse_finish_open(inode, file); |
| + } |
| + |
| + kfree(forget); |
| + |
| + if (switched_entry) { |
| + d_lookup_done(switched_entry); |
| + dput(switched_entry); |
| + } |
| + |
| + dput(alias); |
| + |
| + return err; |
| + |
| +out_free_ff: |
| + fuse_file_free(ff); |
| +out_put_forget_req: |
| + kfree(forget); |
| +out_err: |
| + if (switched_entry) { |
| + d_lookup_done(switched_entry); |
| + dput(switched_entry); |
| + } |
| + |
| + return err; |
| + |
| +free_and_fallback: |
| + fuse_file_free(ff); |
| + kfree(forget); |
| +fallback: |
| + return fuse_create_open(dir, entry, file, flags, mode); |
| +} |
| + |
| +/* Dispatch to the atomic path unless fc->no_open_atomic is set. */ |
| +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, |
| + struct file *file, unsigned int flags, |
| + umode_t mode) |
| +{ |
| + /* The flag is set by _fuse_atomic_open() after an -ENOSYS reply. */ |
| + if (get_fuse_conn(dir)->no_open_atomic) |
| + return fuse_create_open(dir, entry, file, flags, mode); |
| + |
| + return _fuse_atomic_open(dir, entry, file, flags, mode); |
| +} |
| + |
| /* |
| * Code shared between mknod, mkdir, symlink and link |
| */ |
| diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h |
| index d56f3e7adc48f1f3a8d829ef308393dfdeac2492..09c167926f6aa88423372baea1b12920ea2bee90 100644 |
| --- a/fs/fuse/fuse_i.h |
| +++ b/fs/fuse/fuse_i.h |
| @@ -691,6 +691,9 @@ struct fuse_conn { |
| /** Is open/release not implemented by fs? */ |
| unsigned no_open:1; |
| |
| + /** Is atomic open not implemented by fs? */ |
| + unsigned no_open_atomic:1; |
| + |
| /** Is opendir/releasedir not implemented by fs? */ |
| unsigned no_opendir:1; |
| |
| diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h |
| index be9b809f1c9111c815b6df1843a43139b78f18a1..26cd2b628c5ac917fe3ceb4a5a66a3cbf09fae83 100644 |
| --- a/include/uapi/linux/fuse.h |
| +++ b/include/uapi/linux/fuse.h |
| @@ -353,6 +353,7 @@ struct fuse_file_lock { |
| * FOPEN_STREAM: the file is stream-like (no file position at all) |
| * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) |
| * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode |
| + * FOPEN_FILE_CREATED: the file was newly created by this open |
| */ |
| #define FOPEN_DIRECT_IO (1 << 0) |
| #define FOPEN_KEEP_CACHE (1 << 1) |
| @@ -361,6 +362,7 @@ struct fuse_file_lock { |
| #define FOPEN_STREAM (1 << 4) |
| #define FOPEN_NOFLUSH (1 << 5) |
| #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) |
| +#define FOPEN_FILE_CREATED (1 << 7) |
| |
| /** |
| * INIT request/reply flags |
| @@ -627,6 +629,12 @@ enum fuse_opcode { |
| CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ |
| FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ |
| |
| + /* |
| + * TODO(b/310102543): Update the opcode to match the upstream value |
| + * once the atomic open kernel patch is merged upstream. |
| + */ |
| + FUSE_OPEN_ATOMIC = 0xfffffffe, /* u32::MAX - 1 */ |
| + |
| /* Chrome OS extensions */ |
| FUSE_CHROMEOS_TMPFILE = 0xffffffff, /* u32::MAX */ |
| }; |
| -- |
| 2.43.0.rc2.451.g8631bc7472-goog |
| |