From e2d06b96648057617fdaf8b56d90877da8c9e83c Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Thu, 21 May 2026 13:00:35 +0200 Subject: [PATCH 1/4] fuse: harden compound response parsing Bound payload copies by the slot's actual size and validate the result header's op count against the request. This method of calling compounds is private for redfs. Signed-off-by: Horst Birthelmer --- fs/fuse/compound.c | 72 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/fs/fuse/compound.c b/fs/fuse/compound.c index 5d84e3558a06f8..12ecd5ae5e8ddc 100644 --- a/fs/fuse/compound.c +++ b/fs/fuse/compound.c @@ -52,8 +52,18 @@ int fuse_compound_add(struct fuse_compound_req *compound, return 0; } -static void *fuse_copy_response_per_req(struct fuse_args *args, - char *resp) +/* + * Copy a slot's payload into the caller's out_args structs. + * + * @avail is the slot's actual payload size (op_hdr->len minus the slot's + * fuse_out_header). The caller is responsible for ensuring @resp..+@avail is + * within the response buffer. + * + * Returns 0 on success, -EIO if the slot is shorter than the declared args + * sum. Trailing bytes beyond the args sum are ignored (forward-compat). 
+ */ +static int fuse_copy_response_per_req(struct fuse_args *args, + char *resp, size_t avail) { int i; size_t copied = 0; @@ -62,14 +72,17 @@ static void *fuse_copy_response_per_req(struct fuse_args *args, struct fuse_arg current_arg = args->out_args[i]; size_t arg_size = current_arg.size; - if (current_arg.value && arg_size > 0) { - memcpy(current_arg.value, - (char *)resp + copied, arg_size); - copied += arg_size; - } + if (!current_arg.value || arg_size == 0) + continue; + + if (copied + arg_size > avail) + return -EIO; + + memcpy(current_arg.value, resp + copied, arg_size); + copied += arg_size; } - return (char *)resp + copied; + return 0; } int fuse_compound_get_error(struct fuse_compound_req *compound, int op_idx) @@ -77,12 +90,20 @@ int fuse_compound_get_error(struct fuse_compound_req *compound, int op_idx) return compound->op_errors[op_idx]; } +/* + * Parse one slot's response. An empty slot (payload size 0) leaves the + * caller's out_args untouched -- by convention callers zero-init their + * out structs, so an empty slot means "no result for this op". Slots + * with payload must contain at least the args sum; extra trailing bytes + * are ignored. The returned pointer always advances by op_hdr->len. 
+ */ static void *fuse_compound_parse_one_op(struct fuse_compound_req *compound, int op_index, void *op_out_data, void *response_end) { struct fuse_out_header *op_hdr = op_out_data; struct fuse_args *args = compound->op_args[op_index]; + size_t payload_size; if (op_hdr->len < sizeof(struct fuse_out_header)) return NULL; @@ -94,11 +115,15 @@ static void *fuse_compound_parse_one_op(struct fuse_compound_req *compound, if (op_hdr->error != 0) compound->op_errors[op_index] = op_hdr->error; - if (args && op_hdr->len > sizeof(struct fuse_out_header)) - return fuse_copy_response_per_req(args, op_out_data + - sizeof(struct fuse_out_header)); + payload_size = op_hdr->len - sizeof(struct fuse_out_header); + + if (args && payload_size > 0) { + if (fuse_copy_response_per_req(args, + (char *)op_out_data + sizeof(struct fuse_out_header), + payload_size) < 0) + return NULL; + } - /* No response data, just advance past the header */ return (char *)op_out_data + op_hdr->len; } @@ -232,10 +257,25 @@ ssize_t fuse_compound_send(struct fuse_compound_req *compound) actual_response_size = args.out_args[1].size; - if (actual_response_size < sizeof(struct fuse_compound_out)) { - pr_info_ratelimited("FUSE: compound response too small (%zu bytes, minimum %zu bytes)\n", - actual_response_size, - sizeof(struct fuse_compound_out)); + /* + * compound_header (out_args[0]) is fixed-size and already validated + * by fuse_simple_request; check that the server didn't claim more + * results than we requested, and that the payload at least leaves + * room for one fuse_out_header per claimed result. 
+ */ + if (compound->result_header.count > compound->compound_header.count) { + pr_info_ratelimited("FUSE: compound response claims %u ops, request had %u\n", + compound->result_header.count, + compound->compound_header.count); + ret = -EINVAL; + goto out; + } + + if (actual_response_size < + compound->result_header.count * sizeof(struct fuse_out_header)) { + pr_info_ratelimited("FUSE: compound payload too small for %u ops (%zu bytes)\n", + compound->result_header.count, + actual_response_size); ret = -EINVAL; goto out; } From d8e4a24e736d57f47b206e6b5d29b70ff1dd6830 Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Thu, 21 May 2026 13:00:41 +0200 Subject: [PATCH 2/4] fuse: also disable compound open+getattr on -EOPNOTSUPP Some servers signal "unsupported" as -EOPNOTSUPP rather than -ENOSYS; treat both the same: clear the compound_open_getattr feature flag and normalize the error to -ENOSYS. Either error disables only this specific compound call (open+getattr). Signed-off-by: Horst Birthelmer --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 56303193e8e07b..10c83a9eecb55d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -243,8 +243,10 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, err = fuse_compound_open_getattr(fm, nodeid, open_flags, opcode, ff, &attr_outarg, &outarg); - if (err == -ENOSYS) + if (err == -ENOSYS || err == -EOPNOTSUPP) { fc->compound_open_getattr = 0; + err = -ENOSYS; + } if (!err) fuse_change_attributes(inode, &attr_outarg.attr, NULL, From 72e9067889c1afe9def05a5a0f31e138b44d26eb Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Thu, 21 May 2026 13:59:59 +0200 Subject: [PATCH 3/4] fuse: extract fuse_create_init helper Mirror fuse_lookup_init by moving the CREATE args/inarg setup into a helper. No functional change; prepares for reuse from compound paths. 
Signed-off-by: Horst Birthelmer --- fs/fuse/dir.c | 57 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 02cab38b93c939..ed6eec662dfc30 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -613,6 +613,39 @@ static void free_ext_value(struct fuse_args *args) * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ +static void fuse_create_init(struct fuse_conn *fc, struct fuse_args *args, + u64 nodeid, const struct qstr *name, u32 opcode, + unsigned int flags, umode_t mode, + struct fuse_create_in *inarg, + struct fuse_entry_out *outentry, + struct fuse_open_out *outopen) +{ + bool trunc = flags & O_TRUNC; + + memset(inarg, 0, sizeof(*inarg)); + memset(outentry, 0, sizeof(*outentry)); + memset(outopen, 0, sizeof(*outopen)); + inarg->flags = flags; + inarg->mode = mode; + inarg->umask = current_umask(); + if (fc->handle_killpriv_v2 && trunc && + !(flags & O_EXCL) && !capable(CAP_FSETID)) + inarg->open_flags |= FUSE_OPEN_KILL_SUIDGID; + + args->opcode = opcode; + args->nodeid = nodeid; + args->in_numargs = 2; + args->in_args[0].size = sizeof(*inarg); + args->in_args[0].value = inarg; + args->in_args[1].size = name->len + 1; + args->in_args[1].value = name->name; + args->out_numargs = 2; + args->out_args[0].size = sizeof(*outentry); + args->out_args[0].value = outentry; + args->out_args[1].size = sizeof(*outopen); + args->out_args[1].value = outopen; +} + static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned int flags, umode_t mode, u32 opcode) @@ -646,29 +679,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, mode &= ~current_umask(); flags &= ~O_NOCTTY; - memset(&inarg, 0, sizeof(inarg)); - memset(&outentry, 0, sizeof(outentry)); - inarg.flags = flags; - inarg.mode = mode; - inarg.umask = current_umask(); - - if (fm->fc->handle_killpriv_v2 && trunc && - !(flags & 
O_EXCL) && !capable(CAP_FSETID)) { - inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; - } - args.opcode = opcode; - args.nodeid = get_node_id(dir); - args.in_numargs = 2; - args.in_args[0].size = sizeof(inarg); - args.in_args[0].value = &inarg; - args.in_args[1].size = entry->d_name.len + 1; - args.in_args[1].value = entry->d_name.name; - args.out_numargs = 2; - args.out_args[0].size = sizeof(outentry); - args.out_args[0].value = &outentry; - args.out_args[1].size = sizeof(outopen); - args.out_args[1].value = &outopen; + fuse_create_init(fm->fc, &args, get_node_id(dir), &entry->d_name, opcode, + flags, mode, &inarg, &outentry, &outopen); err = get_create_ext(&args, dir, entry, mode); if (err) From 306548d92ead5b505e40e89690567c44de21610e Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Thu, 21 May 2026 14:01:16 +0200 Subject: [PATCH 4/4] fuse: compound LOOKUP+CREATE for atomic_open Collapse the lookup-then-create round trips in fuse_atomic_open() into a single FUSE_COMPOUND. Gated by fc->compound_lookup_create; falls back to the separate-op path on -ENOSYS/-EOPNOTSUPP. Signed-off-by: Horst Birthelmer --- fs/fuse/dir.c | 254 +++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 3 + fs/fuse/inode.c | 1 + 3 files changed, 258 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ed6eec662dfc30..1288fc528789f4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -739,6 +739,251 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); + +/* + * Compound LOOKUP+CREATE for atomic_open. + * + * Sends a single FUSE_COMPOUND containing FUSE_LOOKUP followed by FUSE_CREATE, + * collapsing the two round trips that fuse_atomic_open() would otherwise make + * (fuse_lookup() then fuse_create_open()) into one. + * + * Semantics match the separate-op path: + * - LOOKUP runs first. 
If it finds a positive entry, the kernel splices the + * resulting inode onto the dentry and lets the VFS retry via finish_no_open(); + * the CREATE result (if any) is ignored. + * - If LOOKUP returns ENOENT (or nodeid 0), the CREATE result is consumed: + * on success the new inode is instantiated and finish_open() is called; on + * EEXIST the dentry is invalidated; on ENOSYS the no_create flag is set so + * the caller falls back to the mknod path. + * + * Returns 0 on success (either the no_open or finish_open path completed), + * -ENOSYS if the FUSE_COMPOUND mechanism is not supported by the server (the + * caller will disable the feature flag and retry via the separate-op path), or + * any other negative errno for terminal errors. + */ +static int fuse_compound_lookup_create(struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, + umode_t mode) +{ + struct fuse_mount *fm = get_fuse_mount(dir); + struct fuse_conn *fc = fm->fc; + struct fuse_compound_req *compound; + struct fuse_args lookup_args = {}; + struct fuse_args create_args = {}; + struct fuse_entry_out outlookup; + struct fuse_entry_out outentry; + struct fuse_open_out outopen; + struct fuse_create_in create_in; + struct fuse_forget_link *forget; + struct fuse_file *ff; + struct fuse_inode *fi; + struct inode *inode; + struct dentry *newent; + bool trunc = flags & O_TRUNC; + int lookup_err, create_err; + int err; + + /* atomic_open only ever creates regular files */ + BUG_ON((mode & S_IFMT) != S_IFREG); + + forget = fuse_alloc_forget(); + if (!forget) + return -ENOMEM; + + ff = fuse_file_alloc(fm, true); + if (!ff) { + err = -ENOMEM; + goto out_forget; + } + + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; + + compound = fuse_compound_alloc(fm, 0); + if (IS_ERR(compound)) { + err = PTR_ERR(compound); + goto out_ff; + } + + fuse_lookup_init(fc, &lookup_args, get_node_id(dir), &entry->d_name, + &outlookup); + err = fuse_compound_add(compound, &lookup_args); 
+ if (err) + goto out_compound; + + fuse_create_init(fc, &create_args, get_node_id(dir), &entry->d_name, + FUSE_CREATE, flags, mode, + &create_in, &outentry, &outopen); + + err = get_create_ext(&create_args, dir, entry, mode); + if (err) + goto out_compound; + + err = fuse_compound_add(compound, &create_args); + if (err) { + free_ext_value(&create_args); + goto out_compound; + } + + err = fuse_compound_send(compound); + free_ext_value(&create_args); + if (err) { + /* FUSE_COMPOUND unsupported -- caller will disable the flag */ + goto out_compound; + } + + lookup_err = fuse_compound_get_error(compound, 0); + create_err = fuse_compound_get_error(compound, 1); + + /* + * LOOKUP succeeded with a positive nodeid: the file already existed. + * Mirror fuse_lookup_name() + fuse_lookup() post-processing, then take + * the no_open path (CREATE result, if executed, is discarded). + */ + if (!lookup_err && outlookup.nodeid) { + fuse_file_free(ff); + ff = NULL; + + err = -EIO; + if (fuse_invalid_attr(&outlookup.attr)) + goto out_compound; + if (outlookup.nodeid == FUSE_ROOT_ID) + goto out_compound; + + inode = fuse_iget(dir->i_sb, outlookup.nodeid, + outlookup.generation, &outlookup.attr, + ATTR_TIMEOUT(&outlookup), + fuse_get_attr_version(fc), + fuse_get_evict_ctr(fc)); + if (!inode) { + fuse_queue_forget(fc, forget, outlookup.nodeid, 1); + forget = NULL; + err = -ENOMEM; + goto out_compound; + } + kfree(forget); + forget = NULL; + + newent = d_splice_alias(inode, entry); + if (IS_ERR(newent)) { + err = PTR_ERR(newent); + goto out_compound; + } + if (newent) + entry = newent; + fuse_change_entry_timeout(entry, &outlookup); + fuse_advise_use_readdirplus(dir); + + err = finish_no_open(file, newent); + dput(newent); + kfree(compound); + return err; + } + + /* Anything other than "doesn't exist" from LOOKUP is terminal. */ + if (lookup_err && lookup_err != -ENOENT) { + err = lookup_err; + goto out_compound; + } + + /* + * LOOKUP said the entry doesn't exist; consume CREATE. 
+ */ + if (create_err == -ENOSYS) { + /* Server can't FUSE_CREATE; let caller fall back to mknod. */ + fc->no_create = 1; + err = -ENOSYS; + goto out_compound; + } + if (create_err) { + err = create_err; + if (err == -EEXIST) + fuse_invalidate_entry(entry); + goto out_compound; + } + + err = -EIO; + if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || + fuse_invalid_attr(&outentry.attr)) + goto out_compound; + + file->f_mode |= FMODE_CREATED; + + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, + &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0); + if (!inode) { + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + ff = NULL; + fuse_queue_forget(fc, forget, outentry.nodeid, 1); + forget = NULL; + err = -ENOMEM; + goto out_compound; + } + kfree(forget); + forget = NULL; + + /* + * Unlike the separate-op path, we did not pre-run fuse_lookup() on + * @entry, so it is still d_in_lookup() here. That means its + * d_u.d_in_lookup_hash node is on the parent's in-lookup chain -- + * and that field is unioned with d_u.d_alias, so the BUG_ON in + * d_instantiate() would fire. Use d_splice_alias() instead: it + * goes through __d_add(), which detects d_in_lookup and calls + * __d_lookup_done() before attaching the inode. + * + * For a regular file (asserted at function entry) d_splice_alias + * cannot return an error and cannot return a different dentry -- + * both of those only happen via the S_ISDIR branch of d_splice_alias. + * The IS_ERR and non-NULL return cases are therefore unreachable; + * we WARN if invariants are ever broken (e.g. someone removes the + * S_ISREG BUG_ON) rather than silently mishandling refcounts. 
+ */ + newent = d_splice_alias(inode, entry); + if (WARN_ON_ONCE(IS_ERR(newent))) { + err = PTR_ERR(newent); + /* inode ref consumed by d_splice_alias on error */ + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + ff = NULL; + goto out_compound; + } + WARN_ON_ONCE(newent); + + fuse_change_entry_timeout(entry, &outentry); + fuse_dir_changed(dir); + err = generic_file_open(inode, file); + if (!err) { + file->private_data = ff; + err = finish_open(file, entry, fuse_finish_open); + } + if (err) { + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + } else { + if (fc->atomic_o_trunc && trunc) + truncate_pagecache(inode, 0); + else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages2(inode->i_mapping); + } + dput(newent); + kfree(compound); + return err; + +out_compound: + kfree(compound); + if (ff) + fuse_file_free(ff); +out_ff: +out_forget: + kfree(forget); + return err; +} + static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, umode_t mode) @@ -750,6 +995,15 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fuse_is_bad(dir)) return -EIO; + if (d_in_lookup(entry) && (flags & O_CREAT) && !fc->no_create && + fc->compound_lookup_create) { + err = fuse_compound_lookup_create(dir, entry, file, flags, mode); + if (err != -ENOSYS && err != -EOPNOTSUPP) + return err; + fc->compound_lookup_create = 0; + /* fall through to the separate-op path */ + } + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 654b5890c24c18..064d43cfa4f1bc 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -873,6 +873,9 @@ struct fuse_conn { /* Does the filesystem support compound operations? */ unsigned int compound_open_getattr:1; + /* Does the filesystem support compound atomic-open (LOOKUP+CREATE)? 
*/ + unsigned int compound_lookup_create:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 670278526418b7..d550d2e404b973 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1187,6 +1187,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, /* module option for now */ fc->compound_open_getattr = enable_compound; + fc->compound_lookup_create = enable_compound; atomic64_set(&fc->attr_version, 1); atomic64_set(&fc->evict_ctr, 1);