diff --git a/fs/fuse/compound.c b/fs/fuse/compound.c
index 5d84e3558a06f8..12ecd5ae5e8ddc 100644
--- a/fs/fuse/compound.c
+++ b/fs/fuse/compound.c
@@ -52,8 +52,18 @@ int fuse_compound_add(struct fuse_compound_req *compound,
 	return 0;
 }
 
-static void *fuse_copy_response_per_req(struct fuse_args *args,
-					char *resp)
+/*
+ * Copy a slot's payload into the caller's out_args structs.
+ *
+ * @avail is the slot's actual payload size (op_hdr->len minus the slot's
+ * fuse_out_header). The caller is responsible for ensuring @resp..+@avail is
+ * within the response buffer.
+ *
+ * Returns 0 on success, -EIO if the slot is shorter than the declared args
+ * sum. Trailing bytes beyond the args sum are ignored (forward-compat).
+ */
+static int fuse_copy_response_per_req(struct fuse_args *args,
+				      char *resp, size_t avail)
 {
 	int i;
 	size_t copied = 0;
@@ -62,14 +72,17 @@ static void *fuse_copy_response_per_req(struct fuse_args *args,
 		struct fuse_arg current_arg = args->out_args[i];
 		size_t arg_size = current_arg.size;
 
-		if (current_arg.value && arg_size > 0) {
-			memcpy(current_arg.value,
-			       (char *)resp + copied, arg_size);
-			copied += arg_size;
-		}
+		if (!current_arg.value || arg_size == 0)
+			continue;
+
+		if (copied + arg_size > avail)
+			return -EIO;
+
+		memcpy(current_arg.value, resp + copied, arg_size);
+		copied += arg_size;
 	}
 
-	return (char *)resp + copied;
+	return 0;
 }
 
 int fuse_compound_get_error(struct fuse_compound_req *compound, int op_idx)
@@ -77,12 +90,20 @@ int fuse_compound_get_error(struct fuse_compound_req *compound, int op_idx)
 	return compound->op_errors[op_idx];
 }
 
+/*
+ * Parse one slot's response. An empty slot (payload size 0) leaves the
+ * caller's out_args untouched -- by convention callers zero-init their
+ * out structs, so an empty slot means "no result for this op". Slots
+ * with payload must contain at least the args sum; extra trailing bytes
+ * are ignored. The returned pointer always advances by op_hdr->len.
+ */
 static void *fuse_compound_parse_one_op(struct fuse_compound_req *compound,
 					int op_index, void *op_out_data,
 					void *response_end)
 {
 	struct fuse_out_header *op_hdr = op_out_data;
 	struct fuse_args *args = compound->op_args[op_index];
+	size_t payload_size;
 
 	if (op_hdr->len < sizeof(struct fuse_out_header))
 		return NULL;
@@ -94,11 +115,15 @@ static void *fuse_compound_parse_one_op(struct fuse_compound_req *compound,
 	if (op_hdr->error != 0)
 		compound->op_errors[op_index] = op_hdr->error;
 
-	if (args && op_hdr->len > sizeof(struct fuse_out_header))
-		return fuse_copy_response_per_req(args, op_out_data +
-					sizeof(struct fuse_out_header));
+	payload_size = op_hdr->len - sizeof(struct fuse_out_header);
+
+	if (args && payload_size > 0) {
+		if (fuse_copy_response_per_req(args,
+				(char *)op_out_data + sizeof(struct fuse_out_header),
+				payload_size) < 0)
+			return NULL;
+	}
 
-	/* No response data, just advance past the header */
 	return (char *)op_out_data + op_hdr->len;
 }
 
@@ -232,10 +257,25 @@ ssize_t fuse_compound_send(struct fuse_compound_req *compound)
 
 	actual_response_size = args.out_args[1].size;
 
-	if (actual_response_size < sizeof(struct fuse_compound_out)) {
-		pr_info_ratelimited("FUSE: compound response too small (%zu bytes, minimum %zu bytes)\n",
-				    actual_response_size,
-				    sizeof(struct fuse_compound_out));
+	/*
+	 * compound_header (out_args[0]) is fixed-size and already validated
+	 * by fuse_simple_request; check that the server didn't claim more
+	 * results than we requested, and that the payload at least leaves
+	 * room for one fuse_out_header per claimed result.
+	 */
+	if (compound->result_header.count > compound->compound_header.count) {
+		pr_info_ratelimited("FUSE: compound response claims %u ops, request had %u\n",
+				    compound->result_header.count,
+				    compound->compound_header.count);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (actual_response_size <
+	    compound->result_header.count * sizeof(struct fuse_out_header)) {
+		pr_info_ratelimited("FUSE: compound payload too small for %u ops (%zu bytes)\n",
+				    compound->result_header.count,
+				    actual_response_size);
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 02cab38b93c939..1288fc528789f4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -613,6 +613,39 @@ static void free_ext_value(struct fuse_args *args)
  * If the filesystem doesn't support this, then fall back to separate
  * 'mknod' + 'open' requests.
  */
+static void fuse_create_init(struct fuse_conn *fc, struct fuse_args *args,
+			     u64 nodeid, const struct qstr *name, u32 opcode,
+			     unsigned int flags, umode_t mode,
+			     struct fuse_create_in *inarg,
+			     struct fuse_entry_out *outentry,
+			     struct fuse_open_out *outopen)
+{
+	bool trunc = flags & O_TRUNC;
+
+	memset(inarg, 0, sizeof(*inarg));
+	memset(outentry, 0, sizeof(*outentry));
+	memset(outopen, 0, sizeof(*outopen));
+	inarg->flags = flags;
+	inarg->mode = mode;
+	inarg->umask = current_umask();
+	if (fc->handle_killpriv_v2 && trunc &&
+	    !(flags & O_EXCL) && !capable(CAP_FSETID))
+		inarg->open_flags |= FUSE_OPEN_KILL_SUIDGID;
+
+	args->opcode = opcode;
+	args->nodeid = nodeid;
+	args->in_numargs = 2;
+	args->in_args[0].size = sizeof(*inarg);
+	args->in_args[0].value = inarg;
+	args->in_args[1].size = name->len + 1;
+	args->in_args[1].value = name->name;
+	args->out_numargs = 2;
+	args->out_args[0].size = sizeof(*outentry);
+	args->out_args[0].value = outentry;
+	args->out_args[1].size = sizeof(*outopen);
+	args->out_args[1].value = outopen;
+}
+
 static int fuse_create_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned int flags,
 			    umode_t mode, u32 opcode)
@@ -646,29 +679,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 		mode &= ~current_umask();
 
 	flags &= ~O_NOCTTY;
-	memset(&inarg, 0, sizeof(inarg));
-	memset(&outentry, 0, sizeof(outentry));
-	inarg.flags = flags;
-	inarg.mode = mode;
-	inarg.umask = current_umask();
 
-	if (fm->fc->handle_killpriv_v2 && trunc &&
-	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
-		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
-	}
-
-	args.opcode = opcode;
-	args.nodeid = get_node_id(dir);
-	args.in_numargs = 2;
-	args.in_args[0].size = sizeof(inarg);
-	args.in_args[0].value = &inarg;
-	args.in_args[1].size = entry->d_name.len + 1;
-	args.in_args[1].value = entry->d_name.name;
-	args.out_numargs = 2;
-	args.out_args[0].size = sizeof(outentry);
-	args.out_args[0].value = &outentry;
-	args.out_args[1].size = sizeof(outopen);
-	args.out_args[1].value = &outopen;
+	fuse_create_init(fm->fc, &args, get_node_id(dir), &entry->d_name, opcode,
+			 flags, mode, &inarg, &outentry, &outopen);
 
 	err = get_create_ext(&args, dir, entry, mode);
 	if (err)
@@ -726,6 +739,251 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 
 static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
 		      umode_t, dev_t);
+
+/*
+ * Compound LOOKUP+CREATE for atomic_open.
+ *
+ * Sends a single FUSE_COMPOUND containing FUSE_LOOKUP followed by FUSE_CREATE,
+ * collapsing the two round trips that fuse_atomic_open() would otherwise make
+ * (fuse_lookup() then fuse_create_open()) into one.
+ *
+ * Semantics match the separate-op path:
+ *   - LOOKUP runs first. If it finds a positive entry, the kernel splices the
+ *     resulting inode onto the dentry and lets the VFS retry via finish_no_open();
+ *     the CREATE result (if any) is ignored.
+ *   - If LOOKUP returns ENOENT (or nodeid 0), the CREATE result is consumed:
+ *     on success the new inode is instantiated and finish_open() is called; on
+ *     EEXIST the dentry is invalidated; on ENOSYS the no_create flag is set so
+ *     the caller falls back to the mknod path.
+ *
+ * Returns 0 on success (either the no_open or finish_open path completed),
+ * -ENOSYS if the FUSE_COMPOUND mechanism is not supported by the server (the
+ * caller will disable the feature flag and retry via the separate-op path), or
+ * any other negative errno for terminal errors.
+ */
+static int fuse_compound_lookup_create(struct inode *dir, struct dentry *entry,
+				       struct file *file, unsigned int flags,
+				       umode_t mode)
+{
+	struct fuse_mount *fm = get_fuse_mount(dir);
+	struct fuse_conn *fc = fm->fc;
+	struct fuse_compound_req *compound;
+	struct fuse_args lookup_args = {};
+	struct fuse_args create_args = {};
+	struct fuse_entry_out outlookup;
+	struct fuse_entry_out outentry;
+	struct fuse_open_out outopen;
+	struct fuse_create_in create_in;
+	struct fuse_forget_link *forget;
+	struct fuse_file *ff;
+	struct fuse_inode *fi;
+	struct inode *inode;
+	struct dentry *newent;
+	bool trunc = flags & O_TRUNC;
+	int lookup_err, create_err;
+	int err;
+
+	/* atomic_open only ever creates regular files */
+	BUG_ON((mode & S_IFMT) != S_IFREG);
+
+	forget = fuse_alloc_forget();
+	if (!forget)
+		return -ENOMEM;
+
+	ff = fuse_file_alloc(fm, true);
+	if (!ff) {
+		err = -ENOMEM;
+		goto out_forget;
+	}
+
+	if (!fc->dont_mask)
+		mode &= ~current_umask();
+	flags &= ~O_NOCTTY;
+
+	compound = fuse_compound_alloc(fm, 0);
+	if (IS_ERR(compound)) {
+		err = PTR_ERR(compound);
+		goto out_ff;
+	}
+
+	fuse_lookup_init(fc, &lookup_args, get_node_id(dir), &entry->d_name,
+			 &outlookup);
+	err = fuse_compound_add(compound, &lookup_args);
+	if (err)
+		goto out_compound;
+
+	fuse_create_init(fc, &create_args, get_node_id(dir), &entry->d_name,
+			 FUSE_CREATE, flags, mode,
+			 &create_in, &outentry, &outopen);
+
+	err = get_create_ext(&create_args, dir, entry, mode);
+	if (err)
+		goto out_compound;
+
+	err = fuse_compound_add(compound, &create_args);
+	if (err) {
+		free_ext_value(&create_args);
+		goto out_compound;
+	}
+
+	err = fuse_compound_send(compound);
+	free_ext_value(&create_args);
+	if (err) {
+		/* FUSE_COMPOUND unsupported -- caller will disable the flag */
+		goto out_compound;
+	}
+
+	lookup_err = fuse_compound_get_error(compound, 0);
+	create_err = fuse_compound_get_error(compound, 1);
+
+	/*
+	 * LOOKUP succeeded with a positive nodeid: the file already existed.
+	 * Mirror fuse_lookup_name() + fuse_lookup() post-processing, then take
+	 * the no_open path (CREATE result, if executed, is discarded).
+	 */
+	if (!lookup_err && outlookup.nodeid) {
+		fuse_file_free(ff);
+		ff = NULL;
+
+		err = -EIO;
+		if (fuse_invalid_attr(&outlookup.attr))
+			goto out_compound;
+		if (outlookup.nodeid == FUSE_ROOT_ID)
+			goto out_compound;
+
+		inode = fuse_iget(dir->i_sb, outlookup.nodeid,
+				  outlookup.generation, &outlookup.attr,
+				  ATTR_TIMEOUT(&outlookup),
+				  fuse_get_attr_version(fc),
+				  fuse_get_evict_ctr(fc));
+		if (!inode) {
+			fuse_queue_forget(fc, forget, outlookup.nodeid, 1);
+			forget = NULL;
+			err = -ENOMEM;
+			goto out_compound;
+		}
+		kfree(forget);
+		forget = NULL;
+
+		newent = d_splice_alias(inode, entry);
+		if (IS_ERR(newent)) {
+			err = PTR_ERR(newent);
+			goto out_compound;
+		}
+		if (newent)
+			entry = newent;
+		fuse_change_entry_timeout(entry, &outlookup);
+		fuse_advise_use_readdirplus(dir);
+
+		err = finish_no_open(file, newent);
+		/* finish_no_open() consumed newent's reference: no dput() */
+		kfree(compound);
+		return err;
+	}
+
+	/* Anything other than "doesn't exist" from LOOKUP is terminal. */
+	if (lookup_err && lookup_err != -ENOENT) {
+		err = lookup_err;
+		goto out_compound;
+	}
+
+	/*
+	 * LOOKUP said the entry doesn't exist; consume CREATE.
+	 */
+	if (create_err == -ENOSYS) {
+		/* Server can't FUSE_CREATE; let caller fall back to mknod. */
+		fc->no_create = 1;
+		err = -ENOSYS;
+		goto out_compound;
+	}
+	if (create_err) {
+		err = create_err;
+		if (err == -EEXIST)
+			fuse_invalidate_entry(entry);
+		goto out_compound;
+	}
+
+	err = -EIO;
+	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
+	    fuse_invalid_attr(&outentry.attr))
+		goto out_compound;
+
+	file->f_mode |= FMODE_CREATED;
+
+	ff->fh = outopen.fh;
+	ff->nodeid = outentry.nodeid;
+	ff->open_flags = outopen.open_flags;
+	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
+			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
+	if (!inode) {
+		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
+		fuse_sync_release(NULL, ff, flags);
+		ff = NULL;
+		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
+		forget = NULL;
+		err = -ENOMEM;
+		goto out_compound;
+	}
+	kfree(forget);
+	forget = NULL;
+
+	/*
+	 * Unlike the separate-op path, we did not pre-run fuse_lookup() on
+	 * @entry, so it is still d_in_lookup() here. That means its
+	 * d_u.d_in_lookup_hash node is on the parent's in-lookup chain --
+	 * and that field is unioned with d_u.d_alias, so the BUG_ON in
+	 * d_instantiate() would fire. Use d_splice_alias() instead: it
+	 * goes through __d_add(), which detects d_in_lookup and calls
+	 * __d_lookup_done() before attaching the inode.
+	 *
+	 * For a regular file (asserted at function entry) d_splice_alias
+	 * cannot return an error and cannot return a different dentry --
+	 * both of those only happen via the S_ISDIR branch of d_splice_alias.
+	 * The IS_ERR and non-NULL return cases are therefore unreachable;
+	 * we WARN if invariants are ever broken (e.g. someone removes the
+	 * S_ISREG BUG_ON) rather than silently mishandling refcounts.
+	 */
+	newent = d_splice_alias(inode, entry);
+	if (WARN_ON_ONCE(IS_ERR(newent))) {
+		err = PTR_ERR(newent);
+		/* inode ref consumed by d_splice_alias on error */
+		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
+		fuse_sync_release(NULL, ff, flags);
+		ff = NULL;
+		goto out_compound;
+	}
+	WARN_ON_ONCE(newent);
+
+	fuse_change_entry_timeout(entry, &outentry);
+	fuse_dir_changed(dir);
+	err = generic_file_open(inode, file);
+	if (!err) {
+		file->private_data = ff;
+		err = finish_open(file, entry, fuse_finish_open);
+	}
+	if (err) {
+		fi = get_fuse_inode(inode);
+		fuse_sync_release(fi, ff, flags);
+	} else {
+		if (fc->atomic_o_trunc && trunc)
+			truncate_pagecache(inode, 0);
+		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+			invalidate_inode_pages2(inode->i_mapping);
+	}
+	dput(newent);
+	kfree(compound);
+	return err;
+
+out_compound:
+	kfree(compound);
+out_ff:
+	if (ff)
+		fuse_file_free(ff);
+out_forget:
+	kfree(forget);
+	return err;
+}
+
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned flags,
 			    umode_t mode)
@@ -737,6 +995,15 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	if (fuse_is_bad(dir))
 		return -EIO;
 
+	if (d_in_lookup(entry) && (flags & O_CREAT) && !fc->no_create &&
+	    fc->compound_lookup_create) {
+		err = fuse_compound_lookup_create(dir, entry, file, flags, mode);
+		if (err != -ENOSYS && err != -EOPNOTSUPP)
+			return err;
+		fc->compound_lookup_create = 0;
+		/* fall through to the separate-op path */
+	}
+
 	if (d_in_lookup(entry)) {
 		res = fuse_lookup(dir, entry, 0);
 		if (IS_ERR(res))
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 56303193e8e07b..10c83a9eecb55d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -243,8 +243,10 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 			err = fuse_compound_open_getattr(fm, nodeid, open_flags,
 							 opcode, ff,
 							 &attr_outarg, &outarg);
-			if (err == -ENOSYS)
+			if (err == -ENOSYS || err == -EOPNOTSUPP) {
 				fc->compound_open_getattr = 0;
+				err = -ENOSYS;
+			}
 
 			if (!err)
 				fuse_change_attributes(inode, &attr_outarg.attr, NULL,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 654b5890c24c18..064d43cfa4f1bc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -873,6 +873,9 @@ struct fuse_conn {
 	/* Does the filesystem support compound operations? */
 	unsigned int compound_open_getattr:1;
 
+	/* Does the filesystem support compound atomic-open (LOOKUP+CREATE)? */
+	unsigned int compound_lookup_create:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 670278526418b7..d550d2e404b973 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1187,6 +1187,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 
 	/* module option for now */
 	fc->compound_open_getattr = enable_compound;
+	fc->compound_lookup_create = enable_compound;
 
 	atomic64_set(&fc->attr_version, 1);
 	atomic64_set(&fc->evict_ctr, 1);