diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 67581ce277b560..02cab38b93c939 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1183,8 +1183,13 @@ static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr) attr->blksize = sx->blksize; } +/* + * @param sx_mask request mask sent to the fuse server + * @param mandatory_sx_mask subset of (or complete) sx_mask that the server + * has to fulfill + */ static int fuse_do_statx(struct inode *inode, struct file *file, - struct kstat *stat) + struct kstat *stat, u32 sx_mask, u32 mandatory_sx_mask) { int err; struct fuse_attr attr; @@ -1195,6 +1200,12 @@ static int fuse_do_statx(struct inode *inode, struct file *file, u64 attr_version = fuse_get_attr_version(fm->fc); FUSE_ARGS(args); + /* + * mandatory_sx_mask should be a subset of sx_mask. + * If it's not, we have a logic error somewhere in the call chain. + */ + WARN_ON_ONCE((mandatory_sx_mask & sx_mask) != mandatory_sx_mask); + memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); /* Directories have separate file-handle space */ @@ -1204,9 +1215,12 @@ static int fuse_do_statx(struct inode *inode, struct file *file, inarg.getattr_flags |= FUSE_GETATTR_FH; inarg.fh = ff->fh; } - /* For now leave sync hints as the default, request all stats. */ + /* + * Sync hints stay at the default. Only request what the caller needs + * (e.g. mode, uid, gid for permission checks) instead of all stats. + */ inarg.sx_flags = 0; - inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME; + inarg.sx_mask = sx_mask; args.opcode = FUSE_STATX; args.nodeid = get_node_id(inode); args.in_numargs = 1; @@ -1220,6 +1234,17 @@ static int fuse_do_statx(struct inode *inode, struct file *file, return err; sx = &outarg.stat; + + /* + * Verify the server returned at least the mandatory attributes. + * The server may return more attributes than that (which is fine), + * but must not return fewer.
+ */ + if ((sx->mask & mandatory_sx_mask) != mandatory_sx_mask) { + fuse_make_bad(inode); + return -EIO; + } + if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) || ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) || inode_wrong_type(inode, sx->mode)))) { @@ -1228,7 +1253,7 @@ static int fuse_do_statx(struct inode *inode, struct file *file, } fuse_statx_to_attr(&outarg.stat, &attr); - if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) { + if (sx->mask & STATX_BASIC_STATS) { fuse_change_attributes(inode, &attr, &outarg.stat, ATTR_TIMEOUT(&outarg), attr_version); } @@ -1293,30 +1318,31 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, bool sync; u32 inval_mask = READ_ONCE(fi->inval_mask); u32 cache_mask = fuse_get_cache_mask(inode); - + u32 mandatory_sx_mask = request_mask & STATX_BASIC_STATS; + u32 sx_mask = request_mask; /* FUSE only supports basic stats and possibly btime */ - request_mask &= STATX_BASIC_STATS | STATX_BTIME; + sx_mask &= STATX_BASIC_STATS | STATX_BTIME; retry: if (fc->no_statx) - request_mask &= STATX_BASIC_STATS; + sx_mask &= STATX_BASIC_STATS; - if (!request_mask) + if (!sx_mask) sync = false; else if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) sync = false; - else if (request_mask & inval_mask & ~cache_mask) + else if (sx_mask & inval_mask & ~cache_mask) sync = true; else sync = time_before64(fi->i_time, get_jiffies_64()); if (sync) { forget_all_cached_acls(inode); - /* Try statx if BTIME is requested */ - if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { - err = fuse_do_statx(inode, file, stat); + if (!fc->no_statx) { + err = fuse_do_statx(inode, file, stat, sx_mask, + mandatory_sx_mask); if (err == -ENOSYS) { fc->no_statx = 1; err = 0; @@ -1326,7 +1352,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, err = fuse_do_getattr(inode, stat, file); } } else if (stat) { - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 
+ generic_fillattr(&nop_mnt_idmap, sx_mask, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; if (test_bit(FUSE_I_BTIME, &fi->state)) { @@ -1478,13 +1504,14 @@ static int fuse_access(struct inode *inode, int mask) return err; } -static int fuse_perm_getattr(struct inode *inode, int mask) +static int fuse_perm_getattr(struct inode *inode, int mask, int perm_mask) { if (mask & MAY_NOT_BLOCK) return -ECHILD; forget_all_cached_acls(inode); - return fuse_do_getattr(inode, NULL, NULL); + return fuse_update_get_attr(inode, NULL, NULL, perm_mask, + AT_STATX_FORCE_SYNC); } /* @@ -1506,6 +1533,7 @@ static int fuse_permission(struct mnt_idmap *idmap, struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; int err = 0; + int perm_mask = STATX_MODE | STATX_UID | STATX_GID; if (fuse_is_bad(inode)) return -EIO; @@ -1519,13 +1547,12 @@ static int fuse_permission(struct mnt_idmap *idmap, if (fc->default_permissions || ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { struct fuse_inode *fi = get_fuse_inode(inode); - u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; if (perm_mask & READ_ONCE(fi->inval_mask) || time_before64(fi->i_time, get_jiffies_64())) { refreshed = true; - err = fuse_perm_getattr(inode, mask); + err = fuse_perm_getattr(inode, mask, perm_mask); if (err) return err; } @@ -1538,7 +1565,7 @@ static int fuse_permission(struct mnt_idmap *idmap, attributes. 
This is also needed, because the root node will at first have no permissions */ if (err == -EACCES && !refreshed) { - err = fuse_perm_getattr(inode, mask); + err = fuse_perm_getattr(inode, mask, perm_mask); if (!err) err = generic_permission(&nop_mnt_idmap, inode, mask); @@ -1555,7 +1582,7 @@ static int fuse_permission(struct mnt_idmap *idmap, if (refreshed) return -EACCES; - err = fuse_perm_getattr(inode, mask); + err = fuse_perm_getattr(inode, mask, perm_mask); if (!err && !(inode->i_mode & S_IXUGO)) return -EACCES; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2a44160d74538d..25dbd6e042c2ce 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -216,6 +216,132 @@ static ino_t fuse_squash_ino(u64 ino64) return ino; } +/* + * Handle statx-specific attribute updates with partial attribute support. + */ +static void fuse_change_attributes_common_sx(struct inode *inode, + struct fuse_attr *attr, + struct fuse_statx *sx, + u64 attr_valid, u32 cache_mask, + u64 evict_ctr) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + u32 returned_attrs = sx->mask & STATX_BASIC_STATS; + + lockdep_assert_held(&fi->lock); + + /* + * Clear returned basic stats from invalid mask. + * + * Don't do this if this is coming from a fuse_iget() call and there + * might have been a racing evict which would've invalidated the result + * if the attr_version would've been preserved. + * + * !evict_ctr -> this is create + * fi->attr_version != 0 -> this is not a new inode + * evict_ctr == fuse_get_evict_ctr() -> no evicts during the request + */ + if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc)) + set_mask_bits(&fi->inval_mask, returned_attrs, 0); + + fi->attr_version = atomic64_inc_return(&fc->attr_version); + + /* + * Only update i_time if we got all the attributes we care about.
+ * + * With writeback_cache (cache_mask set): cache_mask attributes are + * managed locally and their values from the server are ignored. + * So we only need all the OTHER attributes (non-cache_mask). + */ + if (cache_mask) { + /* writeback_cache: ignore cache_mask attrs, check everything else */ + if ((returned_attrs | cache_mask) == STATX_BASIC_STATS) + fi->i_time = attr_valid; + } else { + /* no writeback_cache: need all basic stats */ + if (returned_attrs == STATX_BASIC_STATS) + fi->i_time = attr_valid; + } + + /* + * Only update inode fields for attributes that were actually returned. + * TYPE is part of i_mode but already set during inode creation. + */ + if (returned_attrs & STATX_INO) + inode->i_ino = fuse_squash_ino(attr->ino); + if (returned_attrs & STATX_MODE) + inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); + if (returned_attrs & STATX_NLINK) + set_nlink(inode, attr->nlink); + if (returned_attrs & STATX_UID) + inode->i_uid = make_kuid(fc->user_ns, attr->uid); + if (returned_attrs & STATX_GID) + inode->i_gid = make_kgid(fc->user_ns, attr->gid); + if (returned_attrs & STATX_BLOCKS) + inode->i_blocks = attr->blocks; + + if (returned_attrs & STATX_ATIME) { + attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); + inode_set_atime(inode, attr->atime, attr->atimensec); + } + /* mtime from server may be stale due to local buffered write */ + if ((returned_attrs & STATX_MTIME) && !(cache_mask & STATX_MTIME)) { + attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); + inode_set_mtime(inode, attr->mtime, attr->mtimensec); + } + if ((returned_attrs & STATX_CTIME) && !(cache_mask & STATX_CTIME)) { + attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); + inode_set_ctime(inode, attr->ctime, attr->ctimensec); + } + if (sx) { + /* Sanitize nsecs */ + sx->btime.tv_nsec = + min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); + + /* + * Btime has been queried, cache is valid (whether or not btime + * is available) so
clear STATX_BTIME from inval_mask. + * + * Availability of the btime attribute is indicated in + * FUSE_I_BTIME + */ + set_mask_bits(&fi->inval_mask, STATX_BTIME, 0); + if (sx->mask & STATX_BTIME) { + set_bit(FUSE_I_BTIME, &fi->state); + fi->i_btime.tv_sec = sx->btime.tv_sec; + fi->i_btime.tv_nsec = sx->btime.tv_nsec; + } + } + + /* Common fields for both statx and getattr */ + if (attr->blksize != 0) + inode->i_blkbits = ilog2(attr->blksize); + else + inode->i_blkbits = inode->i_sb->s_blocksize_bits; + + /* + * Don't set the sticky bit in i_mode, unless we want the VFS + * to check permissions. This prevents failures due to the + * check in may_delete(). + */ + fi->orig_i_mode = inode->i_mode; + if (!fc->default_permissions) + inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; + + /* + * We are refreshing inode data and it is possible that another + * client set suid/sgid or security.capability xattr. So clear + * S_NOSEC. Ideally, we could have cleared it only if suid/sgid + * was set or if security.capability xattr was set. But we don't + * know if security.capability has been set or not. So clear it + * anyway. It's less efficient but should be safe. + */ + inode->i_flags &= ~S_NOSEC; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, struct fuse_statx *sx, u64 attr_valid, u32 cache_mask, @@ -226,6 +352,12 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, lockdep_assert_held(&fi->lock); + if (sx) { + return fuse_change_attributes_common_sx(inode, attr, sx, + attr_valid, cache_mask, + evict_ctr); + } + /* * Clear basic stats from invalid mask. * @@ -329,8 +461,10 @@ static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; + bool have_size = !sx || (sx->mask & STATX_SIZE); spin_lock(&fi->lock); + /* * In case of writeback_cache enabled, writes update mtime, ctime and * may update i_size.
In these cases trust the cached value in the @@ -364,19 +498,26 @@ static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr * In case of writeback_cache enabled, the cached writes beyond EOF * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. + * Only update i_size if SIZE was actually returned by the server. */ - if (!(cache_mask & STATX_SIZE)) + if (have_size && !(cache_mask & STATX_SIZE)) i_size_write(inode, attr->size); spin_unlock(&fi->lock); + /* + * Only do page cache invalidation when cache_mask is not set + * (writeback_cache disabled) AND the relevant attributes (SIZE/MTIME) + * were actually returned by the server. + */ if (!cache_mask && S_ISREG(inode->i_mode)) { bool inval = false; + bool have_mtime = !sx || (sx->mask & STATX_MTIME); - if (oldsize != attr->size) { + if (have_size && oldsize != attr->size) { truncate_pagecache(inode, attr->size); if (!fc->explicit_inval_data) inval = true; - } else if (fc->auto_inval_data) { + } else if (have_mtime && fc->auto_inval_data) { struct timespec64 new_mtime = { .tv_sec = attr->mtime, .tv_nsec = attr->mtimensec,