From 0553f4afcd753ada1a3fadaa74994a186ceddcad Mon Sep 17 00:00:00 2001 From: Horst Birthelmer Date: Wed, 22 Apr 2026 10:04:53 +0200 Subject: [PATCH] fuse: handle U64_MAX correctly in DLM locked range index When merging and checking overlap, U64_MAX was not handled. This creates problems when multiple DLM results cover the whole range. Add a fast path for adding and checking locked ranges when the whole range is covered. Refactor DLM range handling to make it more readable. Signed-off-by: Horst Birthelmer --- fs/fuse/fuse_dlm_cache.c | 282 ++++++++++++++++++++++----------------- fs/fuse/fuse_dlm_cache.h | 12 +- 2 files changed, 167 insertions(+), 127 deletions(-) diff --git a/fs/fuse/fuse_dlm_cache.c b/fs/fuse/fuse_dlm_cache.c index d765dd8018cc6a..21902e26f6808a 100644 --- a/fs/fuse/fuse_dlm_cache.c +++ b/fs/fuse/fuse_dlm_cache.c @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -16,11 +17,11 @@ struct fuse_dlm_range { /* Interval tree node */ struct rb_node rb; /* Start page offset (inclusive) */ - uint64_t start; + u64 start; /* End page offset (inclusive) */ - uint64_t end; + u64 end; /* Subtree end value for interval tree */ - uint64_t __subtree_end; + u64 __subtree_end; /* Lock mode */ enum fuse_page_lock_mode mode; /* Temporary list entry for operations */ @@ -32,17 +33,17 @@ struct fuse_dlm_range { #define FUSE_PCACHE_LK_WRITE 2 /* Exclusive write lock */ /* Interval tree definitions for page ranges */ -static inline uint64_t fuse_dlm_range_start(struct fuse_dlm_range *range) +static inline u64 fuse_dlm_range_start(struct fuse_dlm_range *range) { return range->start; } -static inline uint64_t fuse_dlm_range_last(struct fuse_dlm_range *range) +static inline u64 fuse_dlm_range_last(struct fuse_dlm_range *range) { return range->end; } -INTERVAL_TREE_DEFINE(struct fuse_dlm_range, rb, uint64_t, __subtree_end, +INTERVAL_TREE_DEFINE(struct fuse_dlm_range, rb, u64, __subtree_end, fuse_dlm_range_start, fuse_dlm_range_last, static, fuse_page_it); 
@@ -58,9 +59,6 @@ int fuse_dlm_cache_init(struct fuse_inode *inode) { struct fuse_dlm_cache *cache = &inode->dlm_locked_areas; - if (!cache) - return -EINVAL; - init_rwsem(&cache->lock); cache->ranges = RB_ROOT_CACHED; @@ -79,9 +77,6 @@ void fuse_dlm_cache_release_locks(struct fuse_inode *inode) struct fuse_dlm_range *range; struct rb_node *node; - if (!cache) - return; - /* Release all locks */ down_write(&cache->lock); while ((node = rb_first_cached(&cache->ranges)) != NULL) { @@ -101,8 +96,7 @@ void fuse_dlm_cache_release_locks(struct fuse_inode *inode) * Return: Pointer to the first overlapping range, or NULL if none found */ static struct fuse_dlm_range * -fuse_dlm_find_overlapping(struct fuse_dlm_cache *cache, uint64_t start, - uint64_t end) +fuse_dlm_find_overlapping(struct fuse_dlm_cache *cache, u64 start, u64 end) { return fuse_page_it_iter_first(&cache->ranges, start, end); } @@ -116,30 +110,25 @@ fuse_dlm_find_overlapping(struct fuse_dlm_cache *cache, uint64_t start, * Attempt to merge ranges within and adjacent to the specified region * that have the same lock mode. */ -static void fuse_dlm_try_merge(struct fuse_dlm_cache *cache, uint64_t start, - uint64_t end) +static void fuse_dlm_try_merge(struct fuse_dlm_cache *cache, u64 start, + u64 end) { struct fuse_dlm_range *range, *next; - struct rb_node *node; - - if (!cache) - return; + u64 search_start = (start > 0) ? start - 1 : 0; + u64 search_end = (end < U64_MAX) ? end + 1 : U64_MAX; - /* Find the first range that might need merging */ - range = NULL; - node = rb_first_cached(&cache->ranges); - while (node) { - range = rb_entry(node, struct fuse_dlm_range, rb); - if (range->end >= start - 1) - break; - node = rb_next(node); - } + /* + * Expand the search region by 1 on each side so adjacent ranges + * (ending at start-1 or starting at end+1) become candidates for + * merging alongside overlapping ranges. 
+ */ + range = fuse_dlm_find_overlapping(cache, search_start, search_end); - if (!range || range->start > end + 1) + if (!range || (end != U64_MAX && range->start > end + 1)) return; /* Try to merge ranges in and around the specified region */ - while (range && range->start <= end + 1) { + while (range && (end == U64_MAX || range->start <= end + 1)) { /* Get next range before we potentially modify the tree */ next = NULL; if (rb_next(&range->rb)) { @@ -148,17 +137,25 @@ static void fuse_dlm_try_merge(struct fuse_dlm_cache *cache, uint64_t start, } /* Try to merge with next range if adjacent and same mode */ - if (next && range->mode == next->mode && - range->end + 1 == next->start) { - /* Merge ranges */ - range->end = next->end; - - /* Remove next from tree */ - fuse_page_it_remove(next, &cache->ranges); - kfree(next); - - /* Continue with the same range */ - continue; + if (next && range->mode == next->mode) { + if (range->end == U64_MAX) { + /* This should never happen - can't have ranges after U64_MAX */ + WARN_ON_ONCE(1); + break; + } + if (range->end + 1 == next->start) { + /* + * Adjacent ranges with same mode, merge them. + * Re-insert range after mutating ->end so the + * augmented __subtree_end values stay consistent. 
+ */ + fuse_page_it_remove(range, &cache->ranges); + fuse_page_it_remove(next, &cache->ranges); + range->end = next->end; + fuse_page_it_insert(range, &cache->ranges); + kfree(next); + continue; + } } /* Move to next range */ @@ -182,8 +179,8 @@ static void fuse_dlm_try_merge(struct fuse_dlm_cache *cache, uint64_t start, * * Return: 0 on success, negative error code on failure */ -int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, - uint64_t end, enum fuse_page_lock_mode mode) +int fuse_dlm_lock_range(struct fuse_inode *inode, u64 start, u64 end, + enum fuse_page_lock_mode mode) { struct fuse_dlm_cache *cache = &inode->dlm_locked_areas; struct fuse_dlm_range *range, *new_range, *next; @@ -191,30 +188,41 @@ int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, int ret = 0; LIST_HEAD(to_lock); LIST_HEAD(to_upgrade); - uint64_t current_start = start; + u64 current_start = start; + bool reached_u64_max = false; - if (!cache || start > end) + if (start > end) return -EINVAL; - /* Convert to lock mode */ lock_mode = (mode == FUSE_PAGE_LOCK_READ) ? FUSE_PCACHE_LK_READ : - FUSE_PCACHE_LK_WRITE; + FUSE_PCACHE_LK_WRITE; down_write(&cache->lock); - /* Find all ranges that overlap with [start, end] */ range = fuse_page_it_iter_first(&cache->ranges, start, end); + + /* + * Fast path: an existing range already fully covers [start, end] + * with the same or a stronger mode. 
+ */ + if (range && range->start <= start && range->end >= end) { + bool same_mode = (range->mode == lock_mode); + bool stronger_mode = (lock_mode == FUSE_PCACHE_LK_READ && + range->mode == FUSE_PCACHE_LK_WRITE); + + if (same_mode || stronger_mode) + goto out; + } + while (range) { - /* Get next overlapping range before we potentially modify the tree */ + /* Fetch next before we potentially modify the tree */ next = fuse_page_it_iter_next(range, start, end); - /* Check lock compatibility */ if (lock_mode == FUSE_PCACHE_LK_WRITE && lock_mode != range->mode) { /* we own the lock but have to update it. */ list_add_tail(&range->list, &to_upgrade); } - /* If WRITE lock already exists - nothing to do */ /* If there's a gap before this range, we need to add the missing range */ if (current_start < range->start) { @@ -232,15 +240,18 @@ int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, list_add_tail(&new_range->list, &to_lock); } - /* Move current_start past this range */ + /* Range reaches U64_MAX; no trailing gap is possible */ + if (range->end == U64_MAX) { + reached_u64_max = true; + break; + } current_start = max(current_start, range->end + 1); - /* Move to next range */ range = next; } /* If there's a gap after the last range to the end, extend the range */ - if (current_start <= end) { + if (!reached_u64_max && current_start <= end) { new_range = kmalloc(sizeof(*new_range), GFP_KERNEL); if (!new_range) { ret = -ENOMEM; @@ -255,41 +266,27 @@ int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, list_add_tail(&new_range->list, &to_lock); } - /* update locks, if any lock is in this list it has the wrong mode */ - list_for_each_entry(range, &to_upgrade, list) { - /* Update the lock mode */ + list_for_each_entry(range, &to_upgrade, list) range->mode = lock_mode; - } - /* Add all new ranges to the tree */ - list_for_each_entry(new_range, &to_lock, list) { - /* Add to interval tree */ + list_for_each_entry(new_range, &to_lock, list) 
fuse_page_it_insert(new_range, &cache->ranges); - } - /* Try to merge adjacent ranges with the same mode */ fuse_dlm_try_merge(cache, start, end); +out: up_write(&cache->lock); return 0; out_free: /* Free any ranges we allocated but didn't insert */ while (!list_empty(&to_lock)) { - new_range = - list_first_entry(&to_lock, struct fuse_dlm_range, list); + new_range = list_first_entry(&to_lock, struct fuse_dlm_range, + list); list_del(&new_range->list); kfree(new_range); } - /* Restore original lock modes for any partially upgraded locks */ - list_for_each_entry(range, &to_upgrade, list) { - if (lock_mode == FUSE_PCACHE_LK_WRITE) { - /* We upgraded this lock but failed later, downgrade it back */ - range->mode = FUSE_PCACHE_LK_READ; - } - } - up_write(&cache->lock); return ret; } @@ -304,13 +301,13 @@ int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, * * Return: 0 on success, negative error code on failure */ -static int fuse_dlm_punch_hole(struct fuse_dlm_cache *cache, uint64_t start, - uint64_t end) +static int fuse_dlm_punch_hole(struct fuse_dlm_cache *cache, u64 start, + u64 end) { struct fuse_dlm_range *range, *new_range; int ret = 0; - if (!cache || start > end) + if (start > end) return -EINVAL; /* Find a range that contains [start, end] */ @@ -322,13 +319,37 @@ static int fuse_dlm_punch_hole(struct fuse_dlm_cache *cache, uint64_t start, /* If the hole is at the beginning of the range */ if (start == range->start) { - range->start = end + 1; + if (end == U64_MAX) { + /* Hole goes to end of address space, remove entire range */ + fuse_page_it_remove(range, &cache->ranges); + kfree(range); + } else { + /* + * Mutating ->start changes the BST key; re-insert so + * ordering and augmented values stay consistent. 
+ */ + fuse_page_it_remove(range, &cache->ranges); + range->start = end + 1; + fuse_page_it_insert(range, &cache->ranges); + } goto out; } /* If the hole is at the end of the range */ if (end == range->end) { - range->end = start - 1; + if (start == 0) { + /* Hole starts at 0, remove entire range */ + fuse_page_it_remove(range, &cache->ranges); + kfree(range); + } else { + /* + * Mutating ->end changes the augmented __subtree_end; + * re-insert so the augmented values stay consistent. + */ + fuse_page_it_remove(range, &cache->ranges); + range->end = start - 1; + fuse_page_it_insert(range, &cache->ranges); + } goto out; } @@ -367,16 +388,12 @@ static int fuse_dlm_punch_hole(struct fuse_dlm_cache *cache, uint64_t start, * * Return: 0 on success, negative error code on failure */ -int fuse_dlm_unlock_range(struct fuse_inode *inode, - uint64_t start, uint64_t end) +int fuse_dlm_unlock_range(struct fuse_inode *inode, u64 start, u64 end) { struct fuse_dlm_cache *cache = &inode->dlm_locked_areas; struct fuse_dlm_range *range, *next; int ret = 0; - if (!cache) - return -EINVAL; - if (start == 0 && end == 0) { fuse_dlm_cache_release_locks(inode); return 0; @@ -399,11 +416,24 @@ int fuse_dlm_unlock_range(struct fuse_inode *inode, /* After punching a hole, we're done */ break; } else if (start > range->start) { - /* Adjust the end of the range */ + /* + * Adjust the end of the range (start is > 0, so + * start - 1 is safe). Re-insert so the augmented + * __subtree_end values stay consistent. + */ + fuse_page_it_remove(range, &cache->ranges); range->end = start - 1; + fuse_page_it_insert(range, &cache->ranges); } else if (end < range->end) { - /* Adjust the start of the range */ + /* + * Adjust the start of the range (end is < U64_MAX, + * so end + 1 is safe). ->start is the BST key, so + * re-insert to keep ordering and augmented values + * consistent. 
+ */ + fuse_page_it_remove(range, &cache->ranges); range->start = end + 1; + fuse_page_it_insert(range, &cache->ranges); } else { /* Complete overlap, remove the range */ fuse_page_it_remove(range, &cache->ranges); @@ -430,15 +460,16 @@ int fuse_dlm_unlock_range(struct fuse_inode *inode, * * Return: true if the entire range is locked, false otherwise */ -bool fuse_dlm_range_is_locked(struct fuse_inode *inode, uint64_t start, - uint64_t end, enum fuse_page_lock_mode mode) +bool fuse_dlm_range_is_locked(struct fuse_inode *inode, u64 start, u64 end, + enum fuse_page_lock_mode mode) { struct fuse_dlm_cache *cache = &inode->dlm_locked_areas; struct fuse_dlm_range *range; int lock_mode = 0; - uint64_t current_start = start; + u64 current_start = start; + bool ret = false; - if (!cache || start > end) + if (start > end) return false; /* Convert to lock mode if specified */ @@ -452,20 +483,34 @@ bool fuse_dlm_range_is_locked(struct fuse_inode *inode, uint64_t start, /* Find the first range that overlaps with [start, end] */ range = fuse_dlm_find_overlapping(cache, start, end); - /* Check if the entire range is covered */ + /* + * Fast path: check if a single range covers the entire request. + * This is common when we grant [0, U64_MAX] locks. 
+ */ + if (range && range->start <= start && range->end >= end) { + /* Range fully covers [start, end] */ + if (!lock_mode || range->mode == lock_mode) { + ret = true; + goto out; + } + /* Wrong lock mode */ + goto out; + } + + /* Slow path: check if multiple ranges cover the request */ while (range && current_start <= end) { /* If we're checking for a specific mode, verify it matches */ - if (lock_mode && range->mode != lock_mode) { - /* Wrong lock mode */ - up_read(&cache->lock); - return false; - } + if (lock_mode && range->mode != lock_mode) + goto out; /* Check if there's a gap before this range */ - if (current_start < range->start) { - /* Found a gap */ - up_read(&cache->lock); - return false; + if (current_start < range->start) + goto out; + + if (range->end == U64_MAX) { + /* Range covers to end of address space */ + ret = true; + goto out; } /* Move current_start past this range */ @@ -476,14 +521,11 @@ bool fuse_dlm_range_is_locked(struct fuse_inode *inode, uint64_t start, } /* Check if we covered the entire range */ - if (current_start <= end) { - /* There's a gap at the end */ - up_read(&cache->lock); - return false; - } + ret = (current_start > end); +out: up_read(&cache->lock); - return true; + return ret; } /** @@ -497,7 +539,7 @@ void fuse_get_dlm_write_lock(struct file *file, loff_t offset, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = ff->fm; - uint64_t end = (offset + length - 1) | (PAGE_SIZE - 1); + u64 end = (offset + length - 1) | (PAGE_SIZE - 1); /* note that the offset and length don't have to be page aligned here * but since we only get here on writeback caching we will send out @@ -541,18 +583,16 @@ void fuse_get_dlm_write_lock(struct file *file, loff_t offset, if (err) return; - else - if (inarg.start < outarg.start || - inarg.end > outarg.end) { - /* fuse server is seriously broken */ - pr_warn("fuse: dlm lock request for %llu:%llu returned %llu:%llu bytes\n", - 
inarg.start, inarg.end, outarg.start, outarg.end); - fuse_abort_conn(fc); - return; - } else { - /* ignore any errors here, there is no way we can react appropriately */ - fuse_dlm_lock_range(fi, outarg.start, - outarg.end, - FUSE_PAGE_LOCK_WRITE); - } + + if (outarg.start > inarg.start || outarg.end < inarg.end) { + /* fuse server is seriously broken */ + pr_warn("fuse: dlm lock request for %llu:%llu returned %llu:%llu bytes\n", + inarg.start, inarg.end, outarg.start, outarg.end); + fuse_abort_conn(fc); + return; + } + + /* ignore any errors here, there is no way we can react appropriately */ + fuse_dlm_lock_range(fi, outarg.start, outarg.end, + FUSE_PAGE_LOCK_WRITE); } diff --git a/fs/fuse/fuse_dlm_cache.h b/fs/fuse/fuse_dlm_cache.h index 438d31d28b666e..0f61495e6f220b 100644 --- a/fs/fuse/fuse_dlm_cache.h +++ b/fs/fuse/fuse_dlm_cache.h @@ -32,16 +32,16 @@ int fuse_dlm_cache_init(struct fuse_inode *inode); void fuse_dlm_cache_release_locks(struct fuse_inode *inode); /* Lock a range of pages */ -int fuse_dlm_lock_range(struct fuse_inode *inode, uint64_t start, - uint64_t end, enum fuse_page_lock_mode mode); +int fuse_dlm_lock_range(struct fuse_inode *inode, u64 start, + u64 end, enum fuse_page_lock_mode mode); /* Unlock a range of pages */ -int fuse_dlm_unlock_range(struct fuse_inode *inode, uint64_t start, - uint64_t end); +int fuse_dlm_unlock_range(struct fuse_inode *inode, u64 start, + u64 end); /* Check if a page range is already locked */ -bool fuse_dlm_range_is_locked(struct fuse_inode *inode, uint64_t start, - uint64_t end, enum fuse_page_lock_mode mode); +bool fuse_dlm_range_is_locked(struct fuse_inode *inode, u64 start, + u64 end, enum fuse_page_lock_mode mode); /* this is the interface to the filesystem */ void fuse_get_dlm_write_lock(struct file *file, loff_t offset,