Grab the rangelock unconditionally in zfs_getpages()
As a deadlock avoidance measure, zfs_getpages() would only try to
acquire a rangelock, falling back to a single-page read if this was not
possible.  However, this is incompatible with direct I/O.

Instead, release the busy lock before trying to acquire the rangelock in
blocking mode.  This means that it's possible for the page to be
replaced, so we have to re-lookup.

Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Brian Atkinson <[email protected]>
Signed-off-by: Mark Johnston <[email protected]>
Closes #16643
markjdb authored and behlendorf committed Nov 13, 2024
1 parent 25eb538 commit 1786825
Showing 1 changed file with 51 additions and 17 deletions: module/os/freebsd/zfs/zfs_vnops_os.c
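Before the diff, here is a minimal, self-contained userland sketch of the pattern the change adopts: try the range lock without blocking, and if that fails, drop the page busy locks, block on the range lock, then re-grab the pages. Everything in it is a hypothetical stand-in (a pthread mutex for zp->z_rangelock, a fake_page struct for vm_page_t, getpages_sketch for zfs_getpages()); it only illustrates the ordering, it is not the kernel implementation shown below.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical stand-ins: a pthread mutex models the file range lock and
 * fake_page models a page with its busy and valid state.
 */
struct fake_page {
	bool busy;	/* ~ the exclusive busy lock */
	bool valid;	/* ~ vm_page_any_valid()/vm_page_all_valid() */
};

static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;

static void
getpages_sketch(struct fake_page *ma, int count)
{
	if (pthread_mutex_trylock(&range_lock) != 0) {
		/*
		 * The non-blocking attempt (~ zfs_rangelock_tryenter())
		 * failed.  Drop the busy markers first so a concurrent
		 * writer (update_pages() in the real code) can make
		 * progress, then block on the range lock.
		 */
		for (int i = 0; i < count; i++)
			ma[i].busy = false;		/* ~ vm_page_xunbusy() */
		pthread_mutex_lock(&range_lock);	/* ~ zfs_rangelock_enter() */
		/*
		 * Re-grab the pages (~ vm_page_grab_pages()); they may have
		 * been replaced or filled in while they were not busied.
		 */
		for (int i = 0; i < count; i++)
			ma[i].busy = true;
	}

	/* Read only the pages that are still invalid, as the new loop does. */
	for (int i = 0; i < count; i++) {
		if (!ma[i].valid) {
			/* ~ dmu_read_pages() over a run of invalid pages */
			ma[i].valid = true;
		}
	}

	pthread_mutex_unlock(&range_lock);	/* ~ zfs_rangelock_exit() */
}

int
main(void)
{
	struct fake_page ma[4] = {
		{ .busy = true }, { .busy = true },
		{ .busy = true }, { .busy = true }
	};

	getpages_sketch(ma, 4);
	for (int i = 0; i < 4; i++)
		printf("page %d: valid=%d busy=%d\n", i, ma[i].valid, ma[i].busy);
	return (0);
}

The essential ordering, as the in-diff comment explains, is that the busy locks are released before sleeping on the range lock; that is what breaks the deadlock with update_pages(), at the cost of possibly having to re-grab and re-validate the pages afterwards.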
@@ -3930,6 +3930,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
 		return (zfs_vm_pagerret_error);
 
+	object = ma[0]->object;
 	start = IDX_TO_OFF(ma[0]->pindex);
 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
 
@@ -3938,33 +3939,47 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	 * Note that we need to handle the case of the block size growing.
 	 */
 	for (;;) {
+		uint64_t len;
+
 		blksz = zp->z_blksz;
+		len = roundup(end, blksz) - rounddown(start, blksz);
+
 		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
-		    rounddown(start, blksz),
-		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+		    rounddown(start, blksz), len, RL_READER);
 		if (lr == NULL) {
-			if (rahead != NULL) {
-				*rahead = 0;
-				rahead = NULL;
-			}
-			if (rbehind != NULL) {
-				*rbehind = 0;
-				rbehind = NULL;
+			/*
+			 * Avoid a deadlock with update_pages().  We need to
+			 * hold the range lock when copying from the DMU, so
+			 * give up the busy lock to allow update_pages() to
+			 * proceed.  We might need to allocate new pages, which
+			 * isn't quite right since this allocation isn't subject
+			 * to the page fault handler's OOM logic, but this is
+			 * the best we can do for now.
+			 */
+			for (int i = 0; i < count; i++) {
+				ASSERT(vm_page_none_valid(ma[i]));
+				vm_page_xunbusy(ma[i]);
 			}
-			break;
+
+			lr = zfs_rangelock_enter(&zp->z_rangelock,
+			    rounddown(start, blksz), len, RL_READER);
+
+			zfs_vmobject_wlock(object);
+			(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
+			    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
+			    ma, count);
+			zfs_vmobject_wunlock(object);
 		}
 		if (blksz == zp->z_blksz)
 			break;
 		zfs_rangelock_exit(lr);
 	}
 
-	object = ma[0]->object;
 	zfs_vmobject_wlock(object);
 	obj_size = object->un_pager.vnp.vnp_size;
 	zfs_vmobject_wunlock(object);
 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
-		if (lr != NULL)
-			zfs_rangelock_exit(lr);
+		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (zfs_vm_pagerret_bad);
 	}
@@ -3989,11 +4004,30 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
 	 * ZFS will panic if we request DMU to read beyond the end of the last
 	 * allocated block.
	 */
-	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
-	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
+	for (int i = 0; i < count; i++) {
+		int count1, j, last_size;
 
-	if (lr != NULL)
-		zfs_rangelock_exit(lr);
+		if (vm_page_any_valid(ma[i])) {
+			ASSERT(vm_page_all_valid(ma[i]));
+			continue;
+		}
+		for (j = i + 1; j < count; j++) {
+			if (vm_page_any_valid(ma[j])) {
+				ASSERT(vm_page_all_valid(ma[j]));
+				break;
+			}
+		}
+		count1 = j - i;
+		last_size = j == count ?
+		    MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
+		error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
+		    i == 0 ? &pgsin_b : NULL, j == count ? &pgsin_a : NULL,
+		    last_size);
+		if (error != 0)
+			break;
+	}
+
+	zfs_rangelock_exit(lr);
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);