Skip to content

Commit bcface0

Browse files
author
Andreas Gruenbacher
committed
gfs2: Retries missing in gfs2_{rename,exchange}
JIRA: https://issues.redhat.com/browse/RHEL-135362 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git/ retry-rename Fix a bug in gfs2's asynchronous glock handling for rename and exchange operations. The original async implementation from commit ad26967 ("gfs2: Use async glocks for rename") mentioned that retries were needed but never implemented them, causing operations to fail with -ESTALE instead of retrying on timeout. Also makes the waiting interruptible. In addition, the timeouts used were too high for situations in which timing out is a rare but expected scenario. Switch to shorter timeouts with randomization and exponential backoff. Fixes: ad26967 ("gfs2: Use async glocks for rename") Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> (cherry picked from commit a9f0fe18fe0b5b5b8f89b0f487e9aa045e0627bb)
1 parent 3063047 commit bcface0

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

fs/gfs2/glock.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,31 +1351,45 @@ static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
13511351
* gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
13521352
* @num_gh: the number of holders in the array
13531353
* @ghs: the glock holder array
1354+
* @retries: number of retries attempted so far
13541355
*
13551356
* Returns: 0 on success, meaning all glocks have been granted and are held.
13561357
* -ESTALE if the request timed out, meaning all glocks were released,
13571358
* and the caller should retry the operation.
13581359
*/
13591360

1360-
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
1361+
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
1362+
unsigned int retries)
13611363
{
13621364
struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
1363-
int i, ret = 0, timeout = 0;
13641365
unsigned long start_time = jiffies;
1366+
int i, ret = 0;
1367+
long timeout;
13651368

13661369
might_sleep();
1367-
/*
1368-
* Total up the (minimum hold time * 2) of all glocks and use that to
1369-
* determine the max amount of time we should wait.
1370-
*/
1371-
for (i = 0; i < num_gh; i++)
1372-
timeout += ghs[i].gh_gl->gl_hold_time << 1;
13731370

1374-
if (!wait_event_timeout(sdp->sd_async_glock_wait,
1371+
timeout = GL_GLOCK_MIN_HOLD;
1372+
if (retries) {
1373+
unsigned int max_shift;
1374+
long incr;
1375+
1376+
/* Add a random delay and increase the timeout exponentially. */
1377+
max_shift = BITS_PER_LONG - 2 - __ffs(GL_GLOCK_HOLD_INCR);
1378+
incr = min(GL_GLOCK_HOLD_INCR << min(retries - 1, max_shift),
1379+
10 * HZ - GL_GLOCK_MIN_HOLD);
1380+
schedule_timeout_interruptible(get_random_long() % (incr / 3));
1381+
if (signal_pending(current))
1382+
goto interrupted;
1383+
timeout += (incr / 3) + get_random_long() % (incr / 3);
1384+
}
1385+
1386+
if (!wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
13751387
!glocks_pending(num_gh, ghs), timeout)) {
13761388
ret = -ESTALE; /* request timed out. */
13771389
goto out;
13781390
}
1391+
if (signal_pending(current))
1392+
goto interrupted;
13791393

13801394
for (i = 0; i < num_gh; i++) {
13811395
struct gfs2_holder *gh = &ghs[i];
@@ -1399,6 +1413,10 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
13991413
}
14001414
}
14011415
return ret;
1416+
1417+
interrupted:
1418+
ret = -EINTR;
1419+
goto out;
14021420
}
14031421

14041422
/**

fs/gfs2/glock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ int gfs2_glock_poll(struct gfs2_holder *gh);
219219
int gfs2_instantiate(struct gfs2_holder *gh);
220220
int gfs2_glock_holder_ready(struct gfs2_holder *gh);
221221
int gfs2_glock_wait(struct gfs2_holder *gh);
222-
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
222+
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
223+
unsigned int retries);
223224
void gfs2_glock_dq(struct gfs2_holder *gh);
224225
void gfs2_glock_dq_wait(struct gfs2_holder *gh);
225226
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);

fs/gfs2/inode.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,7 +1485,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
14851485
unsigned int num_gh;
14861486
int dir_rename = 0;
14871487
struct gfs2_diradd da = { .nr_blocks = 0, .save_loc = 0, };
1488-
unsigned int x;
1488+
unsigned int retries = 0, x;
14891489
int error;
14901490

14911491
gfs2_holder_mark_uninitialized(&r_gh);
@@ -1535,12 +1535,17 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
15351535
num_gh++;
15361536
}
15371537

1538+
again:
15381539
for (x = 0; x < num_gh; x++) {
15391540
error = gfs2_glock_nq(ghs + x);
15401541
if (error)
15411542
goto out_gunlock;
15421543
}
1543-
error = gfs2_glock_async_wait(num_gh, ghs);
1544+
error = gfs2_glock_async_wait(num_gh, ghs, retries);
1545+
if (error == -ESTALE) {
1546+
retries++;
1547+
goto again;
1548+
}
15441549
if (error)
15451550
goto out_gunlock;
15461551

@@ -1729,7 +1734,7 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
17291734
struct gfs2_sbd *sdp = GFS2_SB(odir);
17301735
struct gfs2_holder ghs[4], r_gh;
17311736
unsigned int num_gh;
1732-
unsigned int x;
1737+
unsigned int retries = 0, x;
17331738
umode_t old_mode = oip->i_inode.i_mode;
17341739
umode_t new_mode = nip->i_inode.i_mode;
17351740
int error;
@@ -1773,13 +1778,18 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
17731778
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
17741779
num_gh++;
17751780

1781+
again:
17761782
for (x = 0; x < num_gh; x++) {
17771783
error = gfs2_glock_nq(ghs + x);
17781784
if (error)
17791785
goto out_gunlock;
17801786
}
17811787

1782-
error = gfs2_glock_async_wait(num_gh, ghs);
1788+
error = gfs2_glock_async_wait(num_gh, ghs, retries);
1789+
if (error == -ESTALE) {
1790+
retries++;
1791+
goto again;
1792+
}
17831793
if (error)
17841794
goto out_gunlock;
17851795

0 commit comments

Comments
 (0)