NFSv4/flexfiles: Fix handling of NFS level errors in I/O

[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]

Allow the flexfiles error handling to recognise NFS level errors (as
opposed to RPC level errors) and handle them separately. The main
motivation is the NFSERR_PERM error that is returned if the NFS client
connects to the data server through a port number that is lower than
1024. In that case, the client should disconnect and then either retry
a READ on a different data server, or retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae825a5 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Trond Myklebust
2025-06-19 15:16:11 -04:00
committed by Greg Kroah-Hartman
parent e3eed01347
commit 204bdc7a8b

View File

@@ -1096,6 +1096,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
} }
static int ff_layout_async_handle_error_v4(struct rpc_task *task, static int ff_layout_async_handle_error_v4(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state, struct nfs4_state *state,
struct nfs_client *clp, struct nfs_client *clp,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
@@ -1106,32 +1107,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) { switch (op_status) {
case -NFS4ERR_BADSESSION: case NFS4_OK:
case -NFS4ERR_BADSLOT: case NFS4ERR_NXIO:
case -NFS4ERR_BAD_HIGH_SLOT: break;
case -NFS4ERR_DEADSESSION: case NFSERR_PERM:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: if (!task->tk_xprt)
case -NFS4ERR_SEQ_FALSE_RETRY: break;
case -NFS4ERR_SEQ_MISORDERED: xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFS4ERR_BADSESSION:
case NFS4ERR_BADSLOT:
case NFS4ERR_BAD_HIGH_SLOT:
case NFS4ERR_DEADSESSION:
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case NFS4ERR_SEQ_FALSE_RETRY:
case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid " dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status, "flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags); clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break; goto out_retry;
case -NFS4ERR_DELAY: case NFS4ERR_DELAY:
case -NFS4ERR_GRACE: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
break; goto out_retry;
case -NFS4ERR_RETRY_UNCACHED_REP: case NFS4ERR_RETRY_UNCACHED_REP:
break; goto out_retry;
/* Invalidate Layout errors */ /* Invalidate Layout errors */
case -NFS4ERR_PNFS_NO_LAYOUT: case NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */ case NFS4ERR_STALE:
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ case NFS4ERR_BADHANDLE:
case -EISDIR: /* mapped NFS4ERR_ISDIR */ case NFS4ERR_ISDIR:
case -NFS4ERR_FHEXPIRED: case NFS4ERR_FHEXPIRED:
case -NFS4ERR_WRONG_TYPE: case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__, dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status); task->tk_status);
/* /*
@@ -1144,6 +1155,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode)); pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq); rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset; goto reset;
default:
break;
}
switch (task->tk_status) {
/* RPC connection errors */ /* RPC connection errors */
case -ECONNREFUSED: case -ECONNREFUSED:
case -EHOSTDOWN: case -EHOSTDOWN:
@@ -1159,26 +1175,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client, nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid); &devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq); rpc_wake_up(&tbl->slot_tbl_waitq);
fallthrough; break;
default: default:
break;
}
if (ff_layout_avoid_mds_available_ds(lseg)) if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS; return -NFS4ERR_RESET_TO_PNFS;
reset: reset:
dprintk("%s Retry through MDS. Error %d\n", __func__, dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status); task->tk_status);
return -NFS4ERR_RESET_TO_MDS; return -NFS4ERR_RESET_TO_MDS;
}
out_retry:
task->tk_status = 0; task->tk_status = 0;
return -EAGAIN; return -EAGAIN;
} }
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task, static int ff_layout_async_handle_error_v3(struct rpc_task *task,
u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
u32 idx) u32 idx)
{ {
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
switch (op_status) {
case NFS_OK:
case NFSERR_NXIO:
break;
case NFSERR_PERM:
if (!task->tk_xprt)
break;
xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFSERR_ACCES:
case NFSERR_BADHANDLE:
case NFSERR_FBIG:
case NFSERR_IO:
case NFSERR_NOSPC:
case NFSERR_ROFS:
case NFSERR_STALE:
goto out_reset_to_pnfs;
case NFSERR_JUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
default:
break;
}
switch (task->tk_status) { switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */ /* File access problems. Don't mark the device as unavailable */
case -EACCES: case -EACCES:
@@ -1197,6 +1243,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client, nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid); &devid->deviceid);
} }
out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */ /* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS; return -NFS4ERR_RESET_TO_PNFS;
out_retry: out_retry:
@@ -1207,6 +1254,7 @@ out_retry:
} }
static int ff_layout_async_handle_error(struct rpc_task *task, static int ff_layout_async_handle_error(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state, struct nfs4_state *state,
struct nfs_client *clp, struct nfs_client *clp,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
@@ -1225,10 +1273,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) { switch (vers) {
case 3: case 3:
return ff_layout_async_handle_error_v3(task, lseg, idx); return ff_layout_async_handle_error_v3(task, op_status, clp,
case 4:
return ff_layout_async_handle_error_v4(task, state, clp,
lseg, idx); lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, op_status, state,
clp, lseg, idx);
default: default:
/* should never happen */ /* should never happen */
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
@@ -1281,6 +1330,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) { switch (status) {
case NFS4ERR_DELAY: case NFS4ERR_DELAY:
case NFS4ERR_GRACE: case NFS4ERR_GRACE:
case NFS4ERR_PERM:
break; break;
case NFS4ERR_NXIO: case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx); ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1313,7 +1363,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr); trace_ff_layout_read_error(hdr);
} }
err = ff_layout_async_handle_error(task, hdr->args.context->state, err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg, hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx); hdr->pgio_mirror_idx);
@@ -1483,7 +1534,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr); trace_ff_layout_write_error(hdr);
} }
err = ff_layout_async_handle_error(task, hdr->args.context->state, err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg, hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx); hdr->pgio_mirror_idx);
@@ -1529,8 +1581,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data); trace_ff_layout_commit_error(data);
} }
err = ff_layout_async_handle_error(task, NULL, data->ds_clp, err = ff_layout_async_handle_error(task, data->res.op_status,
data->lseg, data->ds_commit_index); NULL, data->ds_clp, data->lseg,
data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err); trace_nfs4_pnfs_commit_ds(data, err);
switch (err) { switch (err) {