NFSv4/flexfiles: Fix handling of NFS level errors in I/O
[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]
Allow the flexfiles error handling to recognise NFS level errors (as
opposed to RPC level errors) and handle them separately. The main
motivator is the NFSERR_PERM errors that get returned if the NFS client
connects to the data server through a port number that is lower than
1024. In that case, the client should disconnect and retry a READ on a
different data server, or it should retry a WRITE after reconnecting.
Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae825a5 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Committed by: Greg Kroah-Hartman
Parent: e3eed01347
Commit: 204bdc7a8b
@@ -1096,6 +1096,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
||||||
|
u32 op_status,
|
||||||
struct nfs4_state *state,
|
struct nfs4_state *state,
|
||||||
struct nfs_client *clp,
|
struct nfs_client *clp,
|
||||||
struct pnfs_layout_segment *lseg,
|
struct pnfs_layout_segment *lseg,
|
||||||
@@ -1106,32 +1107,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||||||
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
||||||
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
|
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
|
||||||
|
|
||||||
switch (task->tk_status) {
|
switch (op_status) {
|
||||||
case -NFS4ERR_BADSESSION:
|
case NFS4_OK:
|
||||||
case -NFS4ERR_BADSLOT:
|
case NFS4ERR_NXIO:
|
||||||
case -NFS4ERR_BAD_HIGH_SLOT:
|
break;
|
||||||
case -NFS4ERR_DEADSESSION:
|
case NFSERR_PERM:
|
||||||
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
|
if (!task->tk_xprt)
|
||||||
case -NFS4ERR_SEQ_FALSE_RETRY:
|
break;
|
||||||
case -NFS4ERR_SEQ_MISORDERED:
|
xprt_force_disconnect(task->tk_xprt);
|
||||||
|
goto out_retry;
|
||||||
|
case NFS4ERR_BADSESSION:
|
||||||
|
case NFS4ERR_BADSLOT:
|
||||||
|
case NFS4ERR_BAD_HIGH_SLOT:
|
||||||
|
case NFS4ERR_DEADSESSION:
|
||||||
|
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
|
||||||
|
case NFS4ERR_SEQ_FALSE_RETRY:
|
||||||
|
case NFS4ERR_SEQ_MISORDERED:
|
||||||
dprintk("%s ERROR %d, Reset session. Exchangeid "
|
dprintk("%s ERROR %d, Reset session. Exchangeid "
|
||||||
"flags 0x%x\n", __func__, task->tk_status,
|
"flags 0x%x\n", __func__, task->tk_status,
|
||||||
clp->cl_exchange_flags);
|
clp->cl_exchange_flags);
|
||||||
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
|
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
|
||||||
break;
|
goto out_retry;
|
||||||
case -NFS4ERR_DELAY:
|
case NFS4ERR_DELAY:
|
||||||
case -NFS4ERR_GRACE:
|
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
|
||||||
|
fallthrough;
|
||||||
|
case NFS4ERR_GRACE:
|
||||||
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
|
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
|
||||||
break;
|
goto out_retry;
|
||||||
case -NFS4ERR_RETRY_UNCACHED_REP:
|
case NFS4ERR_RETRY_UNCACHED_REP:
|
||||||
break;
|
goto out_retry;
|
||||||
/* Invalidate Layout errors */
|
/* Invalidate Layout errors */
|
||||||
case -NFS4ERR_PNFS_NO_LAYOUT:
|
case NFS4ERR_PNFS_NO_LAYOUT:
|
||||||
case -ESTALE: /* mapped NFS4ERR_STALE */
|
case NFS4ERR_STALE:
|
||||||
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
|
case NFS4ERR_BADHANDLE:
|
||||||
case -EISDIR: /* mapped NFS4ERR_ISDIR */
|
case NFS4ERR_ISDIR:
|
||||||
case -NFS4ERR_FHEXPIRED:
|
case NFS4ERR_FHEXPIRED:
|
||||||
case -NFS4ERR_WRONG_TYPE:
|
case NFS4ERR_WRONG_TYPE:
|
||||||
dprintk("%s Invalid layout error %d\n", __func__,
|
dprintk("%s Invalid layout error %d\n", __func__,
|
||||||
task->tk_status);
|
task->tk_status);
|
||||||
/*
|
/*
|
||||||
@@ -1144,6 +1155,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||||||
pnfs_destroy_layout(NFS_I(inode));
|
pnfs_destroy_layout(NFS_I(inode));
|
||||||
rpc_wake_up(&tbl->slot_tbl_waitq);
|
rpc_wake_up(&tbl->slot_tbl_waitq);
|
||||||
goto reset;
|
goto reset;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (task->tk_status) {
|
||||||
/* RPC connection errors */
|
/* RPC connection errors */
|
||||||
case -ECONNREFUSED:
|
case -ECONNREFUSED:
|
||||||
case -EHOSTDOWN:
|
case -EHOSTDOWN:
|
||||||
@@ -1159,26 +1175,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
|
|||||||
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
||||||
&devid->deviceid);
|
&devid->deviceid);
|
||||||
rpc_wake_up(&tbl->slot_tbl_waitq);
|
rpc_wake_up(&tbl->slot_tbl_waitq);
|
||||||
fallthrough;
|
break;
|
||||||
default:
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (ff_layout_avoid_mds_available_ds(lseg))
|
if (ff_layout_avoid_mds_available_ds(lseg))
|
||||||
return -NFS4ERR_RESET_TO_PNFS;
|
return -NFS4ERR_RESET_TO_PNFS;
|
||||||
reset:
|
reset:
|
||||||
dprintk("%s Retry through MDS. Error %d\n", __func__,
|
dprintk("%s Retry through MDS. Error %d\n", __func__,
|
||||||
task->tk_status);
|
task->tk_status);
|
||||||
return -NFS4ERR_RESET_TO_MDS;
|
return -NFS4ERR_RESET_TO_MDS;
|
||||||
}
|
|
||||||
|
out_retry:
|
||||||
task->tk_status = 0;
|
task->tk_status = 0;
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
|
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
|
||||||
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
|
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
|
||||||
|
u32 op_status,
|
||||||
|
struct nfs_client *clp,
|
||||||
struct pnfs_layout_segment *lseg,
|
struct pnfs_layout_segment *lseg,
|
||||||
u32 idx)
|
u32 idx)
|
||||||
{
|
{
|
||||||
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
|
||||||
|
|
||||||
|
switch (op_status) {
|
||||||
|
case NFS_OK:
|
||||||
|
case NFSERR_NXIO:
|
||||||
|
break;
|
||||||
|
case NFSERR_PERM:
|
||||||
|
if (!task->tk_xprt)
|
||||||
|
break;
|
||||||
|
xprt_force_disconnect(task->tk_xprt);
|
||||||
|
goto out_retry;
|
||||||
|
case NFSERR_ACCES:
|
||||||
|
case NFSERR_BADHANDLE:
|
||||||
|
case NFSERR_FBIG:
|
||||||
|
case NFSERR_IO:
|
||||||
|
case NFSERR_NOSPC:
|
||||||
|
case NFSERR_ROFS:
|
||||||
|
case NFSERR_STALE:
|
||||||
|
goto out_reset_to_pnfs;
|
||||||
|
case NFSERR_JUKEBOX:
|
||||||
|
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
|
||||||
|
goto out_retry;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
switch (task->tk_status) {
|
switch (task->tk_status) {
|
||||||
/* File access problems. Don't mark the device as unavailable */
|
/* File access problems. Don't mark the device as unavailable */
|
||||||
case -EACCES:
|
case -EACCES:
|
||||||
@@ -1197,6 +1243,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
|
|||||||
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
|
||||||
&devid->deviceid);
|
&devid->deviceid);
|
||||||
}
|
}
|
||||||
|
out_reset_to_pnfs:
|
||||||
/* FIXME: Need to prevent infinite looping here. */
|
/* FIXME: Need to prevent infinite looping here. */
|
||||||
return -NFS4ERR_RESET_TO_PNFS;
|
return -NFS4ERR_RESET_TO_PNFS;
|
||||||
out_retry:
|
out_retry:
|
||||||
@@ -1207,6 +1254,7 @@ out_retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int ff_layout_async_handle_error(struct rpc_task *task,
|
static int ff_layout_async_handle_error(struct rpc_task *task,
|
||||||
|
u32 op_status,
|
||||||
struct nfs4_state *state,
|
struct nfs4_state *state,
|
||||||
struct nfs_client *clp,
|
struct nfs_client *clp,
|
||||||
struct pnfs_layout_segment *lseg,
|
struct pnfs_layout_segment *lseg,
|
||||||
@@ -1225,10 +1273,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
|
|||||||
|
|
||||||
switch (vers) {
|
switch (vers) {
|
||||||
case 3:
|
case 3:
|
||||||
return ff_layout_async_handle_error_v3(task, lseg, idx);
|
return ff_layout_async_handle_error_v3(task, op_status, clp,
|
||||||
case 4:
|
|
||||||
return ff_layout_async_handle_error_v4(task, state, clp,
|
|
||||||
lseg, idx);
|
lseg, idx);
|
||||||
|
case 4:
|
||||||
|
return ff_layout_async_handle_error_v4(task, op_status, state,
|
||||||
|
clp, lseg, idx);
|
||||||
default:
|
default:
|
||||||
/* should never happen */
|
/* should never happen */
|
||||||
WARN_ON_ONCE(1);
|
WARN_ON_ONCE(1);
|
||||||
@@ -1281,6 +1330,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
|
|||||||
switch (status) {
|
switch (status) {
|
||||||
case NFS4ERR_DELAY:
|
case NFS4ERR_DELAY:
|
||||||
case NFS4ERR_GRACE:
|
case NFS4ERR_GRACE:
|
||||||
|
case NFS4ERR_PERM:
|
||||||
break;
|
break;
|
||||||
case NFS4ERR_NXIO:
|
case NFS4ERR_NXIO:
|
||||||
ff_layout_mark_ds_unreachable(lseg, idx);
|
ff_layout_mark_ds_unreachable(lseg, idx);
|
||||||
@@ -1313,7 +1363,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
|
|||||||
trace_ff_layout_read_error(hdr);
|
trace_ff_layout_read_error(hdr);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ff_layout_async_handle_error(task, hdr->args.context->state,
|
err = ff_layout_async_handle_error(task, hdr->res.op_status,
|
||||||
|
hdr->args.context->state,
|
||||||
hdr->ds_clp, hdr->lseg,
|
hdr->ds_clp, hdr->lseg,
|
||||||
hdr->pgio_mirror_idx);
|
hdr->pgio_mirror_idx);
|
||||||
|
|
||||||
@@ -1483,7 +1534,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
|
|||||||
trace_ff_layout_write_error(hdr);
|
trace_ff_layout_write_error(hdr);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ff_layout_async_handle_error(task, hdr->args.context->state,
|
err = ff_layout_async_handle_error(task, hdr->res.op_status,
|
||||||
|
hdr->args.context->state,
|
||||||
hdr->ds_clp, hdr->lseg,
|
hdr->ds_clp, hdr->lseg,
|
||||||
hdr->pgio_mirror_idx);
|
hdr->pgio_mirror_idx);
|
||||||
|
|
||||||
@@ -1529,8 +1581,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
|
|||||||
trace_ff_layout_commit_error(data);
|
trace_ff_layout_commit_error(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
|
err = ff_layout_async_handle_error(task, data->res.op_status,
|
||||||
data->lseg, data->ds_commit_index);
|
NULL, data->ds_clp, data->lseg,
|
||||||
|
data->ds_commit_index);
|
||||||
|
|
||||||
trace_nfs4_pnfs_commit_ds(data, err);
|
trace_nfs4_pnfs_commit_ds(data, err);
|
||||||
switch (err) {
|
switch (err) {
|
||||||
|
Reference in New Issue
Block a user