Merge tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Features:
   - Add support for the NFS v4.2 COPY operation
   - Add support for NFS/RDMA over IPv6

  Bugfixes and cleanups:
   - Avoid race that crashes nfs_init_commit()
   - Fix oops in callback path
   - Fix LOCK/OPEN race when unlinking an open file
   - Choose correct stateids when using delegations in setattr, read and
     write
   - Don't send empty SETATTR after OPEN_CREATE
   - xprtrdma: Prevent server from writing a reply into memory client
     has released
   - xprtrdma: Support using Read list and Reply chunk in one RPC call"

* tag 'nfs-for-4.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (61 commits)
  pnfs: pnfs_update_layout needs to consider if strict iomode checking is on
  nfs/flexfiles: Use the layout segment for reading unless it a IOMODE_RW and reading is disabled
  nfs/flexfiles: Helper function to detect FF_FLAGS_NO_READ_IO
  nfs: avoid race that crashes nfs_init_commit
  NFS: checking for NULL instead of IS_ERR() in nfs_commit_file()
  pnfs: make pnfs_layout_process more robust
  pnfs: rework LAYOUTGET retry handling
  pnfs: lift retry logic from send_layoutget to pnfs_update_layout
  pnfs: fix bad error handling in send_layoutget
  flexfiles: add kerneldoc header to nfs4_ff_layout_prepare_ds
  flexfiles: remove pointless setting of NFS_LAYOUT_RETURN_REQUESTED
  pnfs: only tear down lsegs that precede seqid in LAYOUTRETURN args
  pnfs: keep track of the return sequence number in pnfs_layout_hdr
  pnfs: record sequence in pnfs_layout_segment when it's created
  pnfs: don't merge new ff lsegs with ones that have LAYOUTRETURN bit set
  pNFS/flexfiles: When initing reads or writes, we might have to retry connecting to DSes
  pNFS/flexfiles: When checking for available DSes, conditionally check for MDS io
  pNFS/flexfile: Fix erroneous fall back to read/write through the MDS
  NFS: Reclaim writes via writepage are opportunistic
  NFSv4: Use the right stateid for delegations in setattr, read and write
  ...
This commit is contained in:
Linus Torvalds
2016-05-26 10:33:33 -07:00
49 changed files with 1758 additions and 893 deletions

View File

@@ -74,6 +74,17 @@
#define NFS4_POLL_RETRY_MIN (HZ/10)
#define NFS4_POLL_RETRY_MAX (15*HZ)
/* file attributes which can be mapped to nfs attributes */
#define NFS4_VALID_ATTRS (ATTR_MODE \
| ATTR_UID \
| ATTR_GID \
| ATTR_SIZE \
| ATTR_ATIME \
| ATTR_MTIME \
| ATTR_CTIME \
| ATTR_ATIME_SET \
| ATTR_MTIME_SET)
struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
@@ -416,6 +427,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
case -NFS4ERR_RECALLCONFLICT:
exception->delay = 1;
return 0;
@@ -2558,15 +2570,20 @@ static int _nfs4_do_open(struct inode *dir,
if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
(opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
nfs4_exclusive_attrset(opendata, sattr, &label);
nfs_fattr_init(opendata->o_res.f_attr);
status = nfs4_do_setattr(state->inode, cred,
opendata->o_res.f_attr, sattr,
state, label, olabel);
if (status == 0) {
nfs_setattr_update_inode(state->inode, sattr,
opendata->o_res.f_attr);
nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
/*
* send create attributes which was not set by open
* with an extra setattr.
*/
if (sattr->ia_valid & NFS4_VALID_ATTRS) {
nfs_fattr_init(opendata->o_res.f_attr);
status = nfs4_do_setattr(state->inode, cred,
opendata->o_res.f_attr, sattr,
state, label, olabel);
if (status == 0) {
nfs_setattr_update_inode(state->inode, sattr,
opendata->o_res.f_attr);
nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
}
}
}
if (opened && opendata->file_created)
@@ -2676,6 +2693,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
.rpc_resp = &res,
.rpc_cred = cred,
};
struct rpc_cred *delegation_cred = NULL;
unsigned long timestamp = jiffies;
fmode_t fmode;
bool truncate;
@@ -2691,7 +2709,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false;
fmode = truncate ? FMODE_WRITE : FMODE_READ;
if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) {
if (nfs4_copy_delegation_stateid(inode, fmode, &arg.stateid, &delegation_cred)) {
/* Use that stateid */
} else if (truncate && state != NULL) {
struct nfs_lockowner lockowner = {
@@ -2700,13 +2718,17 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
};
if (!nfs4_valid_open_stateid(state))
return -EBADF;
if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
&lockowner) == -EIO)
if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner,
&arg.stateid, &delegation_cred) == -EIO)
return -EBADF;
} else
nfs4_stateid_copy(&arg.stateid, &zero_stateid);
if (delegation_cred)
msg.rpc_cred = delegation_cred;
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
put_rpccred(delegation_cred);
if (status == 0 && state != NULL)
renew_lease(server, timestamp);
trace_nfs4_setattr(inode, &arg.stateid, status);
@@ -4285,7 +4307,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid,
if (l_ctx != NULL)
lockowner = &l_ctx->lockowner;
return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner);
return nfs4_select_rw_stateid(ctx->state, fmode, lockowner, stateid, NULL);
}
EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid);
@@ -6054,6 +6076,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs4_state_owner *sp = state->owner;
unsigned char fl_flags = request->fl_flags;
int status = -ENOLCK;
@@ -6068,6 +6091,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
status = do_vfs_lock(state->inode, request);
if (status < 0)
goto out;
mutex_lock(&sp->so_delegreturn_mutex);
down_read(&nfsi->rwsem);
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
@@ -6075,9 +6099,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
request->fl_flags = fl_flags & ~FL_SLEEP;
status = do_vfs_lock(state->inode, request);
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
}
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
request->fl_flags = fl_flags;
@@ -7351,9 +7377,11 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
* always set csa_cachethis to FALSE because the current implementation
* of the back channel DRC only supports caching the CB_SEQUENCE operation.
*/
static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args,
struct rpc_clnt *clnt)
{
unsigned int max_rqst_sz, max_resp_sz;
unsigned int max_bc_payload = rpc_max_bc_payload(clnt);
max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead;
max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead;
@@ -7371,8 +7399,8 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
/* Back channel attributes */
args->bc_attrs.max_rqst_sz = PAGE_SIZE;
args->bc_attrs.max_resp_sz = PAGE_SIZE;
args->bc_attrs.max_rqst_sz = max_bc_payload;
args->bc_attrs.max_resp_sz = max_bc_payload;
args->bc_attrs.max_resp_sz_cached = 0;
args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS;
@@ -7476,7 +7504,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
};
int status;
nfs4_init_channel_attrs(&args);
nfs4_init_channel_attrs(&args, clp->cl_rpcclient);
args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
@@ -7820,40 +7848,34 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
struct nfs4_layoutget *lgp = calldata;
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
struct nfs4_session *session = nfs4_get_session(server);
int ret;
dprintk("--> %s\n", __func__);
/* Note the is a race here, where a CB_LAYOUTRECALL can come in
* right now covering the LAYOUTGET we are about to send.
* However, that is not so catastrophic, and there seems
* to be no way to prevent it completely.
*/
if (nfs41_setup_sequence(session, &lgp->args.seq_args,
&lgp->res.seq_res, task))
return;
ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
NFS_I(lgp->args.inode)->layout,
&lgp->args.range,
lgp->args.ctx->state);
if (ret < 0)
rpc_exit(task, ret);
nfs41_setup_sequence(session, &lgp->args.seq_args,
&lgp->res.seq_res, task);
dprintk("<-- %s\n", __func__);
}
static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
nfs41_sequence_done(task, &lgp->res.seq_res);
dprintk("<-- %s\n", __func__);
}
static int
nfs4_layoutget_handle_exception(struct rpc_task *task,
struct nfs4_layoutget *lgp, struct nfs4_exception *exception)
{
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
unsigned long timeo, now, giveup;
int status = task->tk_status;
dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
switch (task->tk_status) {
switch (status) {
case 0:
goto out;
@@ -7863,57 +7885,43 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
* retry go inband.
*/
case -NFS4ERR_LAYOUTUNAVAILABLE:
task->tk_status = -ENODATA;
status = -ENODATA;
goto out;
/*
* NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
* length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
*/
case -NFS4ERR_BADLAYOUT:
goto out_overflow;
status = -EOVERFLOW;
goto out;
/*
* NFS4ERR_LAYOUTTRYLATER is a conflict with another client
* (or clients) writing to the same RAID stripe except when
* the minlength argument is 0 (see RFC5661 section 18.43.3).
*
* Treat it like we would RECALLCONFLICT -- we retry for a little
* while, and then eventually give up.
*/
case -NFS4ERR_LAYOUTTRYLATER:
if (lgp->args.minlength == 0)
goto out_overflow;
/*
* NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
* existing layout before getting a new one).
*/
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
now = jiffies;
if (time_after(giveup, now)) {
unsigned long delay;
/* Delay for:
* - Not less then NFS4_POLL_RETRY_MIN.
* - One last time a jiffie before we give up
* - exponential backoff (time_now minus start_attempt)
*/
delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
min((giveup - now - 1),
now - lgp->args.timestamp));
dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
__func__, delay);
rpc_delay(task, delay);
/* Do not call nfs4_async_handle_error() */
goto out_restart;
if (lgp->args.minlength == 0) {
status = -EOVERFLOW;
goto out;
}
break;
/* Fallthrough */
case -NFS4ERR_RECALLCONFLICT:
nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT,
exception);
status = -ERECALLCONFLICT;
goto out;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
exception->timeout = 0;
spin_lock(&inode->i_lock);
if (nfs4_stateid_match(&lgp->args.stateid,
&lgp->args.ctx->state->stateid)) {
spin_unlock(&inode->i_lock);
/* If the open stateid was bad, then recover it. */
state = lgp->args.ctx->state;
exception->state = lgp->args.ctx->state;
break;
}
lo = NFS_I(inode)->layout;
@@ -7926,25 +7934,21 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
* with the current stateid.
*/
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
} else
spin_unlock(&inode->i_lock);
goto out_restart;
status = -EAGAIN;
goto out;
}
if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)
goto out_restart;
status = nfs4_handle_exception(server, status, exception);
if (exception->retry)
status = -EAGAIN;
out:
dprintk("<-- %s\n", __func__);
return;
out_restart:
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
out_overflow:
task->tk_status = -EOVERFLOW;
goto out;
return status;
}
static size_t max_response_pages(struct nfs_server *server)
@@ -8013,7 +8017,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
};
struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
{
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
@@ -8033,6 +8037,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
.flags = RPC_TASK_ASYNC,
};
struct pnfs_layout_segment *lseg = NULL;
struct nfs4_exception exception = { .timeout = *timeout };
int status = 0;
dprintk("--> %s\n", __func__);
@@ -8046,7 +8051,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
return ERR_PTR(-ENOMEM);
}
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->args.timestamp = jiffies;
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
@@ -8056,13 +8060,17 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
if (IS_ERR(task))
return ERR_CAST(task);
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0)
status = task->tk_status;
if (status == 0) {
status = nfs4_layoutget_handle_exception(task, lgp, &exception);
*timeout = exception.timeout;
}
trace_nfs4_layoutget(lgp->args.ctx,
&lgp->args.range,
&lgp->res.range,
&lgp->res.stateid,
status);
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
@@ -8118,7 +8126,8 @@ static void nfs4_layoutreturn_release(void *calldata)
dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
be32_to_cpu(lrp->args.stateid.seqid));
pnfs_mark_layout_returned_if_empty(lo);
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
@@ -8653,6 +8662,9 @@ nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
static bool nfs41_match_stateid(const nfs4_stateid *s1,
const nfs4_stateid *s2)
{
if (s1->type != s2->type)
return false;
if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0)
return false;
@@ -8793,6 +8805,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_ALLOCATE
| NFS_CAP_COPY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS