Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Lots of bugs fixes, including Zheng and Jan's extent status shrinker
fixes, which should improve CPU utilization and potential soft lockups
under heavy memory pressure, and Eric Whitney's bigalloc fixes"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (26 commits)
ext4: ext4_da_convert_inline_data_to_extent drop locked page after error
ext4: fix suboptimal seek_{data,hole} extents traversial
ext4: ext4_inline_data_fiemap should respect callers argument
ext4: prevent fsreentrance deadlock for inline_data
ext4: forbid journal_async_commit in data=ordered mode
jbd2: remove unnecessary NULL check before iput()
ext4: Remove an unnecessary check for NULL before iput()
ext4: remove unneeded code in ext4_unlink
ext4: don't count external journal blocks as overhead
ext4: remove never taken branch from ext4_ext_shift_path_extents()
ext4: create nojournal_checksum mount option
ext4: update comments regarding ext4_delete_inode()
ext4: cleanup GFP flags inside resize path
ext4: introduce aging to extent status tree
ext4: cleanup flag definitions for extent status tree
ext4: limit number of scanned extents in status tree shrinker
ext4: move handling of list of shrinkable inodes into extent status code
ext4: change LRU to round-robin in extent status tree shrinker
ext4: cache extent hole in extent status tree for ext4_da_map_blocks()
ext4: fix block reservation for bigalloc filesystems
...
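The heart of the shrinker rework in the diff below is a policy change: instead of timestamping inodes and sorting them by last-touch time under memory pressure, the shrinker now walks a plain list round-robin, moving each visited inode to the tail, and a recently used extent earns a one-pass reprieve via a "referenced" bit. The following user-space sketch models only that aging loop; entry, shrink, and the table are simplified illustrative names, not the ext4 data structures.

#include <stdio.h>
#include <stdbool.h>

#define NENTRIES 8

struct entry {
    int id;
    bool referenced;    /* set when the cached entry is used */
    bool reclaimed;
};

/* Walk at most nr_to_scan slots from a persistent cursor, round-robin. */
static int shrink(struct entry *tbl, int n, int *cursor, int nr_to_scan)
{
    int nr_shrunk = 0;

    while (nr_to_scan-- > 0) {
        struct entry *e = &tbl[*cursor];

        *cursor = (*cursor + 1) % n;    /* resume point survives calls */
        if (e->reclaimed)
            continue;
        if (e->referenced) {
            /* aging: a referenced entry survives exactly one pass */
            e->referenced = false;
            continue;
        }
        e->reclaimed = true;
        nr_shrunk++;
    }
    return nr_shrunk;
}

int main(void)
{
    struct entry tbl[NENTRIES];
    int cursor = 0;
    int i;

    for (i = 0; i < NENTRIES; i++)
        tbl[i] = (struct entry){ .id = i, .referenced = (i % 2 == 0) };

    /* Pass 1 reclaims unreferenced entries and clears referenced bits;
     * pass 2 reclaims the survivors. */
    printf("pass 1 reclaimed %d\n", shrink(tbl, NENTRIES, &cursor, NENTRIES));
    printf("pass 2 reclaimed %d\n", shrink(tbl, NENTRIES, &cursor, NENTRIES));
    return 0;
}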
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -158,17 +158,8 @@ struct ext4_allocation_request {
 #define EXT4_MAP_MAPPED		(1 << BH_Mapped)
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
-/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
- * ext4_map_blocks wants to know whether or not the underlying cluster has
- * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
- * the requested mapping was from previously mapped (or delayed allocated)
- * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
- * should never appear on buffer_head's state flags.
- */
-#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
-				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_FROM_CLUSTER)
+				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -565,10 +556,8 @@ enum {
 #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 	/* Do not take i_data_sem locking in ext4_map_blocks */
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
-	/* Do not put hole in extent cache */
-#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
 	/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0400
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN	0x0200
 
 /*
  * The bit position of these flags must not overlap with any of the
@@ -889,10 +878,12 @@ struct ext4_inode_info {
 	/* extents status tree */
 	struct ext4_es_tree i_es_tree;
 	rwlock_t i_es_lock;
-	struct list_head i_es_lru;
+	struct list_head i_es_list;
 	unsigned int i_es_all_nr;	/* protected by i_es_lock */
-	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
-	unsigned long i_touch_when;	/* jiffies of last accessing */
+	unsigned int i_es_shk_nr;	/* protected by i_es_lock */
+	ext4_lblk_t i_es_shrink_lblk;	/* Offset where we start searching for
+					   extents to shrink. Protected by
+					   i_es_lock */
 
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
@@ -1337,10 +1328,11 @@ struct ext4_sb_info {
 
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
-	struct list_head s_es_lru;
+	struct list_head s_es_list;	/* List of inodes with reclaimable extents */
+	long s_es_nr_inode;
 	struct ext4_es_stats s_es_stats;
 	struct mb_cache *s_mb_cache;
-	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
+	spinlock_t s_es_lock ____cacheline_aligned_in_smp;
 
 	/* Ratelimit ext4 messages. */
 	struct ratelimit_state s_err_ratelimit_state;
@@ -2196,7 +2188,6 @@ extern int ext4_calculate_overhead(struct super_block *sb);
 extern void ext4_superblock_csum_set(struct super_block *sb);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
-extern void ext4_kvfree(void *ptr);
 extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -2647,7 +2638,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
 					int *retval);
 extern int ext4_inline_data_fiemap(struct inode *inode,
 				   struct fiemap_extent_info *fieinfo,
-				   int *has_inline);
+				   int *has_inline, __u64 start, __u64 len);
 extern int ext4_try_to_evict_inline_data(handle_t *handle,
 					 struct inode *inode,
 					 int needed);
@@ -2794,16 +2785,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 
-/*
- * Note that these flags will never ever appear in a buffer_head's state flag.
- * See EXT4_MAP_... to see where this is used.
- */
-enum ext4_state_bits {
-	BH_AllocFromCluster	/* allocated blocks were part of already
-				 * allocated cluster. */
-	= BH_JBDPrivateStart
-};
-
 /*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
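A recurring theme in the extents.c hunks that follow is how ext4_ext_put_gap_in_cache() records holes: rather than refusing to cache a gap whenever any delayed block falls inside it, the new code looks up the first delayed extent in the range and trims the cached hole to stop just before it. Here is a small self-contained model of only that trimming step; dext and trim_hole are made-up names standing in for the extent_status lookup result and the in-function logic.

#include <stdio.h>

/* First delayed extent found in the range; len == 0 means none. */
struct dext {
    unsigned int lblk;
    unsigned int len;
};

/* Trim the candidate hole [lblock, lblock + len) against a delayed extent. */
static unsigned int trim_hole(unsigned int lblock, unsigned int len,
                              struct dext es)
{
    if (es.len) {
        if (es.lblk <= lblock)  /* delayed data at the very start */
            return 0;           /* nothing safe to cache */
        if (es.lblk - lblock < len)
            len = es.lblk - lblock; /* hole ends where delayed data begins */
    }
    return len;
}

int main(void)
{
    struct dext none = { 0, 0 }, mid = { 100, 8 };

    printf("%u\n", trim_hole(90, 50, none)); /* 50: whole gap cached */
    printf("%u\n", trim_hole(90, 50, mid));  /* 10: trimmed at block 100 */
    printf("%u\n", trim_hole(100, 50, mid)); /* 0: not cached at all */
    return 0;
}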
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2306,16 +2306,16 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				ext4_lblk_t block)
 {
 	int depth = ext_depth(inode);
-	unsigned long len = 0;
-	ext4_lblk_t lblock = 0;
+	ext4_lblk_t len;
+	ext4_lblk_t lblock;
 	struct ext4_extent *ex;
+	struct extent_status es;
 
 	ex = path[depth].p_ext;
 	if (ex == NULL) {
-		/*
-		 * there is no extent yet, so gap is [0;-] and we
-		 * don't cache it
-		 */
+		/* there is no extent yet, so gap is [0;-] */
+		lblock = 0;
+		len = EXT_MAX_BLOCKS;
 		ext_debug("cache gap(whole file):");
 	} else if (block < le32_to_cpu(ex->ee_block)) {
 		lblock = block;
@@ -2324,9 +2324,6 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				block,
 				le32_to_cpu(ex->ee_block),
 				ext4_ext_get_actual_len(ex));
-		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
-			ext4_es_insert_extent(inode, lblock, len, ~0,
-					      EXTENT_STATUS_HOLE);
 	} else if (block >= le32_to_cpu(ex->ee_block)
 			+ ext4_ext_get_actual_len(ex)) {
 		ext4_lblk_t next;
@@ -2340,14 +2337,19 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				block);
 		BUG_ON(next == lblock);
 		len = next - lblock;
-		if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
-			ext4_es_insert_extent(inode, lblock, len, ~0,
-					      EXTENT_STATUS_HOLE);
 	} else {
 		BUG();
 	}
 
-	ext_debug(" -> %u:%lu\n", lblock, len);
+	ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+	if (es.es_len) {
+		/* There's delayed extent containing lblock? */
+		if (es.es_lblk <= lblock)
+			return;
+		len = min(es.es_lblk - lblock, len);
+	}
+	ext_debug(" -> %u:%u\n", lblock, len);
+	ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
 }
 
 /*
@@ -2481,7 +2483,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
 	int flags = get_default_free_blocks_flags(inode);
 
@@ -2490,7 +2492,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	 * at the beginning of the extent. Instead, we make a note
 	 * that we tried freeing the cluster, and check to see if we
 	 * need to free it on a subsequent call to ext4_remove_blocks,
-	 * or at the end of the ext4_truncate() operation.
+	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
 	 */
 	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
 
@@ -2501,8 +2503,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	 * partial cluster here.
 	 */
 	pblk = ext4_ext_pblock(ex) + ee_len - 1;
-	if ((*partial_cluster > 0) &&
-	    (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+	if (*partial_cluster > 0 &&
+	    *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
 		ext4_free_blocks(handle, inode, NULL,
 				 EXT4_C2B(sbi, *partial_cluster),
 				 sbi->s_cluster_ratio, flags);
@@ -2528,7 +2530,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* tail removal */
 		ext4_lblk_t num;
-		unsigned int unaligned;
+		long long first_cluster;
 
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		pblk = ext4_ext_pblock(ex) + ee_len - num;
@@ -2538,7 +2540,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		 * used by any other extent (partial_cluster is negative).
 		 */
 		if (*partial_cluster < 0 &&
-		    -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
+		    *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
 			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
 
 		ext_debug("free last %u blocks starting %llu partial %lld\n",
@@ -2549,21 +2551,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		 * beginning of a cluster, and we removed the entire
 		 * extent and the cluster is not used by any other extent,
 		 * save the partial cluster here, since we might need to
-		 * delete if we determine that the truncate operation has
-		 * removed all of the blocks in the cluster.
+		 * delete if we determine that the truncate or punch hole
+		 * operation has removed all of the blocks in the cluster.
+		 * If that cluster is used by another extent, preserve its
+		 * negative value so it isn't freed later on.
 		 *
-		 * On the other hand, if we did not manage to free the whole
-		 * extent, we have to mark the cluster as used (store negative
-		 * cluster number in partial_cluster).
+		 * If the whole extent wasn't freed, we've reached the
+		 * start of the truncated/punched region and have finished
+		 * removing blocks. If there's a partial cluster here it's
+		 * shared with the remainder of the extent and is no longer
+		 * a candidate for removal.
 		 */
-		unaligned = EXT4_PBLK_COFF(sbi, pblk);
-		if (unaligned && (ee_len == num) &&
-		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
-			*partial_cluster = EXT4_B2C(sbi, pblk);
-		else if (unaligned)
-			*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
-		else if (*partial_cluster > 0)
+		if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
+			first_cluster = (long long) EXT4_B2C(sbi, pblk);
+			if (first_cluster != -*partial_cluster)
+				*partial_cluster = first_cluster;
+		} else {
 			*partial_cluster = 0;
+		}
 	} else
 		ext4_error(sbi->s_sb, "strange request: removal(2) "
 			   "%u-%u from %u:%u\n",
@@ -2574,15 +2579,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 
 /*
  * ext4_ext_rm_leaf() Removes the extents associated with the
- * blocks appearing between "start" and "end", and splits the extents
- * if "start" and "end" appear in the same extent
+ * blocks appearing between "start" and "end". Both "start"
+ * and "end" must appear in the same extent or EIO is returned.
 *
 * @handle: The journal handle
 * @inode:  The files inode
 * @path:   The path to the leaf
 * @partial_cluster: The cluster which we'll have to free if all extents
- *                   has been released from it. It gets negative in case
- *                   that the cluster is still used.
+ *                   has been released from it. However, if this value is
+ *                   negative, it's a cluster just to the right of the
+ *                   punched region and it must not be freed.
 * @start:  The first block to remove
 * @end:    The last block to remove
 */
@@ -2621,27 +2627,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
-	/*
-	 * If we're starting with an extent other than the last one in the
-	 * node, we need to see if it shares a cluster with the extent to
-	 * the right (towards the end of the file). If its leftmost cluster
-	 * is this extent's rightmost cluster and it is not cluster aligned,
-	 * we'll mark it as a partial that is not to be deallocated.
-	 */
-
-	if (ex != EXT_LAST_EXTENT(eh)) {
-		ext4_fsblk_t current_pblk, right_pblk;
-		long long current_cluster, right_cluster;
-
-		current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
-		current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
-		right_pblk = ext4_ext_pblock(ex + 1);
-		right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
-		if (current_cluster == right_cluster &&
-		    EXT4_PBLK_COFF(sbi, right_pblk))
-			*partial_cluster = -right_cluster;
-	}
-
 	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -2666,14 +2651,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		if (end < ex_ee_block) {
 			/*
 			 * We're going to skip this extent and move to another,
-			 * so if this extent is not cluster aligned we have
-			 * to mark the current cluster as used to avoid
-			 * accidentally freeing it later on
+			 * so note that its first cluster is in use to avoid
+			 * freeing it when removing blocks. Eventually, the
+			 * right edge of the truncated/punched region will
+			 * be just to the left.
 			 */
-			pblk = ext4_ext_pblock(ex);
-			if (EXT4_PBLK_COFF(sbi, pblk))
-				*partial_cluster =
-					-((long long)EXT4_B2C(sbi, pblk));
+			if (sbi->s_cluster_ratio > 1) {
+				pblk = ext4_ext_pblock(ex);
+				*partial_cluster =
+					-(long long) EXT4_B2C(sbi, pblk);
+			}
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
 			ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2749,8 +2736,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 					sizeof(struct ext4_extent));
 			}
 			le16_add_cpu(&eh->eh_entries, -1);
-		} else if (*partial_cluster > 0)
-			*partial_cluster = 0;
+		}
 
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		if (err)
@@ -2769,20 +2755,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	/*
 	 * If there's a partial cluster and at least one extent remains in
 	 * the leaf, free the partial cluster if it isn't shared with the
-	 * current extent. If there's a partial cluster and no extents
-	 * remain in the leaf, it can't be freed here. It can only be
-	 * freed when it's possible to determine if it's not shared with
-	 * any other extent - when the next leaf is processed or when space
-	 * removal is complete.
+	 * current extent. If it is shared with the current extent
+	 * we zero partial_cluster because we've reached the start of the
+	 * truncated/punched region and we're done removing blocks.
 	 */
-	if (*partial_cluster > 0 && eh->eh_entries &&
-	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
-	     *partial_cluster)) {
-		int flags = get_default_free_blocks_flags(inode);
-
-		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(sbi, *partial_cluster),
-				 sbi->s_cluster_ratio, flags);
+	if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
+		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+		if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
+			ext4_free_blocks(handle, inode, NULL,
+					 EXT4_C2B(sbi, *partial_cluster),
+					 sbi->s_cluster_ratio,
+					 get_default_free_blocks_flags(inode));
+		}
 		*partial_cluster = 0;
 	}
 
@@ -2819,7 +2803,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 			  ext4_lblk_t end)
 {
-	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path = NULL;
 	long long partial_cluster = 0;
@@ -2845,9 +2829,10 @@ again:
 	 */
 	if (end < EXT_MAX_BLOCKS - 1) {
 		struct ext4_extent *ex;
-		ext4_lblk_t ee_block;
+		ext4_lblk_t ee_block, ex_end, lblk;
+		ext4_fsblk_t pblk;
 
-		/* find extent for this block */
+		/* find extent for or closest extent to this block */
 		path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
 		if (IS_ERR(path)) {
 			ext4_journal_stop(handle);
@@ -2867,6 +2852,7 @@ again:
 		}
 
 		ee_block = le32_to_cpu(ex->ee_block);
+		ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
 
 		/*
 		 * See if the last block is inside the extent, if so split
@@ -2874,8 +2860,19 @@ again:
 		 * tail of the first part of the split extent in
 		 * ext4_ext_rm_leaf().
 		 */
-		if (end >= ee_block &&
-		    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
+		if (end >= ee_block && end < ex_end) {
+
+			/*
+			 * If we're going to split the extent, note that
+			 * the cluster containing the block after 'end' is
+			 * in use to avoid freeing it when removing blocks.
+			 */
+			if (sbi->s_cluster_ratio > 1) {
+				pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+				partial_cluster =
+					-(long long) EXT4_B2C(sbi, pblk);
+			}
+
 			/*
 			 * Split the extent in two so that 'end' is the last
 			 * block in the first new extent. Also we should not
@@ -2886,6 +2883,24 @@ again:
 					end + 1, 1);
 			if (err < 0)
 				goto out;
+
+		} else if (sbi->s_cluster_ratio > 1 && end >= ex_end) {
+			/*
+			 * If there's an extent to the right its first cluster
+			 * contains the immediate right boundary of the
+			 * truncated/punched region. Set partial_cluster to
+			 * its negative value so it won't be freed if shared
+			 * with the current extent. The end < ee_block case
+			 * is handled in ext4_ext_rm_leaf().
+			 */
+			lblk = ex_end + 1;
+			err = ext4_ext_search_right(inode, path, &lblk, &pblk,
+						    &ex);
+			if (err)
+				goto out;
+			if (pblk)
+				partial_cluster =
+					-(long long) EXT4_B2C(sbi, pblk);
 		}
 	}
 	/*
@@ -2996,16 +3011,18 @@ again:
 	trace_ext4_ext_remove_space_done(inode, start, end, depth,
 			partial_cluster, path->p_hdr->eh_entries);
 
-	/* If we still have something in the partial cluster and we have removed
+	/*
+	 * If we still have something in the partial cluster and we have removed
 	 * even the first extent, then we should free the blocks in the partial
-	 * cluster as well. */
-	if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
-		int flags = get_default_free_blocks_flags(inode);
-
+	 * cluster as well. (This code will only run when there are no leaves
+	 * to the immediate left of the truncated/punched region.)
+	 */
+	if (partial_cluster > 0 && err == 0) {
+		/* don't zero partial_cluster since it's not used afterwards */
 		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
-				 EXT4_SB(sb)->s_cluster_ratio, flags);
-		partial_cluster = 0;
+				 EXT4_C2B(sbi, partial_cluster),
+				 sbi->s_cluster_ratio,
+				 get_default_free_blocks_flags(inode));
 	}
 
 	/* TODO: flexible tree reduction should be here */
@@ -4267,6 +4284,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
 	int set_unwritten = 0;
+	bool map_from_cluster = false;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
@@ -4343,10 +4361,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		}
 	}
 
-	if ((sbi->s_cluster_ratio > 1) &&
-	    ext4_find_delalloc_cluster(inode, map->m_lblk))
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-
 	/*
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
@@ -4356,15 +4370,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		 * put just found gap into cache to speed up
 		 * subsequent requests
 		 */
-		if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
-			ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
 		goto out2;
 	}
 
 	/*
	 * Okay, we need to do block allocation.
	 */
-	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = cpu_to_le32(map->m_lblk);
 	cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
 
@@ -4376,7 +4388,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		map_from_cluster = true;
 		goto got_allocated_blocks;
 	}
 
@@ -4397,7 +4409,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		map_from_cluster = true;
 		goto got_allocated_blocks;
 	}
 
@@ -4523,7 +4535,7 @@ got_allocated_blocks:
 		 */
 		reserved_clusters = get_reserved_cluster_alloc(inode,
						map->m_lblk, allocated);
-		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+		if (map_from_cluster) {
 			if (reserved_clusters) {
 				/*
				 * We have clusters reserved for this range.
@@ -4620,7 +4632,6 @@ out2:
 
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
				       err ? err : allocated);
-	ext4_es_lru_add(inode);
 	return err ? err : allocated;
 }
 
@@ -5140,7 +5151,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	if (ext4_has_inline_data(inode)) {
 		int has_inline = 1;
 
-		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
+		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
+						start, len);
 
 		if (has_inline)
 			return error;
@@ -5154,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 	/* fallback to generic here if not in extents fmt */
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return generic_block_fiemap(inode, fieinfo, start, len,
-			ext4_get_block);
+		return __generic_block_fiemap(inode, fieinfo, start, len,
+					      ext4_get_block);
 
 	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
 		return -EBADR;
@@ -5179,7 +5191,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		error = ext4_fill_fiemap_extents(inode, start_blk,
						 len_blks, fieinfo);
 	}
-	ext4_es_lru_add(inode);
 	return error;
 }
 
@@ -5239,8 +5250,6 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
 			return -EIO;
 
 		ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
-		if (!ex_last)
-			return -EIO;
 
 		err = ext4_access_path(handle, inode, path + depth);
 		if (err)
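Several of the bigalloc fixes above hinge on the sign convention carried in partial_cluster: a positive value names a cluster that may still have to be freed once removal finishes, while a negative value pins that same cluster as shared so it survives. A toy illustration of just that convention follows; B2C and the fixed 4-block cluster ratio are stand-ins for EXT4_B2C() and sbi->s_cluster_ratio, not the real helpers.

#include <stdio.h>

#define CLUSTER_RATIO 4                  /* blocks per cluster, fixed for the demo */
#define B2C(blk) ((blk) / CLUSTER_RATIO) /* stand-in for EXT4_B2C() */

int main(void)
{
    long long partial_cluster = 0;
    unsigned long long pblk = 42;   /* removal stopped inside this cluster */

    /* Positive: remember the cluster as a candidate for freeing later. */
    partial_cluster = (long long) B2C(pblk);
    printf("candidate cluster %lld\n", partial_cluster);

    /* A neighboring extent turns out to share it: flip the sign to pin it. */
    partial_cluster = -partial_cluster;
    printf("cluster %lld is shared, must not be freed\n", -partial_cluster);

    /* Only a still-positive value is ever handed to the block freer. */
    if (partial_cluster > 0)
        printf("free cluster %lld\n", partial_cluster);
    else
        printf("nothing to free\n");
    return 0;
}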
@@ -147,10 +147,9 @@ static struct kmem_cache *ext4_es_cachep;
|
|||||||
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
|
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
|
||||||
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t end);
|
ext4_lblk_t end);
|
||||||
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
|
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
|
||||||
int nr_to_scan);
|
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||||
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
struct ext4_inode_info *locked_ei);
|
||||||
struct ext4_inode_info *locked_ei);
|
|
||||||
|
|
||||||
int __init ext4_init_es(void)
|
int __init ext4_init_es(void)
|
||||||
{
|
{
|
||||||
@@ -298,6 +297,36 @@ out:
|
|||||||
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
|
trace_ext4_es_find_delayed_extent_range_exit(inode, es);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ext4_es_list_add(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
|
||||||
|
if (!list_empty(&ei->i_es_list))
|
||||||
|
return;
|
||||||
|
|
||||||
|
spin_lock(&sbi->s_es_lock);
|
||||||
|
if (list_empty(&ei->i_es_list)) {
|
||||||
|
list_add_tail(&ei->i_es_list, &sbi->s_es_list);
|
||||||
|
sbi->s_es_nr_inode++;
|
||||||
|
}
|
||||||
|
spin_unlock(&sbi->s_es_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ext4_es_list_del(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
|
||||||
|
spin_lock(&sbi->s_es_lock);
|
||||||
|
if (!list_empty(&ei->i_es_list)) {
|
||||||
|
list_del_init(&ei->i_es_list);
|
||||||
|
sbi->s_es_nr_inode--;
|
||||||
|
WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
|
||||||
|
}
|
||||||
|
spin_unlock(&sbi->s_es_lock);
|
||||||
|
}
|
||||||
|
|
||||||
static struct extent_status *
|
static struct extent_status *
|
||||||
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
||||||
ext4_fsblk_t pblk)
|
ext4_fsblk_t pblk)
|
||||||
@@ -314,9 +343,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
|||||||
* We don't count delayed extent because we never try to reclaim them
|
* We don't count delayed extent because we never try to reclaim them
|
||||||
*/
|
*/
|
||||||
if (!ext4_es_is_delayed(es)) {
|
if (!ext4_es_is_delayed(es)) {
|
||||||
EXT4_I(inode)->i_es_lru_nr++;
|
if (!EXT4_I(inode)->i_es_shk_nr++)
|
||||||
|
ext4_es_list_add(inode);
|
||||||
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
|
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
|
||||||
s_es_stats.es_stats_lru_cnt);
|
s_es_stats.es_stats_shk_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
EXT4_I(inode)->i_es_all_nr++;
|
EXT4_I(inode)->i_es_all_nr++;
|
||||||
@@ -330,12 +360,13 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
|||||||
EXT4_I(inode)->i_es_all_nr--;
|
EXT4_I(inode)->i_es_all_nr--;
|
||||||
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
||||||
|
|
||||||
/* Decrease the lru counter when this es is not delayed */
|
/* Decrease the shrink counter when this es is not delayed */
|
||||||
if (!ext4_es_is_delayed(es)) {
|
if (!ext4_es_is_delayed(es)) {
|
||||||
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
|
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
|
||||||
EXT4_I(inode)->i_es_lru_nr--;
|
if (!--EXT4_I(inode)->i_es_shk_nr)
|
||||||
|
ext4_es_list_del(inode);
|
||||||
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
|
percpu_counter_dec(&EXT4_SB(inode->i_sb)->
|
||||||
s_es_stats.es_stats_lru_cnt);
|
s_es_stats.es_stats_shk_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
kmem_cache_free(ext4_es_cachep, es);
|
kmem_cache_free(ext4_es_cachep, es);
|
||||||
@@ -351,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
|||||||
static int ext4_es_can_be_merged(struct extent_status *es1,
|
static int ext4_es_can_be_merged(struct extent_status *es1,
|
||||||
struct extent_status *es2)
|
struct extent_status *es2)
|
||||||
{
|
{
|
||||||
if (ext4_es_status(es1) != ext4_es_status(es2))
|
if (ext4_es_type(es1) != ext4_es_type(es2))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
|
if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
|
||||||
@@ -394,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
|
|||||||
es1 = rb_entry(node, struct extent_status, rb_node);
|
es1 = rb_entry(node, struct extent_status, rb_node);
|
||||||
if (ext4_es_can_be_merged(es1, es)) {
|
if (ext4_es_can_be_merged(es1, es)) {
|
||||||
es1->es_len += es->es_len;
|
es1->es_len += es->es_len;
|
||||||
|
if (ext4_es_is_referenced(es))
|
||||||
|
ext4_es_set_referenced(es1);
|
||||||
rb_erase(&es->rb_node, &tree->root);
|
rb_erase(&es->rb_node, &tree->root);
|
||||||
ext4_es_free_extent(inode, es);
|
ext4_es_free_extent(inode, es);
|
||||||
es = es1;
|
es = es1;
|
||||||
@@ -416,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
|
|||||||
es1 = rb_entry(node, struct extent_status, rb_node);
|
es1 = rb_entry(node, struct extent_status, rb_node);
|
||||||
if (ext4_es_can_be_merged(es, es1)) {
|
if (ext4_es_can_be_merged(es, es1)) {
|
||||||
es->es_len += es1->es_len;
|
es->es_len += es1->es_len;
|
||||||
|
if (ext4_es_is_referenced(es1))
|
||||||
|
ext4_es_set_referenced(es);
|
||||||
rb_erase(node, &tree->root);
|
rb_erase(node, &tree->root);
|
||||||
ext4_es_free_extent(inode, es1);
|
ext4_es_free_extent(inode, es1);
|
||||||
}
|
}
|
||||||
@@ -683,8 +718,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
|||||||
goto error;
|
goto error;
|
||||||
retry:
|
retry:
|
||||||
err = __es_insert_extent(inode, &newes);
|
err = __es_insert_extent(inode, &newes);
|
||||||
if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
|
if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
|
||||||
EXT4_I(inode)))
|
128, EXT4_I(inode)))
|
||||||
goto retry;
|
goto retry;
|
||||||
if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
|
if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
|
||||||
err = 0;
|
err = 0;
|
||||||
@@ -782,6 +817,8 @@ out:
|
|||||||
es->es_lblk = es1->es_lblk;
|
es->es_lblk = es1->es_lblk;
|
||||||
es->es_len = es1->es_len;
|
es->es_len = es1->es_len;
|
||||||
es->es_pblk = es1->es_pblk;
|
es->es_pblk = es1->es_pblk;
|
||||||
|
if (!ext4_es_is_referenced(es))
|
||||||
|
ext4_es_set_referenced(es);
|
||||||
stats->es_stats_cache_hits++;
|
stats->es_stats_cache_hits++;
|
||||||
} else {
|
} else {
|
||||||
stats->es_stats_cache_misses++;
|
stats->es_stats_cache_misses++;
|
||||||
@@ -841,8 +878,8 @@ retry:
|
|||||||
es->es_lblk = orig_es.es_lblk;
|
es->es_lblk = orig_es.es_lblk;
|
||||||
es->es_len = orig_es.es_len;
|
es->es_len = orig_es.es_len;
|
||||||
if ((err == -ENOMEM) &&
|
if ((err == -ENOMEM) &&
|
||||||
__ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
|
__es_shrink(EXT4_SB(inode->i_sb),
|
||||||
EXT4_I(inode)))
|
128, EXT4_I(inode)))
|
||||||
goto retry;
|
goto retry;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@@ -914,6 +951,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
|||||||
end = lblk + len - 1;
|
end = lblk + len - 1;
|
||||||
BUG_ON(end < lblk);
|
BUG_ON(end < lblk);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
|
||||||
|
* so that we are sure __es_shrink() is done with the inode before it
|
||||||
|
* is reclaimed.
|
||||||
|
*/
|
||||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||||
err = __es_remove_extent(inode, lblk, end);
|
err = __es_remove_extent(inode, lblk, end);
|
||||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||||
@@ -921,114 +963,75 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
|
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||||
struct list_head *b)
|
struct ext4_inode_info *locked_ei)
|
||||||
{
|
|
||||||
struct ext4_inode_info *eia, *eib;
|
|
||||||
eia = list_entry(a, struct ext4_inode_info, i_es_lru);
|
|
||||||
eib = list_entry(b, struct ext4_inode_info, i_es_lru);
|
|
||||||
|
|
||||||
if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
|
|
||||||
!ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
|
|
||||||
return 1;
|
|
||||||
if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
|
|
||||||
ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
|
|
||||||
return -1;
|
|
||||||
if (eia->i_touch_when == eib->i_touch_when)
|
|
||||||
return 0;
|
|
||||||
if (time_after(eia->i_touch_when, eib->i_touch_when))
|
|
||||||
return 1;
|
|
||||||
else
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
|
||||||
struct ext4_inode_info *locked_ei)
|
|
||||||
{
|
{
|
||||||
struct ext4_inode_info *ei;
|
struct ext4_inode_info *ei;
|
||||||
struct ext4_es_stats *es_stats;
|
struct ext4_es_stats *es_stats;
|
||||||
struct list_head *cur, *tmp;
|
|
||||||
LIST_HEAD(skipped);
|
|
||||||
ktime_t start_time;
|
ktime_t start_time;
|
||||||
u64 scan_time;
|
u64 scan_time;
|
||||||
|
int nr_to_walk;
|
||||||
int nr_shrunk = 0;
|
int nr_shrunk = 0;
|
||||||
int retried = 0, skip_precached = 1, nr_skipped = 0;
|
int retried = 0, nr_skipped = 0;
|
||||||
|
|
||||||
es_stats = &sbi->s_es_stats;
|
es_stats = &sbi->s_es_stats;
|
||||||
start_time = ktime_get();
|
start_time = ktime_get();
|
||||||
spin_lock(&sbi->s_es_lru_lock);
|
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
|
spin_lock(&sbi->s_es_lock);
|
||||||
int shrunk;
|
nr_to_walk = sbi->s_es_nr_inode;
|
||||||
|
while (nr_to_walk-- > 0) {
|
||||||
|
if (list_empty(&sbi->s_es_list)) {
|
||||||
|
spin_unlock(&sbi->s_es_lock);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
|
||||||
|
i_es_list);
|
||||||
|
/* Move the inode to the tail */
|
||||||
|
list_move_tail(&ei->i_es_list, &sbi->s_es_list);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we have already reclaimed all extents from extent
|
* Normally we try hard to avoid shrinking precached inodes,
|
||||||
* status tree, just stop the loop immediately.
|
* but we will as a last resort.
|
||||||
*/
|
*/
|
||||||
if (percpu_counter_read_positive(
|
if (!retried && ext4_test_inode_state(&ei->vfs_inode,
|
||||||
&es_stats->es_stats_lru_cnt) == 0)
|
EXT4_STATE_EXT_PRECACHED)) {
|
||||||
break;
|
|
||||||
|
|
||||||
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Skip the inode that is newer than the last_sorted
|
|
||||||
* time. Normally we try hard to avoid shrinking
|
|
||||||
* precached inodes, but we will as a last resort.
|
|
||||||
*/
|
|
||||||
if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
|
|
||||||
(skip_precached && ext4_test_inode_state(&ei->vfs_inode,
|
|
||||||
EXT4_STATE_EXT_PRECACHED))) {
|
|
||||||
nr_skipped++;
|
nr_skipped++;
|
||||||
list_move_tail(cur, &skipped);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
|
if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
|
||||||
!write_trylock(&ei->i_es_lock))
|
nr_skipped++;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Now we hold i_es_lock which protects us from inode reclaim
|
||||||
|
* freeing inode under us
|
||||||
|
*/
|
||||||
|
spin_unlock(&sbi->s_es_lock);
|
||||||
|
|
||||||
shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
|
nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
|
||||||
if (ei->i_es_lru_nr == 0)
|
|
||||||
list_del_init(&ei->i_es_lru);
|
|
||||||
write_unlock(&ei->i_es_lock);
|
write_unlock(&ei->i_es_lock);
|
||||||
|
|
||||||
nr_shrunk += shrunk;
|
if (nr_to_scan <= 0)
|
||||||
nr_to_scan -= shrunk;
|
goto out;
|
||||||
if (nr_to_scan == 0)
|
spin_lock(&sbi->s_es_lock);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
spin_unlock(&sbi->s_es_lock);
|
||||||
/* Move the newer inodes into the tail of the LRU list. */
|
|
||||||
list_splice_tail(&skipped, &sbi->s_es_lru);
|
|
||||||
INIT_LIST_HEAD(&skipped);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we skipped any inodes, and we weren't able to make any
|
* If we skipped any inodes, and we weren't able to make any
|
||||||
* forward progress, sort the list and try again.
|
* forward progress, try again to scan precached inodes.
|
||||||
*/
|
*/
|
||||||
if ((nr_shrunk == 0) && nr_skipped && !retried) {
|
if ((nr_shrunk == 0) && nr_skipped && !retried) {
|
||||||
retried++;
|
retried++;
|
||||||
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
|
|
||||||
es_stats->es_stats_last_sorted = jiffies;
|
|
||||||
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
|
|
||||||
i_es_lru);
|
|
||||||
/*
|
|
||||||
* If there are no non-precached inodes left on the
|
|
||||||
* list, start releasing precached extents.
|
|
||||||
*/
|
|
||||||
if (ext4_test_inode_state(&ei->vfs_inode,
|
|
||||||
EXT4_STATE_EXT_PRECACHED))
|
|
||||||
skip_precached = 0;
|
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_unlock(&sbi->s_es_lru_lock);
|
|
||||||
|
|
||||||
if (locked_ei && nr_shrunk == 0)
|
if (locked_ei && nr_shrunk == 0)
|
||||||
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
|
nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
|
||||||
|
|
||||||
|
out:
|
||||||
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
|
scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
|
||||||
if (likely(es_stats->es_stats_scan_time))
|
if (likely(es_stats->es_stats_scan_time))
|
||||||
es_stats->es_stats_scan_time = (scan_time +
|
es_stats->es_stats_scan_time = (scan_time +
|
||||||
@@ -1043,7 +1046,7 @@ retry:
|
|||||||
else
|
else
|
||||||
es_stats->es_stats_shrunk = nr_shrunk;
|
es_stats->es_stats_shrunk = nr_shrunk;
|
||||||
|
|
||||||
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
|
trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
|
||||||
nr_skipped, retried);
|
nr_skipped, retried);
|
||||||
return nr_shrunk;
|
return nr_shrunk;
|
||||||
}
|
}
|
||||||
@@ -1055,7 +1058,7 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
|
|||||||
struct ext4_sb_info *sbi;
|
struct ext4_sb_info *sbi;
|
||||||
|
|
||||||
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
|
sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
|
||||||
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
|
nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||||
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
|
trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
|
||||||
return nr;
|
return nr;
|
||||||
}
|
}
|
||||||
@@ -1068,13 +1071,13 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
|
|||||||
int nr_to_scan = sc->nr_to_scan;
|
int nr_to_scan = sc->nr_to_scan;
|
||||||
int ret, nr_shrunk;
|
int ret, nr_shrunk;
|
||||||
|
|
||||||
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
|
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
|
||||||
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
|
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
|
||||||
|
|
||||||
if (!nr_to_scan)
|
if (!nr_to_scan)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
|
nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
|
||||||
|
|
||||||
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
|
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
|
||||||
return nr_shrunk;
|
return nr_shrunk;
|
||||||
@@ -1102,28 +1105,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* here we just find an inode that has the max nr. of objects */
|
/* here we just find an inode that has the max nr. of objects */
|
||||||
spin_lock(&sbi->s_es_lru_lock);
|
spin_lock(&sbi->s_es_lock);
|
||||||
list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
|
list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
|
||||||
inode_cnt++;
|
inode_cnt++;
|
||||||
if (max && max->i_es_all_nr < ei->i_es_all_nr)
|
if (max && max->i_es_all_nr < ei->i_es_all_nr)
|
||||||
max = ei;
|
max = ei;
|
||||||
else if (!max)
|
else if (!max)
|
||||||
max = ei;
|
max = ei;
|
||||||
}
|
}
|
||||||
spin_unlock(&sbi->s_es_lru_lock);
|
spin_unlock(&sbi->s_es_lock);
|
||||||
|
|
||||||
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
|
seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
|
||||||
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
|
percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
|
||||||
percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
|
percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
|
||||||
seq_printf(seq, " %lu/%lu cache hits/misses\n",
|
seq_printf(seq, " %lu/%lu cache hits/misses\n",
|
||||||
es_stats->es_stats_cache_hits,
|
es_stats->es_stats_cache_hits,
|
||||||
es_stats->es_stats_cache_misses);
|
es_stats->es_stats_cache_misses);
|
||||||
if (es_stats->es_stats_last_sorted != 0)
|
|
||||||
seq_printf(seq, " %u ms last sorted interval\n",
|
|
||||||
jiffies_to_msecs(jiffies -
|
|
||||||
es_stats->es_stats_last_sorted));
|
|
||||||
if (inode_cnt)
|
if (inode_cnt)
|
||||||
seq_printf(seq, " %d inodes on lru list\n", inode_cnt);
|
seq_printf(seq, " %d inodes on list\n", inode_cnt);
|
||||||
|
|
||||||
seq_printf(seq, "average:\n %llu us scan time\n",
|
seq_printf(seq, "average:\n %llu us scan time\n",
|
||||||
div_u64(es_stats->es_stats_scan_time, 1000));
|
div_u64(es_stats->es_stats_scan_time, 1000));
|
||||||
@@ -1132,7 +1131,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
|
|||||||
seq_printf(seq,
|
seq_printf(seq,
|
||||||
"maximum:\n %lu inode (%u objects, %u reclaimable)\n"
|
"maximum:\n %lu inode (%u objects, %u reclaimable)\n"
|
||||||
" %llu us max scan time\n",
|
" %llu us max scan time\n",
|
||||||
max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
|
max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
|
||||||
div_u64(es_stats->es_stats_max_scan_time, 1000));
|
div_u64(es_stats->es_stats_max_scan_time, 1000));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1181,9 +1180,11 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
|||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&sbi->s_es_lru);
|
/* Make sure we have enough bits for physical block number */
|
||||||
spin_lock_init(&sbi->s_es_lru_lock);
|
BUILD_BUG_ON(ES_SHIFT < 48);
|
||||||
sbi->s_es_stats.es_stats_last_sorted = 0;
|
INIT_LIST_HEAD(&sbi->s_es_list);
|
||||||
|
sbi->s_es_nr_inode = 0;
|
||||||
|
spin_lock_init(&sbi->s_es_lock);
|
||||||
sbi->s_es_stats.es_stats_shrunk = 0;
|
sbi->s_es_stats.es_stats_shrunk = 0;
|
||||||
sbi->s_es_stats.es_stats_cache_hits = 0;
|
sbi->s_es_stats.es_stats_cache_hits = 0;
|
||||||
sbi->s_es_stats.es_stats_cache_misses = 0;
|
sbi->s_es_stats.es_stats_cache_misses = 0;
|
||||||
@@ -1192,7 +1193,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
|
|||||||
err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
|
 	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
 	if (err)
 		return err;
-	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
 	if (err)
 		goto err1;
 
@@ -1210,7 +1211,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 	return 0;
 
 err2:
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 err1:
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
 	return err;
@@ -1221,71 +1222,83 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 	if (sbi->s_proc)
 		remove_proc_entry("es_shrinker_info", sbi->s_proc);
 	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
-	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
-void ext4_es_lru_add(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	ei->i_touch_when = jiffies;
-
-	if (!list_empty(&ei->i_es_lru))
-		return;
-
-	spin_lock(&sbi->s_es_lru_lock);
-	if (list_empty(&ei->i_es_lru))
-		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
-
-void ext4_es_lru_del(struct inode *inode)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-	spin_lock(&sbi->s_es_lru_lock);
-	if (!list_empty(&ei->i_es_lru))
-		list_del_init(&ei->i_es_lru);
-	spin_unlock(&sbi->s_es_lru_lock);
-}
-
-static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
-				       int nr_to_scan)
+/*
+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
+ * most *nr_to_scan extents, update *nr_to_scan accordingly.
+ *
+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
+ * Increment *nr_shrunk by the number of reclaimed extents. Also update
+ * ei->i_es_shrink_lblk to where we should continue scanning.
+ */
+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
+				 int *nr_to_scan, int *nr_shrunk)
 {
 	struct inode *inode = &ei->vfs_inode;
 	struct ext4_es_tree *tree = &ei->i_es_tree;
-	struct rb_node *node;
 	struct extent_status *es;
-	unsigned long nr_shrunk = 0;
+	struct rb_node *node;
+
+	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
+	if (!es)
+		goto out_wrap;
+	node = &es->rb_node;
+	while (*nr_to_scan > 0) {
+		if (es->es_lblk > end) {
+			ei->i_es_shrink_lblk = end + 1;
+			return 0;
+		}
+
+		(*nr_to_scan)--;
+		node = rb_next(&es->rb_node);
+		/*
+		 * We can't reclaim delayed extent from status tree because
+		 * fiemap, bigalloc, and seek_data/hole need to use it.
+		 */
+		if (ext4_es_is_delayed(es))
+			goto next;
+		if (ext4_es_is_referenced(es)) {
+			ext4_es_clear_referenced(es);
+			goto next;
+		}
+
+		rb_erase(&es->rb_node, &tree->root);
+		ext4_es_free_extent(inode, es);
+		(*nr_shrunk)++;
+next:
+		if (!node)
+			goto out_wrap;
+		es = rb_entry(node, struct extent_status, rb_node);
+	}
+	ei->i_es_shrink_lblk = es->es_lblk;
+	return 1;
+out_wrap:
+	ei->i_es_shrink_lblk = 0;
+	return 0;
+}
+
+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
+{
+	struct inode *inode = &ei->vfs_inode;
+	int nr_shrunk = 0;
+	ext4_lblk_t start = ei->i_es_shrink_lblk;
 	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if (ei->i_es_lru_nr == 0)
+	if (ei->i_es_shk_nr == 0)
 		return 0;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
 	    __ratelimit(&_rs))
 		ext4_warning(inode->i_sb, "forced shrink of precached extents");
 
-	node = rb_first(&tree->root);
-	while (node != NULL) {
-		es = rb_entry(node, struct extent_status, rb_node);
-		node = rb_next(&es->rb_node);
-		/*
-		 * We can't reclaim delayed extent from status tree because
-		 * fiemap, bigalloc, and seek_data/hole need to use it.
-		 */
-		if (!ext4_es_is_delayed(es)) {
-			rb_erase(&es->rb_node, &tree->root);
-			ext4_es_free_extent(inode, es);
-			nr_shrunk++;
-			if (--nr_to_scan == 0)
-				break;
-		}
-	}
-	tree->cache_es = NULL;
+	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
+	    start != 0)
+		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
+
+	ei->i_es_tree.cache_es = NULL;
 	return nr_shrunk;
}
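
The rewrite above replaces the per-sb LRU walk, which always restarted at rb_first(), with a cursor so each shrinker pass resumes where the previous one stopped: scan from i_es_shrink_lblk to the end of the tree, then wrap around to the front only if the scan budget survived. A rough userspace sketch of that control flow, with a sorted array standing in for the rbtree and every name invented for the example:

/*
 * Userspace sketch (not kernel code) of the round-robin scan in
 * es_reclaim_extents(): first scan [cursor, max], then wrap to
 * [0, cursor - 1] if the budget was not exhausted by the first pass.
 */
#include <stdio.h>

#define NKEYS 10

/* Visit keys in [from, to]; return 1 if the budget ran out. */
static int scan_range(const int *keys, int n, int from, int to,
                      int *budget, int *cursor)
{
        for (int i = 0; i < n; i++) {
                if (keys[i] < from || keys[i] > to)
                        continue;
                if (*budget == 0) {
                        *cursor = keys[i];      /* resume here next pass */
                        return 1;
                }
                (*budget)--;
                printf("scanned key %d\n", keys[i]);
        }
        *cursor = 0;                            /* hit the end: wrap */
        return 0;
}

int main(void)
{
        int keys[NKEYS] = { 1, 4, 9, 16, 25, 36, 49, 64, 81, 100 };
        int cursor = 36;        /* where the previous pass stopped */
        int budget = 5;
        int start = cursor;

        if (!scan_range(keys, NKEYS, start, 1000, &budget, &cursor) &&
            start != 0)
                scan_range(keys, NKEYS, 0, start - 1, &budget, &cursor);
        printf("next pass resumes at %d\n", cursor);
        return 0;
}

Compared with always restarting from the tree minimum, this spreads reclaim over the whole tree instead of repeatedly evicting the extents near the front.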
@@ -29,25 +29,28 @@
 /*
  * These flags live in the high bits of extent_status.es_pblk
  */
-#define ES_SHIFT	60
+enum {
+	ES_WRITTEN_B,
+	ES_UNWRITTEN_B,
+	ES_DELAYED_B,
+	ES_HOLE_B,
+	ES_REFERENCED_B,
+	ES_FLAGS
+};
 
-#define EXTENT_STATUS_WRITTEN	(1 << 3)
-#define EXTENT_STATUS_UNWRITTEN (1 << 2)
-#define EXTENT_STATUS_DELAYED	(1 << 1)
-#define EXTENT_STATUS_HOLE	(1 << 0)
+#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
+#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
 
-#define EXTENT_STATUS_FLAGS	(EXTENT_STATUS_WRITTEN | \
-				 EXTENT_STATUS_UNWRITTEN | \
-				 EXTENT_STATUS_DELAYED | \
-				 EXTENT_STATUS_HOLE)
+#define EXTENT_STATUS_WRITTEN		(1 << ES_WRITTEN_B)
+#define EXTENT_STATUS_UNWRITTEN		(1 << ES_UNWRITTEN_B)
+#define EXTENT_STATUS_DELAYED		(1 << ES_DELAYED_B)
+#define EXTENT_STATUS_HOLE		(1 << ES_HOLE_B)
+#define EXTENT_STATUS_REFERENCED	(1 << ES_REFERENCED_B)
 
-#define ES_WRITTEN		(1ULL << 63)
-#define ES_UNWRITTEN		(1ULL << 62)
-#define ES_DELAYED		(1ULL << 61)
-#define ES_HOLE			(1ULL << 60)
-
-#define ES_MASK			(ES_WRITTEN | ES_UNWRITTEN | \
-				 ES_DELAYED | ES_HOLE)
+#define ES_TYPE_MASK	((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
+			  EXTENT_STATUS_UNWRITTEN | \
+			  EXTENT_STATUS_DELAYED | \
+			  EXTENT_STATUS_HOLE) << ES_SHIFT)
 
 struct ext4_sb_info;
 struct ext4_extent;
@@ -65,14 +68,13 @@ struct ext4_es_tree {
 };
 
 struct ext4_es_stats {
-	unsigned long es_stats_last_sorted;
 	unsigned long es_stats_shrunk;
 	unsigned long es_stats_cache_hits;
 	unsigned long es_stats_cache_misses;
 	u64 es_stats_scan_time;
 	u64 es_stats_max_scan_time;
 	struct percpu_counter es_stats_all_cnt;
-	struct percpu_counter es_stats_lru_cnt;
+	struct percpu_counter es_stats_shk_cnt;
 };
 
 extern int __init ext4_init_es(void);
@@ -93,29 +95,49 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 				 struct extent_status *es);
 
+static inline unsigned int ext4_es_status(struct extent_status *es)
+{
+	return es->es_pblk >> ES_SHIFT;
+}
+
+static inline unsigned int ext4_es_type(struct extent_status *es)
+{
+	return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
+}
+
 static inline int ext4_es_is_written(struct extent_status *es)
 {
-	return (es->es_pblk & ES_WRITTEN) != 0;
+	return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0;
 }
 
 static inline int ext4_es_is_unwritten(struct extent_status *es)
 {
-	return (es->es_pblk & ES_UNWRITTEN) != 0;
+	return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0;
 }
 
 static inline int ext4_es_is_delayed(struct extent_status *es)
 {
-	return (es->es_pblk & ES_DELAYED) != 0;
+	return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0;
 }
 
 static inline int ext4_es_is_hole(struct extent_status *es)
 {
-	return (es->es_pblk & ES_HOLE) != 0;
+	return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0;
 }
 
-static inline unsigned int ext4_es_status(struct extent_status *es)
+static inline void ext4_es_set_referenced(struct extent_status *es)
 {
-	return es->es_pblk >> ES_SHIFT;
+	es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
+}
+
+static inline void ext4_es_clear_referenced(struct extent_status *es)
+{
+	es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT);
+}
+
+static inline int ext4_es_is_referenced(struct extent_status *es)
+{
+	return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0;
 }
 
 static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
@@ -135,23 +157,19 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
 static inline void ext4_es_store_status(struct extent_status *es,
 					unsigned int status)
 {
-	es->es_pblk = (((ext4_fsblk_t)
-			(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
-		       (es->es_pblk & ~ES_MASK));
+	es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
+		      (es->es_pblk & ~ES_MASK);
 }
 
 static inline void ext4_es_store_pblock_status(struct extent_status *es,
 					       ext4_fsblk_t pb,
 					       unsigned int status)
 {
-	es->es_pblk = (((ext4_fsblk_t)
-			(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
-		       (pb & ~ES_MASK));
+	es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
+		      (pb & ~ES_MASK);
 }
 
 extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
-extern void ext4_es_lru_add(struct inode *inode);
-extern void ext4_es_lru_del(struct inode *inode);
 
 #endif /* _EXT4_EXTENTS_STATUS_H */
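
With the enum above, the number of flag bits and the shift are derived instead of hard-coded, so adding ES_REFERENCED_B automatically moved ES_SHIFT from 60 to 59. A small userspace sketch that mirrors the header's arithmetic (the typedef and names here are local stand-ins, not kernel API):

/*
 * Mirrors the es_pblk encoding: five flag bits occupy bits 63..59,
 * the physical block number keeps the low 59 bits.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t ext4_fsblk_t;

enum { ES_WRITTEN_B, ES_UNWRITTEN_B, ES_DELAYED_B, ES_HOLE_B,
       ES_REFERENCED_B, ES_FLAGS };

#define ES_SHIFT (sizeof(ext4_fsblk_t) * 8 - ES_FLAGS)
#define ES_MASK  (~((ext4_fsblk_t)0) << ES_SHIFT)

int main(void)
{
        ext4_fsblk_t pblk = 123456;     /* a physical block number */
        unsigned int status = (1 << ES_WRITTEN_B) | (1 << ES_REFERENCED_B);

        /* Pack the status into the high bits, as ext4_es_store_status() does. */
        ext4_fsblk_t es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
                               (pblk & ~ES_MASK);

        printf("ES_SHIFT = %zu\n", (size_t)ES_SHIFT);
        printf("flags    = 0x%x\n", (unsigned int)(es_pblk >> ES_SHIFT));
        printf("pblk     = %llu\n", (unsigned long long)(es_pblk & ~ES_MASK));
        return 0;
}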
fs/ext4/file.c (222 changed lines)
@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  * we determine this extent as a data or a hole according to whether the
  * page cache has data or not.
  */
-static int ext4_find_unwritten_pgoff(struct inode *inode,
-				     int whence,
-				     struct ext4_map_blocks *map,
-				     loff_t *offset)
+static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
+				     loff_t endoff, loff_t *offset)
 {
 	struct pagevec pvec;
-	unsigned int blkbits;
 	pgoff_t index;
 	pgoff_t end;
-	loff_t endoff;
 	loff_t startoff;
 	loff_t lastoff;
 	int found = 0;
 
-	blkbits = inode->i_sb->s_blocksize_bits;
 	startoff = *offset;
 	lastoff = startoff;
-	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
 
 	index = startoff >> PAGE_CACHE_SHIFT;
 	end = endoff >> PAGE_CACHE_SHIFT;
@@ -408,147 +403,144 @@ out:
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t dataoff, isize;
-	int blkbits;
-	int ret = 0;
+	struct fiemap_extent_info fie;
+	struct fiemap_extent ext[2];
+	loff_t next;
+	int i, ret = 0;
 
 	mutex_lock(&inode->i_mutex);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
+	if (offset >= inode->i_size) {
 		mutex_unlock(&inode->i_mutex);
 		return -ENXIO;
 	}
+	fie.fi_flags = 0;
+	fie.fi_extents_max = 2;
+	fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
+	while (1) {
+		mm_segment_t old_fs = get_fs();
 
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	dataoff = offset;
+		fie.fi_extents_mapped = 0;
+		memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
 
-	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
+		set_fs(get_ds());
+		ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+		set_fs(old_fs);
+		if (ret)
 			break;
-		}
 
-		/*
-		 * If there is a delay extent at this offset,
-		 * it will be as a data.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
+		/* No extents found, EOF */
+		if (!fie.fi_extents_mapped) {
+			ret = -ENXIO;
 			break;
 		}
+		for (i = 0; i < fie.fi_extents_mapped; i++) {
+			next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
 
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-							      &map, &dataoff);
-			if (unwritten)
-				break;
-		}
+			if (offset < (loff_t)ext[i].fe_logical)
+				offset = (loff_t)ext[i].fe_logical;
+			/*
+			 * If extent is not unwritten, then it contains valid
+			 * data, mapped or delayed.
+			 */
+			if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
+				goto out;
 
-		last++;
-		dataoff = (loff_t)last << blkbits;
-	} while (last <= end);
+			/*
+			 * If there is a unwritten extent at this offset,
+			 * it will be as a data or a hole according to page
+			 * cache that has data or not.
+			 */
+			if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+						      next, &offset))
+				goto out;
 
+			if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
+				ret = -ENXIO;
+				goto out;
+			}
+			offset = next;
+		}
+	}
+	if (offset > inode->i_size)
+		offset = inode->i_size;
+out:
 	mutex_unlock(&inode->i_mutex);
+	if (ret)
+		return ret;
 
-	if (dataoff > isize)
-		return -ENXIO;
-
-	return vfs_setpos(file, dataoff, maxsize);
+	return vfs_setpos(file, offset, maxsize);
 }
 
 /*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE
  */
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
-	struct extent_status es;
-	ext4_lblk_t start, last, end;
-	loff_t holeoff, isize;
-	int blkbits;
-	int ret = 0;
+	struct fiemap_extent_info fie;
+	struct fiemap_extent ext[2];
+	loff_t next;
+	int i, ret = 0;
 
 	mutex_lock(&inode->i_mutex);
-
-	isize = i_size_read(inode);
-	if (offset >= isize) {
+	if (offset >= inode->i_size) {
 		mutex_unlock(&inode->i_mutex);
 		return -ENXIO;
 	}
 
-	blkbits = inode->i_sb->s_blocksize_bits;
-	start = offset >> blkbits;
-	last = start;
-	end = isize >> blkbits;
-	holeoff = offset;
+	fie.fi_flags = 0;
+	fie.fi_extents_max = 2;
+	fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
+	while (1) {
+		mm_segment_t old_fs = get_fs();
 
-	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			last += ret;
-			holeoff = (loff_t)last << blkbits;
-			continue;
-		}
+		fie.fi_extents_mapped = 0;
+		memset(ext, 0, sizeof(*ext));
 
-		/*
-		 * If there is a delay extent at this offset,
-		 * we will skip this extent.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			last = es.es_lblk + es.es_len;
-			holeoff = (loff_t)last << blkbits;
-			continue;
-		}
+		set_fs(get_ds());
+		ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+		set_fs(old_fs);
+		if (ret)
+			break;
 
-		/*
-		 * If there is a unwritten extent at this offset,
-		 * it will be as a data or a hole according to page
-		 * cache that has data or not.
-		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-							      &map, &holeoff);
-			if (!unwritten) {
-				last += ret;
-				holeoff = (loff_t)last << blkbits;
+		/* No extents found */
+		if (!fie.fi_extents_mapped)
+			break;
+
+		for (i = 0; i < fie.fi_extents_mapped; i++) {
+			next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
+			/*
+			 * If extent is not unwritten, then it contains valid
+			 * data, mapped or delayed.
+			 */
+			if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
+				if (offset < (loff_t)ext[i].fe_logical)
+					goto out;
+				offset = next;
 				continue;
 			}
-		}
+			/*
+			 * If there is a unwritten extent at this offset,
+			 * it will be as a data or a hole according to page
+			 * cache that has data or not.
+			 */
+			if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+						      next, &offset))
+				goto out;
 
-		/* find a hole */
-		break;
-	} while (last <= end);
+			offset = next;
+			if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
+				goto out;
+		}
+	}
+	if (offset > inode->i_size)
+		offset = inode->i_size;
+out:
 	mutex_unlock(&inode->i_mutex);
+	if (ret)
+		return ret;
 
-	if (holeoff > isize)
-		holeoff = isize;
-
-	return vfs_setpos(file, holeoff, maxsize);
+	return vfs_setpos(file, offset, maxsize);
 }
 
 /*
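
The rewritten seek paths back the lseek(2) SEEK_DATA/SEEK_HOLE interface, now walking extents via ext4_fiemap() rather than block-by-block through ext4_map_blocks(). A minimal userspace caller, for reference; "sparse.bin" is an assumed test file, and any sparse file will do:

/* SEEK_DATA/SEEK_HOLE need _GNU_SOURCE on glibc. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("sparse.bin", O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        off_t data = lseek(fd, 0, SEEK_DATA);   /* first data at/after 0 */
        off_t hole = lseek(fd, 0, SEEK_HOLE);   /* first hole at/after 0 */
        printf("first data at %lld, first hole at %lld\n",
               (long long)data, (long long)hole);
        close(fd);
        return 0;
}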
@@ -811,8 +811,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
 	ret = __block_write_begin(page, 0, inline_size,
 				  ext4_da_get_block_prep);
 	if (ret) {
+		up_read(&EXT4_I(inode)->xattr_sem);
+		unlock_page(page);
+		page_cache_release(page);
 		ext4_truncate_failed_write(inode);
-		goto out;
+		return ret;
 	}
 
 	SetPageDirty(page);
@@ -870,6 +873,12 @@ retry_journal:
 		goto out_journal;
 	}
 
+	/*
+	 * We cannot recurse into the filesystem as the transaction
+	 * is already started.
+	 */
+	flags |= AOP_FLAG_NOFS;
+
 	if (ret == -ENOSPC) {
 		ret = ext4_da_convert_inline_data_to_extent(mapping,
 							    inode,
@@ -882,11 +891,6 @@ retry_journal:
 		goto out;
 	}
 
-	/*
-	 * We cannot recurse into the filesystem as the transaction
-	 * is already started.
-	 */
-	flags |= AOP_FLAG_NOFS;
-
 	page = grab_cache_page_write_begin(mapping, 0, flags);
 	if (!page) {
@@ -1807,11 +1811,12 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
 
 int ext4_inline_data_fiemap(struct inode *inode,
 			    struct fiemap_extent_info *fieinfo,
-			    int *has_inline)
+			    int *has_inline, __u64 start, __u64 len)
 {
 	__u64 physical = 0;
-	__u64 length;
-	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST;
+	__u64 inline_len;
+	__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
+		FIEMAP_EXTENT_LAST;
 	int error = 0;
 	struct ext4_iloc iloc;
 
@@ -1820,6 +1825,13 @@ int ext4_inline_data_fiemap(struct inode *inode,
 		*has_inline = 0;
 		goto out;
 	}
+	inline_len = min_t(size_t, ext4_get_inline_size(inode),
+			   i_size_read(inode));
+	if (start >= inline_len)
+		goto out;
+	if (start + len < inline_len)
+		inline_len = start + len;
+	inline_len -= start;
 
 	error = ext4_get_inode_loc(inode, &iloc);
 	if (error)
@@ -1828,11 +1840,10 @@ int ext4_inline_data_fiemap(struct inode *inode,
 	physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
 	physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
 	physical += offsetof(struct ext4_inode, i_block);
-	length = i_size_read(inode);
 
 	if (physical)
-		error = fiemap_fill_next_extent(fieinfo, 0, physical,
-						length, flags);
+		error = fiemap_fill_next_extent(fieinfo, start, physical,
+						inline_len, flags);
 	brelse(iloc.bh);
 out:
 	up_read(&EXT4_I(inode)->xattr_sem);
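
The added start/len handling clamps the reported inline extent to the intersection of the requested range with the inline area, so callers asking past the inline data correctly get nothing back. The arithmetic in isolation, as a userspace sketch with invented values:

/* Returns how many bytes of inline data fall inside [start, start + len). */
#include <assert.h>
#include <stdint.h>

static uint64_t clamp_inline(uint64_t inline_size, uint64_t start,
                             uint64_t len)
{
        uint64_t inline_len = inline_size;

        if (start >= inline_len)
                return 0;               /* request starts past the data */
        if (start + len < inline_len)
                inline_len = start + len;
        return inline_len - start;
}

int main(void)
{
        assert(clamp_inline(60, 0, 4096) == 60);  /* whole inline area */
        assert(clamp_inline(60, 100, 10) == 0);   /* no overlap */
        assert(clamp_inline(60, 10, 20) == 20);   /* interior slice */
        return 0;
}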
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
 	}
 	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
 		up_read((&EXT4_I(inode)->i_data_sem));
-	/*
-	 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
-	 * because it shouldn't be marked in es_map->m_flags.
-	 */
-	map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
 
 	/*
 	 * We don't check m_len because extent will be collapsed in status
@@ -491,7 +486,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
-		ext4_es_lru_add(inode);
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -1393,7 +1387,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, iblock, &es)) {
-		ext4_es_lru_add(inode);
 		if (ext4_es_is_hole(&es)) {
 			retval = 0;
 			down_read(&EXT4_I(inode)->i_data_sem);
@@ -1434,24 +1427,12 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 	 * file system block.
 	 */
 	down_read(&EXT4_I(inode)->i_data_sem);
-	if (ext4_has_inline_data(inode)) {
-		/*
-		 * We will soon create blocks for this page, and let
-		 * us pretend as if the blocks aren't allocated yet.
-		 * In case of clusters, we have to handle the work
-		 * of mapping from cluster so that the reserved space
-		 * is calculated properly.
-		 */
-		if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
-		    ext4_find_delalloc_cluster(inode, map->m_lblk))
-			map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+	if (ext4_has_inline_data(inode))
 		retval = 0;
-	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		retval = ext4_ext_map_blocks(NULL, inode, map,
-					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
+	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
 	else
-		retval = ext4_ind_map_blocks(NULL, inode, map,
-					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
+		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
 
 add_delayed:
 	if (retval == 0) {
@@ -1465,7 +1446,8 @@ add_delayed:
 		 * then we don't need to reserve it again. However we still need
 		 * to reserve metadata for every block we're going to write.
 		 */
-		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+		if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
+		    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
 			ret = ext4_da_reserve_space(inode, iblock);
 			if (ret) {
 				/* not enough space to reserve */
@@ -1481,11 +1463,6 @@ add_delayed:
 			goto out_unlock;
 		}
 
-		/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
-		 * and it should not appear on the bh->b_state.
-		 */
-		map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-
 		map_bh(bh, inode->i_sb, invalid_block);
 		set_buffer_new(bh);
 		set_buffer_delay(bh);
@@ -3643,7 +3620,7 @@ out_stop:
 	 * If this was a simple ftruncate() and the file will remain alive,
 	 * then we need to clear up the orphan record which we created above.
 	 * However, if this was a real unlink then we were called by
-	 * ext4_delete_inode(), and we allow that function to clean up the
+	 * ext4_evict_inode(), and we allow that function to clean up the
 	 * orphan info for us.
 	 */
 	if (inode->i_nlink)
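
On bigalloc filesystems the delalloc reservation is made per cluster, so the hunk above reserves quota only when no delayed block already sits in the same cluster as the one being written, which lets the EXT4_MAP_FROM_CLUSTER round trip go away entirely. A userspace sketch of the decision, where cluster_has_delayed() is an invented stand-in for ext4_find_delalloc_cluster():

#include <stdbool.h>
#include <stdio.h>

#define CLUSTER_RATIO 16        /* blocks per cluster; 1 means no bigalloc */

static bool cluster_has_delayed(const bool *delayed, unsigned int nblocks,
                                unsigned int lblk)
{
        unsigned int first = lblk / CLUSTER_RATIO * CLUSTER_RATIO;

        for (unsigned int i = first;
             i < first + CLUSTER_RATIO && i < nblocks; i++)
                if (delayed[i])
                        return true;
        return false;
}

int main(void)
{
        bool delayed[64] = { false };
        unsigned int lblk = 20;

        delayed[17] = true;     /* block 17 already delayed (same cluster) */

        /* Mirrors: s_cluster_ratio <= 1 || !ext4_find_delalloc_cluster() */
        bool need_reservation = CLUSTER_RATIO <= 1 ||
                                !cluster_has_delayed(delayed, 64, lblk);
        printf("block %u: %s\n", lblk,
               need_reservation ? "reserve a new cluster"
                                : "cluster already reserved");
        return 0;
}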
@@ -78,8 +78,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
 	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
 	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
 	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
-	ext4_es_lru_del(inode1);
-	ext4_es_lru_del(inode2);
 
 	isize = i_size_read(inode1);
 	i_size_write(inode1, i_size_read(inode2));
@@ -2358,7 +2358,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
 	if (sbi->s_group_info) {
 		memcpy(new_groupinfo, sbi->s_group_info,
 		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
-		ext4_kvfree(sbi->s_group_info);
+		kvfree(sbi->s_group_info);
 	}
 	sbi->s_group_info = new_groupinfo;
 	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
 		metalen = sizeof(*meta_group_info) <<
 			EXT4_DESC_PER_BLOCK_BITS(sb);
-		meta_group_info = kmalloc(metalen, GFP_KERNEL);
+		meta_group_info = kmalloc(metalen, GFP_NOFS);
 		if (meta_group_info == NULL) {
 			ext4_msg(sb, KERN_ERR, "can't allocate mem "
 				 "for a buddy group");
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
 	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 
-	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
+	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
 	if (meta_group_info[i] == NULL) {
 		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
 		goto exit_group_info;
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	{
 		struct buffer_head *bh;
 		meta_group_info[i]->bb_bitmap =
-			kmalloc(sb->s_blocksize, GFP_KERNEL);
+			kmalloc(sb->s_blocksize, GFP_NOFS);
 		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
 		bh = ext4_read_block_bitmap(sb, group);
 		BUG_ON(bh == NULL);
@@ -2495,7 +2495,7 @@ err_freebuddy:
 		kfree(sbi->s_group_info[i]);
 	iput(sbi->s_buddy_cache);
 err_freesgi:
-	ext4_kvfree(sbi->s_group_info);
+	kvfree(sbi->s_group_info);
 	return -ENOMEM;
 }
 
@@ -2708,12 +2708,11 @@ int ext4_mb_release(struct super_block *sb)
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
 			kfree(sbi->s_group_info[i]);
-		ext4_kvfree(sbi->s_group_info);
+		kvfree(sbi->s_group_info);
 	}
 	kfree(sbi->s_mb_offsets);
 	kfree(sbi->s_mb_maxs);
-	if (sbi->s_buddy_cache)
-		iput(sbi->s_buddy_cache);
+	iput(sbi->s_buddy_cache);
 	if (sbi->s_mb_stats) {
 		ext4_msg(sb, KERN_INFO,
 			"mballoc: %u blocks %u reqs (%u success)",
@@ -592,7 +592,7 @@ err_out:
 
 	/*
 	 * set the i_blocks count to zero
-	 * so that the ext4_delete_inode does the
+	 * so that the ext4_evict_inode() does the
 	 * right job
 	 *
 	 * We don't need to take the i_lock because
@@ -273,6 +273,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 	int replaced_count = 0;
 	int from = data_offset_in_page << orig_inode->i_blkbits;
 	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+	struct super_block *sb = orig_inode->i_sb;
 
 	/*
 	 * It needs twice the amount of ordinary journal buffers because
@@ -405,10 +406,13 @@ unlock_pages:
 	page_cache_release(pagep[1]);
 stop_journal:
 	ext4_journal_stop(handle);
+	if (*err == -ENOSPC &&
+	    ext4_should_retry_alloc(sb, &retries))
+		goto again;
 	/* Buffer was busy because probably is pinned to journal transaction,
 	 * force transaction commit may help to free it. */
-	if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
-						      &retries))
+	if (*err == -EBUSY && retries++ < 4 && EXT4_SB(sb)->s_journal &&
+	    jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal))
 		goto again;
 	return replaced_count;
 
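
The retry policy above now distinguishes ENOSPC, retried while the allocator believes freeing is in progress, from EBUSY, retried a bounded number of times after forcing a journal commit. A userspace sketch of the shape of that loop; every helper is an invented stand-in for the kernel function named in its comment:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool should_retry_alloc(int *retries)
{
        return (*retries)++ < 3;   /* ext4_should_retry_alloc() stand-in */
}

static bool force_commit(void)
{
        return true;    /* jbd2_journal_force_commit_nested() stand-in */
}

static int do_move(int attempt)
{
        return attempt < 2 ? -ENOSPC : 0;  /* fail twice, then succeed */
}

int main(void)
{
        int retries = 0, attempt = 0, err;

again:
        err = do_move(attempt++);
        if (err == -ENOSPC && should_retry_alloc(&retries))
                goto again;
        if (err == -EBUSY && retries++ < 4 && force_commit())
                goto again;
        printf("finished after %d attempts, err = %d\n", attempt, err);
        return 0;
}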
@@ -2814,7 +2814,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	ext4_orphan_add(handle, inode);
 	inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
-	retval = 0;
 
 end_unlink:
 	brelse(bh);
@@ -856,7 +856,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	n_group_desc[gdb_num] = gdb_bh;
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree(o_group_desc);
+	kvfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 	err = ext4_handle_dirty_super(handle, sb);
@@ -866,7 +866,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	return err;
 
 exit_inode:
-	ext4_kvfree(n_group_desc);
+	kvfree(n_group_desc);
 	brelse(iloc.bh);
 exit_dind:
 	brelse(dind);
@@ -909,7 +909,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	n_group_desc[gdb_num] = gdb_bh;
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
-	ext4_kvfree(o_group_desc);
+	kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
@@ -176,15 +176,6 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
 	return ret;
 }
 
-void ext4_kvfree(void *ptr)
-{
-	if (is_vmalloc_addr(ptr))
-		vfree(ptr);
-	else
-		kfree(ptr);
-}
-
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
 {
@@ -811,8 +802,8 @@ static void ext4_put_super(struct super_block *sb)
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
-	ext4_kvfree(sbi->s_group_desc);
-	ext4_kvfree(sbi->s_flex_groups);
+	kvfree(sbi->s_group_desc);
+	kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -880,10 +871,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ei->i_prealloc_lock);
 	ext4_es_init_tree(&ei->i_es_tree);
 	rwlock_init(&ei->i_es_lock);
-	INIT_LIST_HEAD(&ei->i_es_lru);
+	INIT_LIST_HEAD(&ei->i_es_list);
 	ei->i_es_all_nr = 0;
-	ei->i_es_lru_nr = 0;
-	ei->i_touch_when = 0;
+	ei->i_es_shk_nr = 0;
+	ei->i_es_shrink_lblk = 0;
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
@@ -973,7 +964,6 @@ void ext4_clear_inode(struct inode *inode)
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
-	ext4_es_lru_del(inode);
 	if (EXT4_I(inode)->jinode) {
 		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 					       EXT4_I(inode)->jinode);
@@ -1153,7 +1143,7 @@ enum {
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
-	Opt_max_dir_size_kb,
+	Opt_max_dir_size_kb, Opt_nojournal_checksum,
 };
 
 static const match_table_t tokens = {
@@ -1187,6 +1177,7 @@ static const match_table_t tokens = {
 	{Opt_journal_dev, "journal_dev=%u"},
 	{Opt_journal_path, "journal_path=%s"},
 	{Opt_journal_checksum, "journal_checksum"},
+	{Opt_nojournal_checksum, "nojournal_checksum"},
 	{Opt_journal_async_commit, "journal_async_commit"},
 	{Opt_abort, "abort"},
 	{Opt_data_journal, "data=journal"},
@@ -1368,6 +1359,8 @@ static const struct mount_opts {
 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
 	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
+	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
+	 MOPT_EXT4_ONLY | MOPT_CLEAR},
 	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
 	 MOPT_EXT4_ONLY | MOPT_SET},
 	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
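
The new nojournal_checksum option needs no new mount flag: both tokens target EXT4_MOUNT_JOURNAL_CHECKSUM, one with MOPT_SET and one with MOPT_CLEAR. A userspace sketch of that table-driven pattern (constants and names invented for the example):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MNT_JOURNAL_CHECKSUM 0x1  /* stand-in for EXT4_MOUNT_JOURNAL_CHECKSUM */

struct mount_opt {
        const char *token;
        unsigned int mask;
        bool set;       /* MOPT_SET vs MOPT_CLEAR */
};

static const struct mount_opt opts[] = {
        { "journal_checksum",   MNT_JOURNAL_CHECKSUM, true  },
        { "nojournal_checksum", MNT_JOURNAL_CHECKSUM, false },
};

int main(void)
{
        unsigned int mount_flags = MNT_JOURNAL_CHECKSUM; /* e.g. implied */
        const char *option = "nojournal_checksum";

        for (size_t i = 0; i < sizeof(opts) / sizeof(opts[0]); i++) {
                if (strcmp(option, opts[i].token) != 0)
                        continue;
                if (opts[i].set)
                        mount_flags |= opts[i].mask;
                else
                        mount_flags &= ~opts[i].mask;
        }
        printf("journal_checksum is %s\n",
               (mount_flags & MNT_JOURNAL_CHECKSUM) ? "on" : "off");
        return 0;
}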
@@ -1709,6 +1702,12 @@ static int parse_options(char *options, struct super_block *sb,
 			return 0;
 		}
 	}
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+		ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit "
+			 "in data=ordered mode");
+		return 0;
+	}
 	return 1;
 }
 
@@ -1946,7 +1945,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
 		memcpy(new_groups, sbi->s_flex_groups,
 		       (sbi->s_flex_groups_allocated *
 			sizeof(struct flex_groups)));
-		ext4_kvfree(sbi->s_flex_groups);
+		kvfree(sbi->s_flex_groups);
 	}
 	sbi->s_flex_groups = new_groups;
 	sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
@@ -3317,7 +3316,7 @@ int ext4_calculate_overhead(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	ext4_fsblk_t overhead = 0;
-	char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+	char *buf = (char *) get_zeroed_page(GFP_NOFS);
 
 	if (!buf)
 		return -ENOMEM;
@@ -3345,8 +3344,8 @@ int ext4_calculate_overhead(struct super_block *sb)
 		memset(buf, 0, PAGE_SIZE);
 		cond_resched();
 	}
-	/* Add the journal blocks as well */
-	if (sbi->s_journal)
+	/* Add the internal journal blocks as well */
+	if (sbi->s_journal && !sbi->journal_bdev)
 		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
 
 	sbi->s_overhead = overhead;
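
An external journal lives on another device, so its blocks are not overhead of this filesystem; the hunk above excludes it by checking journal_bdev. The accounting in miniature, with invented numbers:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        unsigned long long overhead = 1024;     /* metadata clusters so far */
        unsigned long long journal_clusters = 8192;
        bool has_journal = true;
        bool journal_is_external = true;        /* sbi->journal_bdev != NULL */

        /* Mirrors: if (sbi->s_journal && !sbi->journal_bdev) */
        if (has_journal && !journal_is_external)
                overhead += journal_clusters;
        printf("overhead = %llu clusters\n", overhead);
        return 0;
}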
@@ -4232,7 +4231,7 @@ failed_mount7:
 failed_mount6:
 	ext4_mb_release(sb);
 	if (sbi->s_flex_groups)
-		ext4_kvfree(sbi->s_flex_groups);
+		kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -4261,7 +4260,7 @@ failed_mount3:
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
-	ext4_kvfree(sbi->s_group_desc);
+	kvfree(sbi->s_group_desc);
 failed_mount:
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
@@ -4862,6 +4861,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
+	    test_opt(sb, JOURNAL_CHECKSUM)) {
+		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
+			 "during remount not supported");
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
 		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
 			ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -1714,8 +1714,7 @@ int jbd2_journal_destroy(journal_t *journal)
 
 	if (journal->j_proc_entry)
 		jbd2_stats_proc_exit(journal);
-	if (journal->j_inode)
-		iput(journal->j_inode);
+	iput(journal->j_inode);
 	if (journal->j_revoke)
 		jbd2_journal_destroy_revoke(journal);
 	if (journal->j_chksum_driver)
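
iput() ignores a NULL inode, which is why the guard above could go. It is the same contract free(3) gives in userspace:

#include <stdlib.h>

int main(void)
{
        char *p = NULL;

        free(p);        /* defined to do nothing for a NULL pointer */
        return 0;
}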
@@ -43,15 +43,13 @@ struct extent_status;
 	{ EXT4_GET_BLOCKS_METADATA_NOFAIL,	"METADATA_NOFAIL" }, \
 	{ EXT4_GET_BLOCKS_NO_NORMALIZE,		"NO_NORMALIZE" }, \
 	{ EXT4_GET_BLOCKS_KEEP_SIZE,		"KEEP_SIZE" }, \
-	{ EXT4_GET_BLOCKS_NO_LOCK,		"NO_LOCK" }, \
-	{ EXT4_GET_BLOCKS_NO_PUT_HOLE,		"NO_PUT_HOLE" })
+	{ EXT4_GET_BLOCKS_NO_LOCK,		"NO_LOCK" })
 
 #define show_mflags(flags) __print_flags(flags, "", \
 	{ EXT4_MAP_NEW,		"N" }, \
 	{ EXT4_MAP_MAPPED,	"M" }, \
 	{ EXT4_MAP_UNWRITTEN,	"U" }, \
-	{ EXT4_MAP_BOUNDARY,	"B" }, \
-	{ EXT4_MAP_FROM_CLUSTER, "C" })
+	{ EXT4_MAP_BOUNDARY,	"B" })
 
 #define show_free_flags(flags) __print_flags(flags, "|", \
 	{ EXT4_FREE_BLOCKS_METADATA,	"METADATA" }, \
@@ -2452,15 +2450,14 @@ TRACE_EVENT(ext4_collapse_range,
 
 TRACE_EVENT(ext4_es_shrink,
 	TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
-		 int skip_precached, int nr_skipped, int retried),
+		 int nr_skipped, int retried),
 
-	TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
+	TP_ARGS(sb, nr_shrunk, scan_time, nr_skipped, retried),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev		)
 		__field(	int,		nr_shrunk	)
 		__field(	unsigned long long, scan_time	)
-		__field(	int,		skip_precached	)
 		__field(	int,		nr_skipped	)
 		__field(	int,		retried		)
 	),
@@ -2469,16 +2466,14 @@ TRACE_EVENT(ext4_es_shrink,
 		__entry->dev		= sb->s_dev;
 		__entry->nr_shrunk	= nr_shrunk;
 		__entry->scan_time	= div_u64(scan_time, 1000);
-		__entry->skip_precached = skip_precached;
 		__entry->nr_skipped	= nr_skipped;
 		__entry->retried	= retried;
 	),
 
-	TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
+	TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu "
 		  "nr_skipped %d retried %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
-		  __entry->scan_time, __entry->skip_precached,
-		  __entry->nr_skipped, __entry->retried)
+		  __entry->scan_time, __entry->nr_skipped, __entry->retried)
 );
 
 #endif /* _TRACE_EXT4_H */