Add Samsung-specific changes
@@ -208,6 +208,29 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
	  by falling back to the kernel crypto API when inline
	  encryption hardware is not present.

config BLK_SEC_COMMON
	tristate "Samsung specific module in block layer"
	default n
	help
	  Say Y here if you want to enable the Samsung specific module
	  in the block layer.

config BLK_SEC_STATS
	tristate "Samsung statistics module in block layer"
	default n
	select BLK_SEC_COMMON
	help
	  Say Y here if you want to enable the Samsung statistics module
	  in the block layer.

config BLK_SEC_WB
	tristate "Samsung Write Booster module in block layer"
	default n
	select BLK_SEC_COMMON
	help
	  Say Y here if you want to enable the Samsung write booster module
	  in the block layer.

source "block/partitions/Kconfig"

config BLK_MQ_PCI
@@ -221,11 +244,6 @@ config BLK_MQ_VIRTIO
config BLK_PM
	def_bool PM

config BLOCK_SUPPORT_STLOG
	bool "Enable storage log"
	depends on BLOCK && PROC_STLOG
	default y

# do not use in new code
config BLOCK_HOLDER_DEPRECATED
	bool
@@ -44,4 +44,25 @@ config BFQ_CGROUP_DEBUG
	  Enable some debugging help. Currently it exports additional stat
	  files in a cgroup which can be useful for debugging.

config MQ_IOSCHED_SSG
	tristate "SamSung Generic I/O scheduler"
	default n
	help
	  SamSung Generic IO scheduler.

config MQ_IOSCHED_SSG_CGROUP
	tristate "Control Group for SamSung Generic I/O scheduler"
	default n
	depends on BLK_CGROUP
	depends on MQ_IOSCHED_SSG
	help
	  Control Group for SamSung Generic IO scheduler.

config MQ_IOSCHED_SSG_WB
	tristate "Write Booster for SamSung Generic I/O scheduler"
	default n
	depends on MQ_IOSCHED_SSG
	help
	  Write Booster for SamSung Generic IO scheduler.

endmenu
@@ -25,6 +25,10 @@ obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
ssg-$(CONFIG_MQ_IOSCHED_SSG) := ssg-iosched.o ssg-stat.o
ssg-$(CONFIG_MQ_IOSCHED_SSG_CGROUP) += ssg-cgroup.o
ssg-$(CONFIG_MQ_IOSCHED_SSG_WB) += ssg-wb.o
obj-$(CONFIG_MQ_IOSCHED_SSG) += ssg.o

obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
@@ -40,3 +44,7 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \
					blk-crypto-sysfs.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
obj-$(CONFIG_BLK_SEC_COMMON) += blk-sec-common.o
blk-sec-stats-$(CONFIG_BLK_SEC_STATS) := blk-sec-stat.o blk-sec-stat-pio.o blk-sec-stat-traffic.o
obj-$(CONFIG_BLK_SEC_STATS) += blk-sec-stats.o
obj-$(CONFIG_BLK_SEC_WB) += blk-sec-wb.o
@@ -25,10 +25,6 @@
#include "blk-rq-qos.h"
#include "blk-cgroup.h"

#ifdef CONFIG_DDAR
extern int fscrypt_dd_encrypted(struct bio_vec *bv);
#endif

#define ALLOC_CACHE_THRESHOLD 16
#define ALLOC_CACHE_MAX 256

@@ -934,10 +930,6 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
		return false;
	}

#ifdef CONFIG_DDAR
	if ((*same_page == false) && fscrypt_dd_encrypted(bv))
		return false;
#endif
	bv->bv_len += len;
	return true;
}
block/blk-sec-common.c (new file, 271 lines)
@@ -0,0 +1,271 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Samsung specific module in block layer
|
||||
*
|
||||
* Copyright (C) 2021 Manjong Lee <mj0123.lee@samsung.com>
|
||||
* Copyright (C) 2021 Junho Kim <junho89.kim@samsung.com>
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
* Copyright (C) 2021 Seunghwan Hyun <seunghwan.hyun@samsung.com>
|
||||
* Copyright (C) 2021 Tran Xuan Nam <nam.tx2@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRV_SAMSUNG)
|
||||
#include <linux/sec_class.h>
|
||||
#else
|
||||
static struct class *blk_sec_class;
|
||||
#endif
|
||||
|
||||
#include "blk-sec.h"
|
||||
#include "../drivers/ufs/host/ufs-sec-feature.h"
|
||||
|
||||
struct disk_info {
|
||||
/* fields related with target device itself */
|
||||
struct gendisk *gd;
|
||||
struct request_queue *queue;
|
||||
};
|
||||
|
||||
struct device *blk_sec_dev;
|
||||
EXPORT_SYMBOL(blk_sec_dev);
|
||||
|
||||
struct workqueue_struct *blk_sec_common_wq;
|
||||
EXPORT_SYMBOL(blk_sec_common_wq);
|
||||
|
||||
static struct disk_info internal_disk;
|
||||
static unsigned int internal_min_size_mb = 10 * 1024; /* 10GB */
|
||||
|
||||
static char manual_hcgc_status[32] = "off";
|
||||
|
||||
#define SECTORS2MB(x) ((x) / 2 / 1024)
|
||||
|
||||
#define SCSI_DISK0_MAJOR 8
|
||||
#define MMC_BLOCK_MAJOR 179
|
||||
#define MAJOR8_DEV_NUM 16 /* maximum number of minor devices in scsi disk0 */
|
||||
#define SCSI_MINORS 16 /* first minor number of scsi disk0 */
|
||||
#define MMC_TARGET_DEV 16 /* number of mmc devices set of target (maximum 256) */
|
||||
#define MMC_MINORS 8 /* first minor number of mmc disk */
|
||||
|
||||
static bool is_internal_bdev(struct block_device *dev)
|
||||
{
|
||||
int size_mb;
|
||||
|
||||
if (bdev_is_partition(dev))
|
||||
return false;
|
||||
|
||||
if (dev->bd_disk->flags & GENHD_FL_REMOVABLE)
|
||||
return false;
|
||||
|
||||
size_mb = SECTORS2MB(get_capacity(dev->bd_disk));
|
||||
if (size_mb >= internal_min_size_mb)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct gendisk *find_internal_disk(void)
|
||||
{
|
||||
struct block_device *bdev;
|
||||
struct gendisk *gd = NULL;
|
||||
int idx;
|
||||
dev_t devno = MKDEV(0, 0);
|
||||
|
||||
for (idx = 0; idx < MAJOR8_DEV_NUM; idx++) {
|
||||
devno = MKDEV(SCSI_DISK0_MAJOR, SCSI_MINORS * idx);
|
||||
bdev = blkdev_get_by_dev(devno, BLK_OPEN_READ, NULL, NULL);
|
||||
if (IS_ERR(bdev))
|
||||
continue;
|
||||
|
||||
if (bdev->bd_disk && is_internal_bdev(bdev))
|
||||
gd = bdev->bd_disk;
|
||||
|
||||
blkdev_put(bdev, NULL);
|
||||
|
||||
if (gd)
|
||||
return gd;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < MMC_TARGET_DEV; idx++) {
|
||||
devno = MKDEV(MMC_BLOCK_MAJOR, MMC_MINORS * idx);
|
||||
bdev = blkdev_get_by_dev(devno, BLK_OPEN_READ, NULL, NULL);
|
||||
if (IS_ERR(bdev))
|
||||
continue;
|
||||
|
||||
if (bdev->bd_disk && is_internal_bdev(bdev))
|
||||
gd = bdev->bd_disk;
|
||||
|
||||
blkdev_put(bdev, NULL);
|
||||
|
||||
if (gd)
|
||||
return gd;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int init_internal_disk_info(void)
|
||||
{
|
||||
if (!internal_disk.gd) {
|
||||
internal_disk.gd = find_internal_disk();
|
||||
if (unlikely(!internal_disk.gd)) {
|
||||
pr_err("%s: can't find internal disk\n", __func__);
|
||||
return -ENODEV;
|
||||
}
|
||||
internal_disk.queue = internal_disk.gd->queue;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void clear_internal_disk_info(void)
|
||||
{
|
||||
internal_disk.gd = NULL;
|
||||
internal_disk.queue = NULL;
|
||||
}
|
||||
|
||||
struct gendisk *blk_sec_internal_disk(void)
|
||||
{
|
||||
if (unlikely(!internal_disk.gd))
|
||||
init_internal_disk_info();
|
||||
|
||||
return internal_disk.gd;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_internal_disk);
|
||||
|
||||
static int blk_sec_uevent(const struct device *dev, struct kobj_uevent_env *env)
|
||||
{
|
||||
return add_uevent_var(env, "DEVNAME=%s", dev->kobj.name);
|
||||
}
|
||||
|
||||
static struct device_type blk_sec_type = {
|
||||
.uevent = blk_sec_uevent,
|
||||
};
|
||||
|
||||
static ssize_t manual_hcgc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return scnprintf(buf, PAGE_SIZE, "%s\n", manual_hcgc_status);
|
||||
}
|
||||
|
||||
static ssize_t manual_hcgc_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
#define BUF_SIZE 32
|
||||
char hcgc_str[BUF_SIZE];
|
||||
char *envp[] = { "NAME=HCGC_BKL_SEC", hcgc_str, NULL, };
|
||||
|
||||
if (!ufs_sec_is_hcgc_allowed())
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (strncmp(buf, "on", 2) && strncmp(buf, "off", 3) &&
|
||||
strncmp(buf, "done", 4) && strncmp(buf, "disable", 7) && strncmp(buf, "enable", 6))
|
||||
return -EINVAL;
|
||||
|
||||
if (!strncmp(manual_hcgc_status, "disable", 7) && strncmp(buf, "enable", 6))
|
||||
return -EINVAL;
|
||||
|
||||
memset(manual_hcgc_status, 0, BUF_SIZE);
|
||||
|
||||
if (!strncmp(buf, "done", 4)) {
|
||||
strncpy(manual_hcgc_status, buf, BUF_SIZE - 1);
|
||||
return count;
|
||||
}
|
||||
|
||||
snprintf(hcgc_str, BUF_SIZE, "MANUAL_HCGC=%s", buf);
|
||||
kobject_uevent_env(&blk_sec_dev->kobj, KOBJ_CHANGE, envp);
|
||||
|
||||
if (!strncmp(buf, "enable", 6))
|
||||
strncpy(manual_hcgc_status, "off", BUF_SIZE - 1);
|
||||
else
|
||||
strncpy(manual_hcgc_status, buf, BUF_SIZE - 1);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static struct kobj_attribute manual_hcgc_attr = __ATTR(manual_hcgc, 0600, manual_hcgc_show, manual_hcgc_store);
|
||||
|
||||
static const struct attribute *blk_sec_attrs[] = {
|
||||
&manual_hcgc_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct kobject *blk_sec_kobj;
|
||||
|
||||
static int __init blk_sec_common_init(void)
|
||||
{
|
||||
int retval;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRV_SAMSUNG)
|
||||
blk_sec_dev = sec_device_create(NULL, "blk_sec");
|
||||
if (IS_ERR(blk_sec_dev)) {
|
||||
pr_err("%s: Failed to create blk_sec device\n", __func__);
|
||||
return PTR_ERR(blk_sec_dev);
|
||||
}
|
||||
#else
|
||||
blk_sec_class = class_create("blk_sec");
|
||||
if (IS_ERR(blk_sec_class)) {
|
||||
pr_err("%s: couldn't create blk_sec class\n", __func__);
|
||||
return PTR_ERR(blk_sec_class);
|
||||
}
|
||||
|
||||
blk_sec_dev = device_create(blk_sec_class, NULL, MKDEV(0, 0), NULL, "blk_sec");
|
||||
if (IS_ERR(blk_sec_dev)) {
|
||||
pr_err("%s: Failed to create blk_sec device\n", __func__);
|
||||
class_destroy(blk_sec_class);
|
||||
return PTR_ERR(blk_sec_dev);
|
||||
}
|
||||
#endif
|
||||
|
||||
blk_sec_dev->type = &blk_sec_type;
|
||||
|
||||
blk_sec_kobj = kobject_create_and_add("blk_sec", kernel_kobj);
|
||||
if (!blk_sec_kobj)
|
||||
goto destroy_device;
|
||||
if (sysfs_create_files(blk_sec_kobj, blk_sec_attrs)) {
|
||||
kobject_put(blk_sec_kobj);
|
||||
goto destroy_device;
|
||||
}
|
||||
|
||||
blk_sec_common_wq = create_freezable_workqueue("blk_sec_common");
|
||||
|
||||
retval = init_internal_disk_info();
|
||||
if (retval) {
|
||||
clear_internal_disk_info();
|
||||
pr_err("%s: Can't find internal disk info!", __func__);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
destroy_device:
|
||||
#if IS_ENABLED(CONFIG_DRV_SAMSUNG)
|
||||
sec_device_destroy(blk_sec_dev->devt);
|
||||
#else
|
||||
device_destroy(blk_sec_class, MKDEV(0, 0));
|
||||
class_destroy(blk_sec_class);
|
||||
#endif
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void __exit blk_sec_common_exit(void)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_DRV_SAMSUNG)
|
||||
sec_device_destroy(blk_sec_dev->devt);
|
||||
#else
|
||||
device_destroy(blk_sec_class, MKDEV(0, 0));
|
||||
class_destroy(blk_sec_class);
|
||||
#endif
|
||||
sysfs_remove_files(blk_sec_kobj, blk_sec_attrs);
|
||||
kobject_put(blk_sec_kobj);
|
||||
|
||||
clear_internal_disk_info();
|
||||
}
|
||||
|
||||
module_init(blk_sec_common_init);
|
||||
module_exit(blk_sec_common_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Changheun Lee <nanich.lee@samsung.com>");
|
||||
MODULE_DESCRIPTION("Samsung specific module in block layer");
|
||||
MODULE_VERSION("1.0");
|
block/blk-sec-stat-pio.c (new file, 354 lines)
@@ -0,0 +1,354 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Samsung Block Statistics
|
||||
*
|
||||
* Copyright (C) 2021 Manjong Lee <mj0123.lee@samsung.com>
|
||||
* Copyright (C) 2021 Junho Kim <junho89.kim@samsung.com>
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
* Copyright (C) 2021 Seunghwan Hyun <seunghwan.hyun@samsung.com>
|
||||
* Copyright (C) 2021 Tran Xuan Nam <nam.tx2@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "blk-sec.h"
|
||||
|
||||
#define MAX_PIO_NODE_NUM 10000
|
||||
#define SORT_PIO_NODE_NUM 100
|
||||
#define PIO_HASH_SIZE 100
|
||||
#define MAX_PIO_DURATION_MS 10000
|
||||
|
||||
#define GET_HASH_KEY(tgid) ((unsigned int)(tgid) % PIO_HASH_SIZE)
|
||||
#define RESET_PIO_IO(pio) \
|
||||
do { \
|
||||
atomic_set(&(pio)->kb[REQ_OP_READ], 0); \
|
||||
atomic_set(&(pio)->kb[REQ_OP_WRITE], 0); \
|
||||
atomic_set(&(pio)->kb[REQ_OP_FLUSH], 0); \
|
||||
atomic_set(&(pio)->kb[REQ_OP_DISCARD], 0); \
|
||||
} while (0)
|
||||
#define GET_PIO_PRIO(pio) \
|
||||
(atomic_read(&(pio)->kb[REQ_OP_READ]) + \
|
||||
atomic_read(&(pio)->kb[REQ_OP_WRITE]) * 2)
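/*
 * Illustrative example (hypothetical numbers): a task group that has read
 * 300 KB and written 200 KB gets priority 300 + 200 * 2 = 700, so
 * write-heavy groups are picked first when sort_pios() selects the top
 * entries to report.
 */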
|
||||
|
||||
LIST_HEAD(pio_list);
|
||||
LIST_HEAD(inflight_pios);
|
||||
static DEFINE_SPINLOCK(pio_list_lock);
|
||||
static DEFINE_SPINLOCK(inflight_pios_lock);
|
||||
static int pio_cnt;
|
||||
static int pio_enabled;
|
||||
static unsigned int pio_duration_ms = 5000;
|
||||
static unsigned long pio_timeout;
|
||||
static struct kmem_cache *pio_cache;
|
||||
static struct pio_node *pio_hash[PIO_HASH_SIZE];
|
||||
static struct pio_node others;
|
||||
|
||||
static struct pio_node *add_pio_node(struct request *rq,
|
||||
struct task_struct *gleader)
|
||||
{
|
||||
struct pio_node *pio = NULL;
|
||||
unsigned int hash_key = 0;
|
||||
|
||||
if (pio_cnt >= MAX_PIO_NODE_NUM) {
|
||||
add_others:
|
||||
return &others;
|
||||
}
|
||||
|
||||
pio = kmem_cache_alloc(pio_cache, GFP_NOWAIT);
|
||||
if (!pio)
|
||||
goto add_others;
|
||||
|
||||
INIT_LIST_HEAD(&pio->list);
|
||||
|
||||
pio->tgid = task_tgid_nr(gleader);
|
||||
strncpy(pio->name, gleader->comm, TASK_COMM_LEN - 1);
|
||||
pio->name[TASK_COMM_LEN - 1] = '\0';
|
||||
pio->start_time = gleader->start_time;
|
||||
|
||||
RESET_PIO_IO(pio);
|
||||
atomic_set(&pio->ref_count, 1);
|
||||
|
||||
hash_key = GET_HASH_KEY(pio->tgid);
|
||||
|
||||
spin_lock(&pio_list_lock);
|
||||
list_add(&pio->list, &pio_list);
|
||||
pio->h_next = pio_hash[hash_key];
|
||||
pio_hash[hash_key] = pio;
|
||||
pio_cnt++;
|
||||
spin_unlock(&pio_list_lock);
|
||||
|
||||
return pio;
|
||||
}
|
||||
|
||||
static struct pio_node *find_pio_node(pid_t tgid, u64 tg_start_time)
|
||||
{
|
||||
struct pio_node *pio;
|
||||
|
||||
for (pio = pio_hash[GET_HASH_KEY(tgid)]; pio; pio = pio->h_next) {
|
||||
if (pio->tgid != tgid)
|
||||
continue;
|
||||
if (pio->start_time != tg_start_time)
|
||||
continue;
|
||||
return pio;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_pio_nodes(struct list_head *remove_list)
|
||||
{
|
||||
struct pio_node *pio;
|
||||
struct pio_node *pion;
|
||||
|
||||
/* move previous inflight pios to remove_list */
|
||||
spin_lock(&inflight_pios_lock);
|
||||
list_splice_init(&inflight_pios, remove_list);
|
||||
spin_unlock(&inflight_pios_lock);
|
||||
|
||||
list_for_each_entry_safe(pio, pion, remove_list, list) {
|
||||
list_del(&pio->list);
|
||||
if (atomic_read(&pio->ref_count)) {
|
||||
spin_lock(&inflight_pios_lock);
|
||||
list_add(&pio->list, &inflight_pios);
|
||||
spin_unlock(&inflight_pios_lock);
|
||||
continue;
|
||||
}
|
||||
kmem_cache_free(pio_cache, pio);
|
||||
}
|
||||
}
|
||||
|
||||
struct pio_node *get_pio_node(struct request *rq)
|
||||
{
|
||||
struct task_struct *gleader = current->group_leader;
|
||||
struct pio_node *pio;
|
||||
|
||||
if (pio_enabled == 0)
|
||||
return NULL;
|
||||
if (time_after(jiffies, pio_timeout))
|
||||
return NULL;
|
||||
if (req_op(rq) > REQ_OP_DISCARD)
|
||||
return NULL;
|
||||
|
||||
spin_lock(&pio_list_lock);
|
||||
pio = find_pio_node(task_tgid_nr(gleader), gleader->start_time);
|
||||
if (pio) {
|
||||
atomic_inc(&pio->ref_count);
|
||||
spin_unlock(&pio_list_lock);
|
||||
return pio;
|
||||
}
|
||||
spin_unlock(&pio_list_lock);
|
||||
|
||||
return add_pio_node(rq, gleader);
|
||||
}
|
||||
|
||||
void update_pio_node(struct request *rq,
|
||||
unsigned int data_size, struct pio_node *pio)
|
||||
{
|
||||
if (!pio)
|
||||
return;
|
||||
|
||||
/* convert bytes to kbytes via '>> 10' */
|
||||
atomic_add((req_op(rq) == REQ_OP_FLUSH) ? 1 : data_size >> 10,
|
||||
&pio->kb[req_op(rq)]);
|
||||
}
|
||||
|
||||
void put_pio_node(struct pio_node *pio)
|
||||
{
|
||||
if (!pio)
|
||||
return;
|
||||
|
||||
atomic_dec(&pio->ref_count);
|
||||
}
|
||||
|
||||
static void sort_pios(struct list_head *pios)
|
||||
{
|
||||
struct pio_node *max_pio = NULL;
|
||||
struct pio_node *pio;
|
||||
unsigned long long max = 0;
|
||||
LIST_HEAD(sorted_list);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SORT_PIO_NODE_NUM; i++) {
|
||||
list_for_each_entry(pio, pios, list) {
|
||||
if (GET_PIO_PRIO(pio) > max) {
|
||||
max = GET_PIO_PRIO(pio);
|
||||
max_pio = pio;
|
||||
}
|
||||
}
|
||||
if (max_pio != NULL)
|
||||
list_move_tail(&max_pio->list, &sorted_list);
|
||||
|
||||
max = 0;
|
||||
max_pio = NULL;
|
||||
}
|
||||
list_splice_init(&sorted_list, pios);
|
||||
}
|
||||
|
||||
static ssize_t pio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
LIST_HEAD(curr_pios);
|
||||
int curr_pio_cnt;
|
||||
struct pio_node curr_others;
|
||||
struct pio_node *pio;
|
||||
int len = 0;
|
||||
|
||||
spin_lock(&pio_list_lock);
|
||||
list_replace_init(&pio_list, &curr_pios);
|
||||
curr_pio_cnt = pio_cnt;
|
||||
curr_others = others;
|
||||
memset(pio_hash, 0x0, sizeof(pio_hash));
|
||||
pio_cnt = 0;
|
||||
RESET_PIO_IO(&others);
|
||||
spin_unlock(&pio_list_lock);
|
||||
|
||||
if (curr_pio_cnt > SORT_PIO_NODE_NUM)
|
||||
sort_pios(&curr_pios);
|
||||
|
||||
list_for_each_entry(pio, &curr_pios, list) {
|
||||
if (PAGE_SIZE - len > 80) {
|
||||
len += scnprintf(buf + len, PAGE_SIZE - len,
|
||||
"%d %d %d %s\n",
|
||||
pio->tgid,
|
||||
atomic_read(&pio->kb[REQ_OP_READ]),
|
||||
atomic_read(&pio->kb[REQ_OP_WRITE]),
|
||||
(pio->name[0]) ? pio->name : "-");
|
||||
continue;
|
||||
}
|
||||
|
||||
atomic_add(atomic_read(&pio->kb[REQ_OP_READ]),
|
||||
&curr_others.kb[REQ_OP_READ]);
|
||||
atomic_add(atomic_read(&pio->kb[REQ_OP_WRITE]),
|
||||
&curr_others.kb[REQ_OP_WRITE]);
|
||||
atomic_add(atomic_read(&pio->kb[REQ_OP_FLUSH]),
|
||||
&curr_others.kb[REQ_OP_FLUSH]);
|
||||
atomic_add(atomic_read(&pio->kb[REQ_OP_DISCARD]),
|
||||
&curr_others.kb[REQ_OP_DISCARD]);
|
||||
}
|
||||
|
||||
if (atomic_read(&curr_others.kb[REQ_OP_READ]) +
|
||||
atomic_read(&curr_others.kb[REQ_OP_WRITE]))
|
||||
len += scnprintf(buf + len, PAGE_SIZE - len,
|
||||
"%d %d %d %s\n",
|
||||
curr_others.tgid,
|
||||
atomic_read(&curr_others.kb[REQ_OP_READ]),
|
||||
atomic_read(&curr_others.kb[REQ_OP_WRITE]),
|
||||
curr_others.name);
|
||||
|
||||
free_pio_nodes(&curr_pios);
|
||||
pio_timeout = jiffies + msecs_to_jiffies(pio_duration_ms);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t pio_enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
LIST_HEAD(curr_pios);
|
||||
int enable = 0;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &enable);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pio_enabled = (enable >= 1) ? 1 : 0;
|
||||
|
||||
spin_lock(&pio_list_lock);
|
||||
list_replace_init(&pio_list, &curr_pios);
|
||||
memset(pio_hash, 0x0, sizeof(pio_hash));
|
||||
pio_cnt = 0;
|
||||
RESET_PIO_IO(&others);
|
||||
spin_unlock(&pio_list_lock);
|
||||
|
||||
free_pio_nodes(&curr_pios);
|
||||
pio_timeout = jiffies + msecs_to_jiffies(pio_duration_ms);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t pio_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
int len = 0;
|
||||
|
||||
len = scnprintf(buf, PAGE_SIZE, "%d\n", pio_enabled);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t pio_duration_ms_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &pio_duration_ms);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (pio_duration_ms > MAX_PIO_DURATION_MS)
|
||||
pio_duration_ms = MAX_PIO_DURATION_MS;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t pio_duration_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
int len = 0;
|
||||
|
||||
len = scnprintf(buf, PAGE_SIZE, "%u\n", pio_duration_ms);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct kobj_attribute pios_attr = __ATTR(pios, 0444, pio_show, NULL);
|
||||
static struct kobj_attribute pios_enable_attr = __ATTR(pios_enable, 0644,
|
||||
pio_enabled_show, pio_enabled_store);
|
||||
static struct kobj_attribute pios_duration_ms_attr = __ATTR(pios_duration_ms, 0644,
|
||||
pio_duration_ms_show, pio_duration_ms_store);
|
||||
|
||||
static const struct attribute *blk_sec_stat_pio_attrs[] = {
|
||||
&pios_attr.attr,
|
||||
&pios_enable_attr.attr,
|
||||
&pios_duration_ms_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
int blk_sec_stat_pio_init(struct kobject *kobj)
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (!kobj)
|
||||
return -EINVAL;
|
||||
|
||||
pio_cache = kmem_cache_create("pio_node", sizeof(struct pio_node), 0, 0, NULL);
|
||||
if (!pio_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = sysfs_create_files(kobj, blk_sec_stat_pio_attrs);
|
||||
if (retval) {
|
||||
kmem_cache_destroy(pio_cache);
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* init others */
|
||||
INIT_LIST_HEAD(&others.list);
|
||||
others.tgid = INT_MAX;
|
||||
strncpy(others.name, "others", TASK_COMM_LEN - 1);
|
||||
others.name[TASK_COMM_LEN - 1] = '\0';
|
||||
others.start_time = 0;
|
||||
RESET_PIO_IO(&others);
|
||||
atomic_set(&others.ref_count, 1);
|
||||
others.h_next = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blk_sec_stat_pio_exit(struct kobject *kobj)
|
||||
{
|
||||
if (!kobj)
|
||||
return;
|
||||
|
||||
sysfs_remove_files(kobj, blk_sec_stat_pio_attrs);
|
||||
kmem_cache_destroy(pio_cache);
|
||||
}
|
block/blk-sec-stat-traffic.c (new file, 328 lines)
@@ -0,0 +1,328 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Samsung Block Statistics
|
||||
*
|
||||
* Copyright (C) 2021 Manjong Lee <mj0123.lee@samsung.com>
|
||||
* Copyright (C) 2021 Junho Kim <junho89.kim@samsung.com>
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
* Copyright (C) 2021 Seunghwan Hyun <seunghwan.hyun@samsung.com>
|
||||
* Copyright (C) 2021 Tran Xuan Nam <nam.tx2@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/pm_qos.h>
|
||||
|
||||
#include "blk-sec.h"
|
||||
|
||||
struct traffic {
|
||||
u64 transferred_bytes;
|
||||
int level;
|
||||
int io_cpus;
|
||||
unsigned int timestamp;
|
||||
|
||||
struct work_struct update_work;
|
||||
struct delayed_work notify_work;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(u64, transferred_bytes);
|
||||
static DEFINE_PER_CPU(struct freq_qos_request, cpufreq_req);
|
||||
static struct pm_qos_request cpu_pm_req;
|
||||
static unsigned int interval_ms = 1000;
|
||||
static unsigned int interval_bytes = 100 * 1024 * 1024;
|
||||
static struct traffic traffic;
|
||||
static DEFINE_PER_CPU(u32, cpu_count);
|
||||
static DEFINE_PER_CPU(u32, prev_cpu_count);
|
||||
|
||||
#define TL0_UEVENT_DELAY_MS 2000
|
||||
|
||||
#define UPDATE_WORK_TO_TRAFFIC(work) \
|
||||
container_of(work, struct traffic, update_work)
|
||||
#define NOTIFY_WORK_TO_TRAFFIC(work) \
|
||||
container_of(to_delayed_work(work), struct traffic, notify_work)
|
||||
|
||||
static u64 get_transferred_bytes(void)
|
||||
{
|
||||
u64 bytes = 0;
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
bytes += per_cpu(transferred_bytes, cpu);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
static int get_io_cpus(void)
|
||||
{
|
||||
int i, cpus = 0;
|
||||
u32 count, prev_count;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
count = per_cpu(cpu_count, i);
|
||||
prev_count = per_cpu(prev_cpu_count, i);
|
||||
|
||||
cpus = cpus | ((!!(count - prev_count)) << i);
|
||||
*per_cpu_ptr(&prev_cpu_count, i) = count;
|
||||
}
|
||||
|
||||
return cpus;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert throughput to level. Level is defined as below:
|
||||
* 0: 0 - "< 100" MB/s
|
||||
* 1: 100 - "< 200" MB/s
|
||||
* 2: 200 - "< 400" MB/s
|
||||
* 3: 400 - "< 800" MB/s
|
||||
* ...so on
|
||||
*/
|
||||
static int tp2level(int tput)
|
||||
{
|
||||
if (tput < 100)
|
||||
return 0;
|
||||
return (int) ilog2(tput / 100) + 1;
|
||||
}
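/*
 * Illustrative values for the mapping above (hypothetical throughputs):
 *   tp2level(80)   -> 0  (below 100 MB/s)
 *   tp2level(250)  -> 2  (ilog2(250 / 100) + 1 = ilog2(2) + 1)
 *   tp2level(1600) -> 5  (ilog2(16) + 1)
 */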
|
||||
|
||||
static void notify_traffic_level(struct traffic *traffic)
|
||||
{
|
||||
#define BUF_SIZE 16
|
||||
char level_str[BUF_SIZE];
|
||||
char io_cpus_str[BUF_SIZE];
|
||||
char *envp[] = { "NAME=IO_TRAFFIC", level_str, io_cpus_str, NULL, };
|
||||
int ret;
|
||||
|
||||
if (IS_ERR_OR_NULL(blk_sec_dev))
|
||||
return;
|
||||
|
||||
memset(level_str, 0, BUF_SIZE);
|
||||
memset(io_cpus_str, 0, BUF_SIZE);
|
||||
snprintf(level_str, BUF_SIZE, "LEVEL=%d", traffic->level);
|
||||
snprintf(io_cpus_str, BUF_SIZE, "IO_CPUS=%x", traffic->io_cpus);
|
||||
|
||||
ret = kobject_uevent_env(&blk_sec_dev->kobj, KOBJ_CHANGE, envp);
|
||||
if (ret)
|
||||
pr_err("%s: couldn't send uevent (%d)", __func__, ret);
|
||||
}
|
||||
|
||||
#define MB(x) ((x) / 1024 / 1024)
|
||||
|
||||
static void update_traffic_level(struct work_struct *work)
|
||||
{
|
||||
struct traffic *traffic = UPDATE_WORK_TO_TRAFFIC(work);
|
||||
struct traffic old = *traffic;
|
||||
unsigned int duration_ms;
|
||||
u64 amount;
|
||||
int tput;
|
||||
int delay = 0;
|
||||
|
||||
traffic->transferred_bytes = get_transferred_bytes();
|
||||
traffic->timestamp = jiffies_to_msecs(jiffies);
|
||||
traffic->io_cpus |= get_io_cpus();
|
||||
|
||||
duration_ms = traffic->timestamp - old.timestamp;
|
||||
if (unlikely(!duration_ms))
|
||||
duration_ms = jiffies_to_msecs(1);
|
||||
amount = traffic->transferred_bytes - old.transferred_bytes;
|
||||
tput = MB(amount) * 1000 / duration_ms;
|
||||
traffic->level = tp2level(tput);
|
||||
|
||||
if (traffic->level == 0) {
|
||||
traffic->io_cpus = 0;
|
||||
delay = msecs_to_jiffies(TL0_UEVENT_DELAY_MS);
|
||||
}
|
||||
|
||||
if (!!traffic->level == !!old.level &&
|
||||
(traffic->level == 0 || traffic->io_cpus == old.io_cpus))
|
||||
return;
|
||||
|
||||
cancel_delayed_work_sync(&traffic->notify_work);
|
||||
queue_delayed_work(system_highpri_wq, &traffic->notify_work, delay);
|
||||
}
|
||||
|
||||
static void send_uevent(struct work_struct *work)
|
||||
{
|
||||
struct traffic *traffic = NOTIFY_WORK_TO_TRAFFIC(work);
|
||||
|
||||
notify_traffic_level(traffic);
|
||||
}
|
||||
|
||||
void blk_sec_stat_traffic_prepare(struct request *rq)
|
||||
{
|
||||
this_cpu_inc(cpu_count);
|
||||
}
|
||||
|
||||
void blk_sec_stat_traffic_update(struct request *rq, unsigned int data_size)
|
||||
{
|
||||
unsigned int duration_ms;
|
||||
u64 amount;
|
||||
|
||||
if (req_op(rq) > REQ_OP_WRITE)
|
||||
return;
|
||||
|
||||
this_cpu_add(transferred_bytes, data_size);
|
||||
this_cpu_inc(cpu_count);
|
||||
|
||||
duration_ms = jiffies_to_msecs(jiffies) - traffic.timestamp;
|
||||
amount = get_transferred_bytes() - traffic.transferred_bytes;
|
||||
|
||||
if ((duration_ms < interval_ms) && (amount < interval_bytes))
|
||||
return;
|
||||
|
||||
queue_work(system_highpri_wq, &traffic.update_work);
|
||||
}
|
||||
|
||||
static void init_traffic(struct traffic *traffic)
|
||||
{
|
||||
traffic->transferred_bytes = 0;
|
||||
traffic->level = 0;
|
||||
traffic->io_cpus = 0;
|
||||
traffic->timestamp = jiffies_to_msecs(jiffies);
|
||||
INIT_WORK(&traffic->update_work, update_traffic_level);
|
||||
INIT_DELAYED_WORK(&traffic->notify_work, send_uevent);
|
||||
}
|
||||
|
||||
static void allow_cpu_lpm(bool enable)
|
||||
{
|
||||
if (enable)
|
||||
cpu_latency_qos_update_request(&cpu_pm_req, PM_QOS_DEFAULT_VALUE);
|
||||
else
|
||||
cpu_latency_qos_update_request(&cpu_pm_req, 0);
|
||||
}
|
||||
|
||||
static ssize_t transferred_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", get_transferred_bytes());
|
||||
}
|
||||
|
||||
static ssize_t cpufreq_min_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct freq_qos_request *req;
|
||||
int len = 0;
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
req = &per_cpu(cpufreq_req, i);
|
||||
if (IS_ERR_OR_NULL(req->qos))
|
||||
continue;
|
||||
len += scnprintf(buf + len, PAGE_SIZE - len, "%d: %d, %d, %d\n",
|
||||
i,
|
||||
req->qos->min_freq.target_value,
|
||||
req->qos->min_freq.default_value,
|
||||
req->qos->min_freq.no_constraint_value);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t cpufreq_min_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct freq_qos_request *req;
|
||||
struct cpufreq_policy *policy;
|
||||
s32 cpufreq_min;
|
||||
int io_cpus;
|
||||
int i;
|
||||
char *sptr = (char *)buf;
|
||||
char *token;
|
||||
|
||||
token = strsep(&sptr, ":");
|
||||
if (!token || !sptr)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtoint(token, 16, &io_cpus))
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtoint(sptr, 10, &cpufreq_min))
|
||||
return -EINVAL;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
if (!test_bit(i, (unsigned long *)&io_cpus))
|
||||
continue;
|
||||
|
||||
req = &per_cpu(cpufreq_req, i);
|
||||
if (IS_ERR_OR_NULL(req->qos)) {
|
||||
policy = cpufreq_cpu_get(i);
|
||||
if (!policy)
|
||||
continue;
|
||||
|
||||
freq_qos_add_request(&policy->constraints,
|
||||
req, FREQ_QOS_MIN, cpufreq_min);
|
||||
cpufreq_cpu_put(policy);
|
||||
}
|
||||
freq_qos_update_request(req, cpufreq_min);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
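/*
 * The expected input is "<io_cpus_hex>:<min_freq>". For example the
 * (hypothetical) write "c:1094000" requests a minimum frequency of
 * 1094000 for CPUs 2-3, i.e. bitmask 0xc; the numbers are illustrative
 * only.
 */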
|
||||
|
||||
static ssize_t cpu_lpm_enabled_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
if (IS_ERR_OR_NULL(cpu_pm_req.qos))
|
||||
return 0;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%d: %d, %d, %d\n",
|
||||
!!cpu_pm_req.qos->target_value,
|
||||
cpu_pm_req.qos->target_value,
|
||||
cpu_pm_req.qos->default_value,
|
||||
cpu_pm_req.qos->no_constraint_value);
|
||||
}
|
||||
|
||||
static ssize_t cpu_lpm_enabled_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
int enable;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &enable);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
allow_cpu_lpm(!!enable);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static struct kobj_attribute transferred_bytes_attr =
|
||||
__ATTR(transferred_bytes, 0444, transferred_bytes_show, NULL);
|
||||
static struct kobj_attribute cpufreq_min_attr =
|
||||
__ATTR(cpufreq_min, 0600, cpufreq_min_show, cpufreq_min_store);
|
||||
static struct kobj_attribute cpu_lpm_enable_attr =
|
||||
__ATTR(cpu_lpm_enable, 0600, cpu_lpm_enabled_show, cpu_lpm_enabled_store);
|
||||
|
||||
static const struct attribute *blk_sec_stat_traffic_attrs[] = {
|
||||
&transferred_bytes_attr.attr,
|
||||
&cpufreq_min_attr.attr,
|
||||
&cpu_lpm_enable_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
int blk_sec_stat_traffic_init(struct kobject *kobj)
|
||||
{
|
||||
if (!kobj)
|
||||
return -EINVAL;
|
||||
|
||||
init_traffic(&traffic);
|
||||
|
||||
cpu_latency_qos_add_request(&cpu_pm_req, PM_QOS_DEFAULT_VALUE);
|
||||
|
||||
return sysfs_create_files(kobj, blk_sec_stat_traffic_attrs);
|
||||
}
|
||||
|
||||
void blk_sec_stat_traffic_exit(struct kobject *kobj)
|
||||
{
|
||||
if (!kobj)
|
||||
return;
|
||||
|
||||
allow_cpu_lpm(true);
|
||||
cpu_latency_qos_remove_request(&cpu_pm_req);
|
||||
cancel_delayed_work_sync(&traffic.notify_work);
|
||||
|
||||
sysfs_remove_files(kobj, blk_sec_stat_traffic_attrs);
|
||||
}
|
block/blk-sec-stat.c (new file, 203 lines)
@@ -0,0 +1,203 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Samsung Block Statistics
|
||||
*
|
||||
* Copyright (C) 2021 Manjong Lee <mj0123.lee@samsung.com>
|
||||
* Copyright (C) 2021 Junho Kim <junho89.kim@samsung.com>
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
* Copyright (C) 2021 Seunghwan Hyun <seunghwan.hyun@samsung.com>
|
||||
* Copyright (C) 2021 Tran Xuan Nam <nam.tx2@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/part_stat.h>
|
||||
|
||||
#include "blk-sec.h"
|
||||
|
||||
struct accumulated_stat {
|
||||
struct timespec64 uptime;
|
||||
unsigned long sectors[3]; /* READ, WRITE, DISCARD */
|
||||
unsigned long ios[3];
|
||||
unsigned long iot;
|
||||
};
|
||||
static struct accumulated_stat old, new;
|
||||
|
||||
extern int blk_sec_stat_pio_init(struct kobject *kobj);
|
||||
extern void blk_sec_stat_pio_exit(struct kobject *kobj);
|
||||
extern struct pio_node *get_pio_node(struct request *rq);
|
||||
extern void update_pio_node(struct request *rq,
|
||||
unsigned int data_size, struct pio_node *pio);
|
||||
extern void put_pio_node(struct pio_node *pio);
|
||||
|
||||
extern int blk_sec_stat_traffic_init(struct kobject *kobj);
|
||||
extern void blk_sec_stat_traffic_exit(struct kobject *kobj);
|
||||
extern void blk_sec_stat_traffic_prepare(struct request *rq);
|
||||
extern void blk_sec_stat_traffic_update(struct request *rq,
|
||||
unsigned int data_size);
|
||||
|
||||
void blk_sec_stat_account_init(struct request_queue *q)
|
||||
{
|
||||
if (!blk_sec_internal_disk())
|
||||
pr_err("%s: Can't find internal disk info!", __func__);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_stat_account_init);
|
||||
|
||||
void blk_sec_stat_account_exit(struct elevator_queue *eq)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_stat_account_exit);
|
||||
|
||||
#define UNSIGNED_DIFF(n, o) (((n) >= (o)) ? ((n) - (o)) : ((n) + (0 - (o))))
|
||||
#define SECTORS2KB(x) ((x) / 2)
|
||||
|
||||
static inline void get_monotonic_boottime(struct timespec64 *ts)
|
||||
{
|
||||
*ts = ktime_to_timespec64(ktime_get_boottime());
|
||||
}
|
||||
|
||||
static ssize_t diskios_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct gendisk *gd = blk_sec_internal_disk();
|
||||
struct block_device *bdev;
|
||||
long hours;
|
||||
int ret;
|
||||
|
||||
if (unlikely(!gd))
|
||||
return -EINVAL;
|
||||
|
||||
bdev = gd->part0;
|
||||
|
||||
new.ios[STAT_READ] = part_stat_read(bdev, ios[STAT_READ]);
|
||||
new.ios[STAT_WRITE] = part_stat_read(bdev, ios[STAT_WRITE]);
|
||||
new.ios[STAT_DISCARD] = part_stat_read(bdev, ios[STAT_DISCARD]);
|
||||
new.sectors[STAT_READ] = part_stat_read(bdev, sectors[STAT_READ]);
|
||||
new.sectors[STAT_WRITE] = part_stat_read(bdev, sectors[STAT_WRITE]);
|
||||
new.sectors[STAT_DISCARD] = part_stat_read(bdev, sectors[STAT_DISCARD]);
|
||||
new.iot = jiffies_to_msecs(part_stat_read(bdev, io_ticks)) / 1000;
|
||||
|
||||
get_monotonic_boottime(&(new.uptime));
|
||||
hours = (new.uptime.tv_sec - old.uptime.tv_sec) / 60;
|
||||
hours = (hours + 30) / 60;
|
||||
|
||||
ret = sprintf(buf, "\"ReadC\":\"%lu\",\"ReadKB\":\"%lu\","
|
||||
"\"WriteC\":\"%lu\",\"WriteKB\":\"%lu\","
|
||||
"\"DiscardC\":\"%lu\",\"DiscardKB\":\"%lu\","
|
||||
"\"IOT\":\"%lu\","
|
||||
"\"Hours\":\"%ld\"\n",
|
||||
UNSIGNED_DIFF(new.ios[STAT_READ], old.ios[STAT_READ]),
|
||||
SECTORS2KB(UNSIGNED_DIFF(new.sectors[STAT_READ], old.sectors[STAT_READ])),
|
||||
UNSIGNED_DIFF(new.ios[STAT_WRITE], old.ios[STAT_WRITE]),
|
||||
SECTORS2KB(UNSIGNED_DIFF(new.sectors[STAT_WRITE], old.sectors[STAT_WRITE])),
|
||||
UNSIGNED_DIFF(new.ios[STAT_DISCARD], old.ios[STAT_DISCARD]),
|
||||
SECTORS2KB(UNSIGNED_DIFF(new.sectors[STAT_DISCARD], old.sectors[STAT_DISCARD])),
|
||||
UNSIGNED_DIFF(new.iot, old.iot),
|
||||
hours);
|
||||
|
||||
old.ios[STAT_READ] = new.ios[STAT_READ];
|
||||
old.ios[STAT_WRITE] = new.ios[STAT_WRITE];
|
||||
old.ios[STAT_DISCARD] = new.ios[STAT_DISCARD];
|
||||
old.sectors[STAT_READ] = new.sectors[STAT_READ];
|
||||
old.sectors[STAT_WRITE] = new.sectors[STAT_WRITE];
|
||||
old.sectors[STAT_DISCARD] = new.sectors[STAT_DISCARD];
|
||||
old.uptime = new.uptime;
|
||||
old.iot = new.iot;
|
||||
|
||||
return ret;
|
||||
}
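/*
 * Example of the resulting sysfs output (illustrative numbers only):
 * "ReadC":"1200","ReadKB":"48000","WriteC":"800","WriteKB":"32000",
 * "DiscardC":"10","DiscardKB":"512","IOT":"35","Hours":"2"
 */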
|
||||
|
||||
static inline bool may_account_rq(struct request *rq)
|
||||
{
|
||||
struct gendisk *gd = blk_sec_internal_disk();
|
||||
|
||||
if (unlikely(!gd))
|
||||
return false;
|
||||
|
||||
if (gd->queue != rq->q)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void blk_sec_stat_account_io_prepare(struct request *rq, void *ptr_pio)
|
||||
{
|
||||
if (unlikely(!may_account_rq(rq)))
|
||||
return;
|
||||
|
||||
blk_sec_stat_traffic_prepare(rq);
|
||||
*(struct pio_node **)ptr_pio = get_pio_node(rq);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_stat_account_io_prepare);
|
||||
|
||||
void blk_sec_stat_account_io_complete(struct request *rq,
|
||||
unsigned int data_size, void *pio)
|
||||
{
|
||||
if (unlikely(!may_account_rq(rq)))
|
||||
return;
|
||||
|
||||
blk_sec_stat_traffic_update(rq, data_size);
|
||||
update_pio_node(rq, data_size, (struct pio_node *)pio);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_stat_account_io_complete);
|
||||
|
||||
void blk_sec_stat_account_io_finish(struct request *rq, void *ptr_pio)
|
||||
{
|
||||
if (unlikely(!may_account_rq(rq)))
|
||||
return;
|
||||
|
||||
put_pio_node(*(struct pio_node **)ptr_pio);
|
||||
*(struct pio_node **)ptr_pio = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_stat_account_io_finish);
|
||||
|
||||
static struct kobj_attribute diskios_attr = __ATTR(diskios, 0444, diskios_show, NULL);
|
||||
|
||||
static const struct attribute *blk_sec_stat_attrs[] = {
|
||||
&diskios_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct kobject *blk_sec_stats_kobj;
|
||||
|
||||
static int __init blk_sec_stats_init(void)
|
||||
{
|
||||
int retval;
|
||||
|
||||
blk_sec_stats_kobj = kobject_create_and_add("blk_sec_stats", kernel_kobj);
|
||||
if (!blk_sec_stats_kobj)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = sysfs_create_files(blk_sec_stats_kobj, blk_sec_stat_attrs);
|
||||
if (retval) {
|
||||
kobject_put(blk_sec_stats_kobj);
|
||||
return retval;
|
||||
}
|
||||
|
||||
retval = blk_sec_stat_pio_init(blk_sec_stats_kobj);
|
||||
if (retval)
|
||||
pr_err("%s: fail to initialize PIO sub module", __func__);
|
||||
|
||||
retval = blk_sec_stat_traffic_init(blk_sec_stats_kobj);
|
||||
if (retval)
|
||||
pr_err("%s: fail to initialize TRAFFIC sub module", __func__);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit blk_sec_stats_exit(void)
|
||||
{
|
||||
blk_sec_stat_traffic_exit(blk_sec_stats_kobj);
|
||||
blk_sec_stat_pio_exit(blk_sec_stats_kobj);
|
||||
sysfs_remove_files(blk_sec_stats_kobj, blk_sec_stat_attrs);
|
||||
kobject_put(blk_sec_stats_kobj);
|
||||
}
|
||||
|
||||
module_init(blk_sec_stats_init);
|
||||
module_exit(blk_sec_stats_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Manjong Lee <mj0123.lee@samsung.com>");
|
||||
MODULE_DESCRIPTION("Samsung block layer statistics module for various purposes");
|
||||
MODULE_VERSION("1.0");
|
block/blk-sec-wb.c (new file, 241 lines)
@@ -0,0 +1,241 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Samsung Block Write Booster
|
||||
*
|
||||
* Copyright (C) 2023 Jisoo Oh <jisoo2146.oh@samsung.com>
|
||||
* Copyright (C) 2023 Changheun Lee <nanich.lee@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/part_stat.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
#include "blk-sec.h"
|
||||
#include "../drivers/ufs/host/ufs-sec-feature.h"
|
||||
|
||||
#define MIN_ENABLE_MS 100
|
||||
#define MAX_ENABLE_MS 5000
|
||||
|
||||
struct blk_sec_wb {
|
||||
struct mutex lock;
|
||||
|
||||
volatile unsigned long request;
|
||||
unsigned int state;
|
||||
|
||||
struct work_struct ctrl_work;
|
||||
struct timer_list user_wb_off_timer;
|
||||
};
|
||||
|
||||
static struct blk_sec_wb wb;
|
||||
|
||||
static void notify_wb_change(bool enabled)
|
||||
{
|
||||
#define BUF_SIZE 16
|
||||
char buf[BUF_SIZE];
|
||||
char *envp[] = { "NAME=BLK_SEC_WB", buf, NULL, };
|
||||
int ret;
|
||||
|
||||
if (IS_ERR(blk_sec_dev))
|
||||
return;
|
||||
|
||||
memset(buf, 0, BUF_SIZE);
|
||||
snprintf(buf, BUF_SIZE, "ENABLED=%d", enabled);
|
||||
ret = kobject_uevent_env(&blk_sec_dev->kobj, KOBJ_CHANGE, envp);
|
||||
if (ret)
|
||||
pr_err("%s: couldn't send uevent (%d)", __func__, ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* don't call this function in interrupt context,
|
||||
it may sleep when ufs_sec_wb_ctrl() is called
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
static int wb_ctrl(bool enable)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
might_sleep();
|
||||
|
||||
mutex_lock(&wb.lock);
|
||||
|
||||
if (enable && (wb.state == WB_ON))
|
||||
goto out;
|
||||
|
||||
if (!enable && (wb.state == WB_OFF))
|
||||
goto out;
|
||||
|
||||
ret = ufs_sec_wb_ctrl(enable);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (enable)
|
||||
wb.state = WB_ON;
|
||||
else
|
||||
wb.state = WB_OFF;
|
||||
|
||||
notify_wb_change(enable);
|
||||
|
||||
out:
|
||||
mutex_unlock(&wb.lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void wb_ctrl_work(struct work_struct *work)
|
||||
{
|
||||
wb_ctrl(!!wb.request);
|
||||
}
|
||||
|
||||
static void user_wb_off_handler(struct timer_list *timer)
|
||||
{
|
||||
clear_bit(WB_REQ_USER, &wb.request);
|
||||
queue_work(blk_sec_common_wq, &wb.ctrl_work);
|
||||
}
|
||||
|
||||
static void ufs_reset_notify(void)
|
||||
{
|
||||
queue_work(blk_sec_common_wq, &wb.ctrl_work);
|
||||
}
|
||||
|
||||
int blk_sec_wb_ctrl(bool enable, int req_type)
|
||||
{
|
||||
if (req_type < 0 || req_type >= NR_WB_REQ_TYPE)
|
||||
return -EINVAL;
|
||||
|
||||
if (enable)
|
||||
set_bit(req_type, &wb.request);
|
||||
else
|
||||
clear_bit(req_type, &wb.request);
|
||||
|
||||
return wb_ctrl(!!wb.request);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_wb_ctrl);
|
||||
|
||||
int blk_sec_wb_ctrl_async(bool enable, int req_type)
|
||||
{
|
||||
if (req_type < 0 || req_type >= NR_WB_REQ_TYPE)
|
||||
return -EINVAL;
|
||||
|
||||
if (enable)
|
||||
set_bit(req_type, &wb.request);
|
||||
else
|
||||
clear_bit(req_type, &wb.request);
|
||||
|
||||
queue_work(blk_sec_common_wq, &wb.ctrl_work);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_wb_ctrl_async);
|
||||
|
||||
bool blk_sec_wb_is_supported(struct gendisk *gd)
|
||||
{
|
||||
if (blk_sec_internal_disk() != gd)
|
||||
return false;
|
||||
|
||||
return ufs_sec_is_wb_supported();
|
||||
}
|
||||
EXPORT_SYMBOL(blk_sec_wb_is_supported);
|
||||
|
||||
static ssize_t request_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return scnprintf(buf, PAGE_SIZE, "%lx\n", wb.request);
|
||||
}
|
||||
|
||||
static ssize_t state_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", wb.state);
|
||||
}
|
||||
|
||||
static ssize_t enable_ms_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
unsigned long expire_jiffies = wb.user_wb_off_timer.expires;
|
||||
unsigned long current_jiffies = jiffies;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
time_after(expire_jiffies, current_jiffies) ?
|
||||
jiffies_to_msecs(expire_jiffies - current_jiffies) : 0);
|
||||
}
|
||||
|
||||
static ssize_t enable_ms_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
int wb_on_duration = 0;
|
||||
unsigned long expire_jiffies = 0;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &wb_on_duration);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (wb_on_duration <= 0)
|
||||
return count;
|
||||
|
||||
if (wb_on_duration < MIN_ENABLE_MS)
|
||||
wb_on_duration = MIN_ENABLE_MS;
|
||||
if (wb_on_duration > MAX_ENABLE_MS)
|
||||
wb_on_duration = MAX_ENABLE_MS;
|
||||
|
||||
expire_jiffies = jiffies + msecs_to_jiffies(wb_on_duration);
|
||||
if (time_after(expire_jiffies, wb.user_wb_off_timer.expires))
|
||||
mod_timer(&wb.user_wb_off_timer, expire_jiffies);
|
||||
|
||||
blk_sec_wb_ctrl(true, WB_REQ_USER);
|
||||
|
||||
return count;
|
||||
}
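/*
 * Writing a duration to enable_ms turns the write booster on for that many
 * milliseconds, clamped to the 100..5000 ms range; a later, longer request
 * only pushes the off-timer further out. Illustrative usage:
 *   echo 3000 > /sys/kernel/blk_sec_wb/enable_ms
 */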
|
||||
|
||||
static struct kobj_attribute request_attr = __ATTR_RO(request);
|
||||
static struct kobj_attribute state_attr = __ATTR_RO(state);
|
||||
static struct kobj_attribute enable_ms_attr =
|
||||
__ATTR(enable_ms, 0644, enable_ms_show, enable_ms_store);
|
||||
|
||||
static const struct attribute *blk_sec_wb_attrs[] = {
|
||||
&request_attr.attr,
|
||||
&state_attr.attr,
|
||||
&enable_ms_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct kobject *blk_sec_wb_kobj;
|
||||
|
||||
static int __init blk_sec_wb_init(void)
|
||||
{
|
||||
int retval;
|
||||
|
||||
blk_sec_wb_kobj = kobject_create_and_add("blk_sec_wb", kernel_kobj);
|
||||
if (!blk_sec_wb_kobj)
|
||||
return -ENOMEM;
|
||||
|
||||
retval = sysfs_create_files(blk_sec_wb_kobj, blk_sec_wb_attrs);
|
||||
if (retval) {
|
||||
kobject_put(blk_sec_wb_kobj);
|
||||
return retval;
|
||||
}
|
||||
|
||||
mutex_init(&wb.lock);
|
||||
wb.state = WB_OFF;
|
||||
INIT_WORK(&wb.ctrl_work, wb_ctrl_work);
|
||||
timer_setup(&wb.user_wb_off_timer, user_wb_off_handler, 0);
|
||||
ufs_sec_wb_register_reset_notify(&ufs_reset_notify);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit blk_sec_wb_exit(void)
|
||||
{
|
||||
del_timer_sync(&wb.user_wb_off_timer);
|
||||
sysfs_remove_files(blk_sec_wb_kobj, blk_sec_wb_attrs);
|
||||
kobject_put(blk_sec_wb_kobj);
|
||||
}
|
||||
|
||||
module_init(blk_sec_wb_init);
|
||||
module_exit(blk_sec_wb_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Jisoo Oh <jisoo2146.oh@samsung.com>");
|
||||
MODULE_DESCRIPTION("Samsung write booster module in block layer");
|
||||
MODULE_VERSION("1.0");
|
block/blk-sec.h (new file, 92 lines)
@@ -0,0 +1,92 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef BLK_SEC_H
|
||||
#define BLK_SEC_H
|
||||
|
||||
enum {
|
||||
WB_REQ_IOSCHED = 0,
|
||||
WB_REQ_USER,
|
||||
|
||||
NR_WB_REQ_TYPE
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_BLK_SEC_COMMON)
|
||||
extern struct device *blk_sec_dev;
|
||||
extern struct workqueue_struct *blk_sec_common_wq;
|
||||
|
||||
extern struct gendisk *blk_sec_internal_disk(void);
|
||||
#else
|
||||
static struct gendisk *blk_sec_internal_disk(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_BLK_SEC_STATS)
|
||||
struct pio_node {
|
||||
struct list_head list;
|
||||
|
||||
pid_t tgid;
|
||||
char name[TASK_COMM_LEN];
|
||||
u64 start_time;
|
||||
|
||||
atomic_t kb[REQ_OP_DISCARD + 1];
|
||||
|
||||
atomic_t ref_count;
|
||||
struct pio_node *h_next; /* next pio_node for hash */
|
||||
};
|
||||
|
||||
extern void blk_sec_stat_account_init(struct request_queue *q);
|
||||
extern void blk_sec_stat_account_exit(struct elevator_queue *eq);
|
||||
extern void blk_sec_stat_account_io_prepare(struct request *rq,
|
||||
void *ptr_pio);
|
||||
extern void blk_sec_stat_account_io_complete(struct request *rq,
|
||||
unsigned int data_size, void *pio);
|
||||
extern void blk_sec_stat_account_io_finish(struct request *rq,
|
||||
void *ptr_pio);
|
||||
#else
|
||||
static inline void blk_sec_stat_account_init(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_sec_stat_account_exit(struct elevator_queue *eq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_sec_stat_account_io_prepare(struct request *rq,
|
||||
void *ptr_pio)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_sec_stat_account_io_complete(struct request *rq,
|
||||
unsigned int data_size, void *pio)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_sec_stat_account_io_finish(struct request *rq,
|
||||
void *ptr_pio)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_BLK_SEC_WB)
|
||||
extern int blk_sec_wb_ctrl(bool enable, int req_type);
|
||||
extern int blk_sec_wb_ctrl_async(bool enable, int req_type);
|
||||
extern bool blk_sec_wb_is_supported(struct gendisk *gd);
|
||||
#else
|
||||
static inline int blk_sec_wb_ctrl(bool enable, int req_type)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int blk_sec_wb_ctrl_async(bool enable, int req_type)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool blk_sec_wb_is_supported(struct gendisk *gd)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // BLK_SEC_H
|
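For reference, a minimal sketch of how a client module could drive the write booster through the interface above; the caller name and trigger condition are hypothetical, and only the calls declared in blk-sec.h are used:

#include <linux/blkdev.h>
#include "blk-sec.h"

/* Hypothetical caller: hold a write-booster request while a burst is in
 * flight. blk_sec_wb_ctrl() may sleep, so the async variant is used where
 * a sleeping context is not guaranteed. */
static void example_wb_burst(struct gendisk *gd, bool burst_active)
{
	if (!blk_sec_wb_is_supported(gd))
		return;

	if (burst_active)
		blk_sec_wb_ctrl(true, WB_REQ_IOSCHED);
	else
		blk_sec_wb_ctrl_async(false, WB_REQ_IOSCHED);
}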
@@ -28,13 +28,6 @@
#include <linux/blktrace_api.h>

#include "blk-throttle.h"

#ifdef CONFIG_BLOCK_SUPPORT_STLOG
#include <linux/fslog.h>
#else
#define ST_LOG(fmt, ...)
#endif

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
@@ -331,14 +324,6 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
	struct block_device *part;
	unsigned long idx;

#ifdef CONFIG_BLOCK_SUPPORT_STLOG
	int major = disk->major;
	int first_minor = disk->first_minor;

	if (action == KOBJ_ADD)
		ST_LOG("<%s> KOBJ_ADD %d:%d", __func__, major, first_minor);
#endif

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		if (bdev_is_partition(part) && !bdev_nr_sectors(part))
@@ -348,11 +333,6 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)

		rcu_read_unlock();
		kobject_uevent(bdev_kobj(part), action);
		if (action == KOBJ_ADD) {
			ST_LOG("<%s> KOBJ_ADD %d:%d", __func__, major,
				first_minor + part->bd_partno);
		}

		put_device(&part->bd_device);
		rcu_read_lock();
	}
@@ -657,9 +637,6 @@ void del_gendisk(struct gendisk *disk)
	struct block_device *part;
	unsigned long idx;

#ifdef CONFIG_BLOCK_SUPPORT_STLOG
	struct device *dev;
#endif
	might_sleep();

	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
@@ -711,11 +688,6 @@ void del_gendisk(struct gendisk *disk)
	disk->part0->bd_stamp = 0;
	sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
#ifdef CONFIG_BLOCK_SUPPORT_STLOG
	dev = disk_to_dev(disk);
	ST_LOG("<%s> KOBJ_REMOVE %d:%d %s", __func__,
		MAJOR(dev->devt), MINOR(dev->devt), dev->kobj.name);
#endif
	device_del(disk_to_dev(disk));

	blk_mq_freeze_queue_wait(q);
@@ -113,10 +113,6 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
	if (start + len > bdev_nr_bytes(bdev))
		return -EINVAL;

	printk(KERN_INFO "DIS %d:%d %llu %llu",
		MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev),
		(unsigned long long)start, (unsigned long long)len);

	filemap_invalidate_lock(inode->i_mapping);
	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
	if (err)
block/ssg-cgroup.c (new file, 354 lines)
@@ -0,0 +1,354 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Control Group of SamSung Generic I/O scheduler
|
||||
*
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "blk-cgroup.h"
|
||||
#include "blk-mq.h"
|
||||
#include "ssg.h"
|
||||
|
||||
|
||||
|
||||
static struct blkcg_policy ssg_blkcg_policy;
|
||||
|
||||
|
||||
|
||||
#define CPD_TO_SSG_BLKCG(_cpd) \
|
||||
container_of((_cpd), struct ssg_blkcg, cpd)
|
||||
#define BLKCG_TO_SSG_BLKCG(_blkcg) \
|
||||
CPD_TO_SSG_BLKCG(blkcg_to_cpd((_blkcg), &ssg_blkcg_policy))
|
||||
|
||||
#define PD_TO_SSG_BLKG(_pd) \
|
||||
container_of((_pd), struct ssg_blkg, pd)
|
||||
#define BLKG_TO_SSG_BLKG(_blkg) \
|
||||
PD_TO_SSG_BLKG(blkg_to_pd((_blkg), &ssg_blkcg_policy))
|
||||
|
||||
#define CSS_TO_SSG_BLKCG(css) BLKCG_TO_SSG_BLKCG(css_to_blkcg(css))
|
||||
|
||||
|
||||
static int boost_reserved_ratio = 0;
|
||||
|
||||
#define MIN_AVAILABLE_RATIO 50
|
||||
#define MAX_AVAILABLE_RATIO 100
|
||||
|
||||
|
||||
static struct blkcg_policy_data *ssg_blkcg_cpd_alloc(gfp_t gfp)
|
||||
{
|
||||
struct ssg_blkcg *ssg_blkcg;
|
||||
|
||||
ssg_blkcg = kzalloc(sizeof(struct ssg_blkcg), gfp);
|
||||
if (ZERO_OR_NULL_PTR(ssg_blkcg))
|
||||
return NULL;
|
||||
|
||||
ssg_blkcg->max_available_ratio = MAX_AVAILABLE_RATIO;
|
||||
ssg_blkcg->boost_on = 0;
|
||||
|
||||
return &ssg_blkcg->cpd;
|
||||
}
|
||||
|
||||
static void ssg_blkcg_cpd_free(struct blkcg_policy_data *cpd)
|
||||
{
|
||||
struct ssg_blkcg *ssg_blkcg = CPD_TO_SSG_BLKCG(cpd);
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return;
|
||||
|
||||
kfree(ssg_blkcg);
|
||||
}
|
||||
|
||||
static void ssg_blkcg_set_shallow_depth(struct ssg_blkcg *ssg_blkcg,
|
||||
struct ssg_blkg *ssg_blkg, struct blk_mq_tags *tags)
|
||||
{
|
||||
unsigned int depth = tags->bitmap_tags.sb.depth;
|
||||
unsigned int map_nr = tags->bitmap_tags.sb.map_nr;
|
||||
|
||||
ssg_blkcg->max_available_ratio = ssg_blkcg->boost_on ?
|
||||
MAX_AVAILABLE_RATIO : MAX_AVAILABLE_RATIO - boost_reserved_ratio;
|
||||
ssg_blkg->max_available_rqs =
|
||||
depth * ssg_blkcg->max_available_ratio / 100U;
|
||||
ssg_blkg->shallow_depth =
|
||||
max_t(unsigned int, 1, ssg_blkg->max_available_rqs / map_nr);
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *ssg_blkcg_pd_alloc(struct gendisk *disk,
|
||||
struct blkcg *blkcg, gfp_t gfp)
|
||||
{
|
||||
struct ssg_blkg *ssg_blkg;
|
||||
|
||||
ssg_blkg = kzalloc_node(sizeof(struct ssg_blkg), gfp, disk->node_id);
|
||||
if (ZERO_OR_NULL_PTR(ssg_blkg))
|
||||
return NULL;
|
||||
|
||||
return &ssg_blkg->pd;
|
||||
}
|
||||
|
||||
static void ssg_blkcg_pd_init(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct ssg_blkg *ssg_blkg;
|
||||
struct ssg_blkcg *ssg_blkcg;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
|
||||
ssg_blkg = PD_TO_SSG_BLKG(pd);
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
return;
|
||||
|
||||
ssg_blkcg = BLKCG_TO_SSG_BLKCG(pd->blkg->blkcg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return;
|
||||
|
||||
atomic_set(&ssg_blkg->current_rqs, 0);
|
||||
queue_for_each_hw_ctx(pd->blkg->q, hctx, i)
|
||||
ssg_blkcg_set_shallow_depth(ssg_blkcg, ssg_blkg,
|
||||
hctx->sched_tags);
|
||||
}
|
||||
|
||||
static void ssg_blkcg_pd_free(struct blkg_policy_data *pd)
|
||||
{
|
||||
struct ssg_blkg *ssg_blkg = PD_TO_SSG_BLKG(pd);
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
return;
|
||||
|
||||
kfree(ssg_blkg);
|
||||
}
|
||||
|
||||
unsigned int ssg_blkcg_shallow_depth(struct request_queue *q)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
struct ssg_blkg *ssg_blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
blkg = blkg_lookup(css_to_blkcg(curr_css()), q);
|
||||
ssg_blkg = BLKG_TO_SSG_BLKG(blkg);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
return 0;
|
||||
|
||||
if (atomic_read(&ssg_blkg->current_rqs) < ssg_blkg->max_available_rqs)
|
||||
return 0;
|
||||
|
||||
return ssg_blkg->shallow_depth;
|
||||
}
|
||||
|
||||
void ssg_blkcg_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
struct blkcg_gq *blkg;
|
||||
struct ssg_blkg *ssg_blkg;
|
||||
struct ssg_blkcg *ssg_blkcg;
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg) {
|
||||
ssg_blkg = BLKG_TO_SSG_BLKG(blkg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
continue;
|
||||
|
||||
ssg_blkcg = BLKCG_TO_SSG_BLKCG(blkg->blkcg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
continue;
|
||||
|
||||
atomic_set(&ssg_blkg->current_rqs, 0);
|
||||
ssg_blkcg_set_shallow_depth(ssg_blkcg, ssg_blkg, hctx->sched_tags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void ssg_blkcg_inc_rq(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct ssg_blkg *ssg_blkg = BLKG_TO_SSG_BLKG(blkg);
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
return;
|
||||
|
||||
atomic_inc(&ssg_blkg->current_rqs);
|
||||
}
|
||||
|
||||
void ssg_blkcg_dec_rq(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct ssg_blkg *ssg_blkg = BLKG_TO_SSG_BLKG(blkg);
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
return;
|
||||
|
||||
atomic_dec(&ssg_blkg->current_rqs);
|
||||
}
|
||||
|
||||
int ssg_blkcg_check_boost(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct ssg_blkcg *ssg_blkcg;
|
||||
|
||||
if (IS_ERR_OR_NULL(blkg))
|
||||
return 0;
|
||||
|
||||
ssg_blkcg = BLKCG_TO_SSG_BLKCG(blkg->blkcg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return 0;
|
||||
|
||||
if (ssg_blkcg->boost_on)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __ssg_blkcg_update_shallow_depth(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
struct blkcg_gq *blkg;
|
||||
struct ssg_blkg *ssg_blkg;
|
||||
struct ssg_blkcg *ssg_blkcg;
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg) {
|
||||
ssg_blkg = BLKG_TO_SSG_BLKG(blkg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkg))
|
||||
continue;
|
||||
|
||||
ssg_blkcg = BLKCG_TO_SSG_BLKCG(blkg->blkcg);
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
continue;
|
||||
|
||||
ssg_blkcg_set_shallow_depth(ssg_blkcg, ssg_blkg, hctx->sched_tags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void ssg_blkcg_update_shallow_depth(struct blkcg *blkcg)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node)
|
||||
queue_for_each_hw_ctx(blkg->q, hctx, i)
|
||||
__ssg_blkcg_update_shallow_depth(hctx);
|
||||
}
|
||||
|
||||
static int ssg_blkcg_show_max_available_ratio(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct ssg_blkcg *ssg_blkcg = CSS_TO_SSG_BLKCG(seq_css(sf));
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return -EINVAL;
|
||||
|
||||
seq_printf(sf, "%d\n", ssg_blkcg->max_available_ratio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ssg_blkcg_show_boost_on(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct ssg_blkcg *ssg_blkcg = CSS_TO_SSG_BLKCG(seq_css(sf));
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return -EINVAL;
|
||||
|
||||
seq_printf(sf, "%d\n", ssg_blkcg->boost_on);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ssg_blkcg_set_boost_on(struct cgroup_subsys_state *css,
|
||||
struct cftype *cftype, u64 boost_on)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct ssg_blkcg *ssg_blkcg = CSS_TO_SSG_BLKCG(css);
|
||||
|
||||
if (IS_ERR_OR_NULL(ssg_blkcg))
|
||||
return -EINVAL;
|
||||
|
||||
if (boost_on > 1)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
ssg_blkcg->boost_on = boost_on;
|
||||
ssg_blkcg_update_shallow_depth(blkcg);
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ssg_blkcg_show_boost_reserved_ratio(struct seq_file *sf, void *v)
|
||||
{
|
||||
|
||||
seq_printf(sf, "%d\n", boost_reserved_ratio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ssg_blkcg_set_boost_reserved_ratio(struct cgroup_subsys_state *css,
|
||||
struct cftype *cftype, u64 ratio)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
|
||||
if (ratio > MIN_AVAILABLE_RATIO)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
boost_reserved_ratio = ratio;
|
||||
ssg_blkcg_update_shallow_depth(blkcg);
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
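Note that boost_reserved_ratio is a single module-wide value and is rejected above MIN_AVAILABLE_RATIO (50), so cgroups without boost_on always keep at least MAX_AVAILABLE_RATIO - 50 = 50% of the scheduler tags; only the reserved remainder is set aside for boosted cgroups.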
|
||||
struct cftype ssg_blkg_files[] = {
|
||||
{
|
||||
.name = "ssg.max_available_ratio",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = ssg_blkcg_show_max_available_ratio,
|
||||
},
|
||||
{
|
||||
.name = "ssg.boost_on",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = ssg_blkcg_show_boost_on,
|
||||
.write_u64 = ssg_blkcg_set_boost_on,
|
||||
},
|
||||
{
|
||||
.name = "ssg.boost_reserved_ratio",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = ssg_blkcg_show_boost_reserved_ratio,
|
||||
.write_u64 = ssg_blkcg_set_boost_reserved_ratio,
|
||||
},
|
||||
|
||||
{} /* terminate */
|
||||
};
|
||||
|
||||
static struct blkcg_policy ssg_blkcg_policy = {
|
||||
.legacy_cftypes = ssg_blkg_files,
|
||||
|
||||
.cpd_alloc_fn = ssg_blkcg_cpd_alloc,
|
||||
.cpd_free_fn = ssg_blkcg_cpd_free,
|
||||
|
||||
.pd_alloc_fn = ssg_blkcg_pd_alloc,
|
||||
.pd_init_fn = ssg_blkcg_pd_init,
|
||||
.pd_free_fn = ssg_blkcg_pd_free,
|
||||
};
|
||||
|
||||
int ssg_blkcg_activate(struct request_queue *q)
|
||||
{
|
||||
return blkcg_activate_policy(q->disk, &ssg_blkcg_policy);
|
||||
}
|
||||
|
||||
void ssg_blkcg_deactivate(struct request_queue *q)
|
||||
{
|
||||
blkcg_deactivate_policy(q->disk, &ssg_blkcg_policy);
|
||||
}
|
||||
|
||||
int ssg_blkcg_init(void)
|
||||
{
|
||||
return blkcg_policy_register(&ssg_blkcg_policy);
|
||||
}
|
||||
|
||||
void ssg_blkcg_exit(void)
|
||||
{
|
||||
blkcg_policy_unregister(&ssg_blkcg_policy);
|
||||
}
|
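Because the policy registers ssg_blkg_files through .legacy_cftypes, the knobs show up per cgroup in the legacy (v1) blkio hierarchy. A hypothetical userspace sketch that turns boosting on for one group (the mount point and the group name "boosted" are assumptions for illustration, not paths defined by this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Assumed path: v1 blkio hierarchy, cgroup named "boosted". */
		const char *path = "/sys/fs/cgroup/blkio/boosted/ssg.boost_on";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "1", 1) != 1)	/* handled by ssg_blkcg_set_boost_on() */
			perror("write");
		close(fd);
		return 0;
	}

Values greater than 1 are rejected with -EINVAL by the write handler above.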
870
block/ssg-iosched.c
Normal file
@@ -0,0 +1,870 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* SamSung Generic I/O scheduler
|
||||
* for the blk-mq scheduling framework
|
||||
*
|
||||
* Copyright (C) 2021 Jisoo Oh <jisoo2146.oh@samsung.com>
|
||||
* Copyright (C) 2021 Manjong Lee <mj0123.lee@samsung.com>
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/sbitmap.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "elevator.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "ssg.h"
|
||||
#include "blk-sec.h"
|
||||
|
||||
#define MAX_ASYNC_WRITE_RQS	8

/* ssg data direction definitions */
#define BOOST	2	/* read 0, write 1 */

static const int read_expire = HZ / 2;		/* max time before a read is submitted. */
static const int write_expire = 5 * HZ;		/* ditto for writes, these limits are SOFT! */
static const int boost_expire = HZ / 2;		/* max time before a boost is submitted. */
static const int max_write_starvation = 2;	/* max times reads can starve a write */
static const int max_normal_starvation = 2;	/* max times boosts can starve normal reads or writes */
static const int congestion_threshold = 90;	/* percentage of congestion threshold */
static const int max_tgroup_io_ratio = 50;	/* maximum service ratio for each thread group */
static const int max_async_write_ratio = 25;	/* maximum service ratio for async write */

static inline struct ssg_request_info *ssg_rq_info(struct ssg_data *ssg,
|
||||
struct request *rq)
|
||||
{
|
||||
if (unlikely(!ssg->rq_info))
|
||||
return NULL;
|
||||
|
||||
if (unlikely(!rq))
|
||||
return NULL;
|
||||
|
||||
if (unlikely(rq->internal_tag < 0))
|
||||
return NULL;
|
||||
|
||||
if (unlikely(rq->internal_tag >= rq->q->nr_requests))
|
||||
return NULL;
|
||||
|
||||
return &ssg->rq_info[rq->internal_tag];
|
||||
}
|
||||
|
||||
static inline int ssg_rq_data_dir(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
struct ssg_request_info *rqi = ssg_rq_info(ssg, rq);
|
||||
|
||||
if (likely(rqi) && rqi->blkcg_boost)
|
||||
return BOOST;
|
||||
|
||||
return rq_data_dir(rq);
|
||||
}
|
||||
|
||||
static inline struct rb_root *ssg_rb_root(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
return &ssg->sort_list[ssg_rq_data_dir(ssg, rq)];
|
||||
}
|
||||
|
||||
/*
|
||||
* get the request after `rq' in sector-sorted order
|
||||
*/
|
||||
static inline struct request *ssg_latter_request(struct request *rq)
|
||||
{
|
||||
struct rb_node *node = rb_next(&rq->rb_node);
|
||||
|
||||
if (node)
|
||||
return rb_entry_rq(node);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ssg_add_rq_rb(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
struct rb_root *root = ssg_rb_root(ssg, rq);
|
||||
|
||||
elv_rb_add(root, rq);
|
||||
}
|
||||
|
||||
static inline void ssg_del_rq_rb(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
const int data_dir = ssg_rq_data_dir(ssg, rq);
|
||||
|
||||
if (ssg->next_rq[data_dir] == rq)
|
||||
ssg->next_rq[data_dir] = ssg_latter_request(rq);
|
||||
|
||||
elv_rb_del(ssg_rb_root(ssg, rq), rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* remove rq from rbtree and fifo.
|
||||
*/
|
||||
static void ssg_remove_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
|
||||
list_del_init(&rq->queuelist);
|
||||
|
||||
/*
|
||||
* We might not be on the rbtree, if we are doing an insert merge
|
||||
*/
|
||||
if (!RB_EMPTY_NODE(&rq->rb_node))
|
||||
ssg_del_rq_rb(ssg, rq);
|
||||
|
||||
elv_rqhash_del(q, rq);
|
||||
if (q->last_merge == rq)
|
||||
q->last_merge = NULL;
|
||||
}
|
||||
|
||||
static void ssg_request_merged(struct request_queue *q, struct request *req,
|
||||
enum elv_merge type)
|
||||
{
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
|
||||
/*
|
||||
* if the merge was a front merge, we need to reposition request
|
||||
*/
|
||||
if (type == ELEVATOR_FRONT_MERGE) {
|
||||
elv_rb_del(ssg_rb_root(ssg, req), req);
|
||||
ssg_add_rq_rb(ssg, req);
|
||||
}
|
||||
}
|
||||
|
||||
static void ssg_merged_requests(struct request_queue *q, struct request *req,
|
||||
struct request *next)
|
||||
{
|
||||
/*
|
||||
* if next expires before rq, assign its expire time to rq
|
||||
* and move into next position (next will be deleted) in fifo
|
||||
*/
|
||||
if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
|
||||
if (time_before((unsigned long)next->fifo_time,
|
||||
(unsigned long)req->fifo_time)) {
|
||||
list_move(&req->queuelist, &next->queuelist);
|
||||
req->fifo_time = next->fifo_time;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* kill knowledge of next, this one is a goner
|
||||
*/
|
||||
ssg_remove_request(q, next);
|
||||
}
|
||||
|
||||
/*
|
||||
* move an entry to dispatch queue
|
||||
*/
|
||||
static void ssg_move_request(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
const int data_dir = ssg_rq_data_dir(ssg, rq);
|
||||
|
||||
ssg->next_rq[READ] = NULL;
|
||||
ssg->next_rq[WRITE] = NULL;
|
||||
ssg->next_rq[BOOST] = NULL;
|
||||
ssg->next_rq[data_dir] = ssg_latter_request(rq);
|
||||
|
||||
/*
|
||||
* take it off the sort and fifo list
|
||||
*/
|
||||
ssg_remove_request(rq->q, rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* ssg_check_fifo returns 0 if there are no expired requests on the fifo,
|
||||
* 1 otherwise. Requires !list_empty(&ssg->fifo_list[data_dir])
|
||||
*/
|
||||
static inline int ssg_check_fifo(struct ssg_data *ssg, int ddir)
|
||||
{
|
||||
struct request *rq = rq_entry_fifo(ssg->fifo_list[ddir].next);
|
||||
|
||||
/*
|
||||
* rq is expired!
|
||||
*/
|
||||
if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the specified data direction, return the next request to
|
||||
* dispatch using arrival ordered lists.
|
||||
*/
|
||||
static struct request *ssg_fifo_request(struct ssg_data *ssg, int data_dir)
|
||||
{
|
||||
if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE && data_dir != BOOST))
|
||||
return NULL;
|
||||
|
||||
if (list_empty(&ssg->fifo_list[data_dir]))
|
||||
return NULL;
|
||||
|
||||
return rq_entry_fifo(ssg->fifo_list[data_dir].next);
|
||||
}
|
||||
|
||||
/*
|
||||
* For the specified data direction, return the next request to
|
||||
* dispatch using sector position sorted lists.
|
||||
*/
|
||||
static struct request *ssg_next_request(struct ssg_data *ssg, int data_dir)
|
||||
{
|
||||
if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE && data_dir != BOOST))
|
||||
return NULL;
|
||||
|
||||
return ssg->next_rq[data_dir];
|
||||
}
|
||||
|
||||
/*
|
||||
* ssg_dispatch_requests selects the best request according to
|
||||
* read/write expire, etc
|
||||
*/
|
||||
static struct request *__ssg_dispatch_request(struct ssg_data *ssg)
|
||||
{
|
||||
struct request *rq, *next_rq;
|
||||
bool reads, writes, boosts;
|
||||
int data_dir;
|
||||
|
||||
if (!list_empty(&ssg->dispatch)) {
|
||||
rq = list_first_entry(&ssg->dispatch, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
goto done;
|
||||
}
|
||||
|
||||
reads = !list_empty(&ssg->fifo_list[READ]);
|
||||
writes = !list_empty(&ssg->fifo_list[WRITE]);
|
||||
boosts = !list_empty(&ssg->fifo_list[BOOST]);
|
||||
|
||||
if (boosts) {
|
||||
if ((reads || writes) &&
|
||||
(ssg->starved_normal++ >= ssg->max_normal_starvation))
|
||||
goto dispatch_normal;
|
||||
|
||||
data_dir = BOOST;
|
||||
|
||||
goto dispatch_find_request;
|
||||
}
|
||||
|
||||
dispatch_normal:
|
||||
/*
|
||||
* select the appropriate data direction (read / write)
|
||||
*/
|
||||
if (reads) {
|
||||
BUG_ON(RB_EMPTY_ROOT(&ssg->sort_list[READ]));
|
||||
|
||||
if (writes &&
|
||||
(ssg->starved_writes++ >= ssg->max_write_starvation))
|
||||
goto dispatch_writes;
|
||||
|
||||
ssg->starved_normal = 0;
|
||||
data_dir = READ;
|
||||
|
||||
goto dispatch_find_request;
|
||||
}
|
||||
|
||||
	/*
	 * there are either no reads, or writes have been starved long enough
	 */
|
||||
if (writes) {
|
||||
dispatch_writes:
|
||||
BUG_ON(RB_EMPTY_ROOT(&ssg->sort_list[WRITE]));
|
||||
|
||||
ssg->starved_normal = 0;
|
||||
ssg->starved_writes = 0;
|
||||
|
||||
data_dir = WRITE;
|
||||
|
||||
goto dispatch_find_request;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
dispatch_find_request:
|
||||
/*
|
||||
* we are not running a batch, find best request for selected data_dir
|
||||
*/
|
||||
next_rq = ssg_next_request(ssg, data_dir);
|
||||
if (ssg_check_fifo(ssg, data_dir) || !next_rq) {
|
||||
/*
|
||||
* A deadline has expired, the last request was in the other
|
||||
* direction, or we have run out of higher-sectored requests.
|
||||
* Start again from the request with the earliest expiry time.
|
||||
*/
|
||||
rq = ssg_fifo_request(ssg, data_dir);
|
||||
} else {
|
||||
/*
|
||||
* The last req was the same dir and we have a next request in
|
||||
* sort order. No expired requests so continue on from here.
|
||||
*/
|
||||
rq = next_rq;
|
||||
}
|
||||
|
||||
if (!rq)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* rq is the selected appropriate request.
|
||||
*/
|
||||
ssg_move_request(ssg, rq);
|
||||
done:
|
||||
rq->rq_flags |= RQF_STARTED;
|
||||
return rq;
|
||||
}
|
||||
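The dispatcher above prefers boost requests, then reads, then writes, with the two starvation counters guaranteeing that boost traffic cannot starve normal I/O forever and reads cannot starve writes forever. A compact standalone sketch of that selection order in the same spirit (illustration only; the queue contents and limits in main() are invented):

	#include <stdio.h>
	#include <stdbool.h>

	enum { READ_DIR, WRITE_DIR, BOOST_DIR, NONE };

	/* Mirrors the priority logic of __ssg_dispatch_request() in spirit only. */
	static int pick_dir(bool reads, bool writes, bool boosts,
			    int *starved_normal, int *starved_writes,
			    int max_normal_starvation, int max_write_starvation)
	{
		if (boosts && !((reads || writes) &&
				(*starved_normal)++ >= max_normal_starvation))
			return BOOST_DIR;

		if (reads) {
			if (writes && (*starved_writes)++ >= max_write_starvation)
				goto dispatch_writes;
			*starved_normal = 0;
			return READ_DIR;
		}

		if (writes) {
	dispatch_writes:
			*starved_normal = 0;
			*starved_writes = 0;
			return WRITE_DIR;
		}

		return NONE;
	}

	int main(void)
	{
		int sn = 0, sw = 0;
		const char *name[] = { "read", "write", "boost", "none" };

		/* with max starvation 2: boost, boost, read, boost, boost */
		for (int i = 0; i < 5; i++)
			printf("%s\n", name[pick_dir(true, true, true, &sn, &sw, 2, 2)]);
		return 0;
	}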
|
||||
/*
|
||||
* One confusing aspect here is that we get called for a specific
|
||||
* hardware queue, but we may return a request that is for a
|
||||
* different hardware queue. This is because ssg-iosched has shared
|
||||
* state for all hardware queues, in terms of sorting, FIFOs, etc.
|
||||
*/
|
||||
static struct request *ssg_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct ssg_data *ssg = hctx->queue->elevator->elevator_data;
|
||||
struct request *rq;
|
||||
struct ssg_request_info *rqi;
|
||||
|
||||
spin_lock(&ssg->lock);
|
||||
rq = __ssg_dispatch_request(ssg);
|
||||
spin_unlock(&ssg->lock);
|
||||
|
||||
rqi = ssg_rq_info(ssg, rq);
|
||||
if (likely(rqi)) {
|
||||
rqi->sector = blk_rq_pos(rq);
|
||||
rqi->data_size = blk_rq_bytes(rq);
|
||||
}
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
static void ssg_completed_request(struct request *rq, u64 now)
|
||||
{
|
||||
struct ssg_data *ssg = rq->q->elevator->elevator_data;
|
||||
struct ssg_request_info *rqi;
|
||||
|
||||
rqi = ssg_rq_info(ssg, rq);
|
||||
if (likely(rqi && rqi->sector == blk_rq_pos(rq))) {
|
||||
ssg_stat_account_io_done(ssg, rq, rqi->data_size, now);
|
||||
blk_sec_stat_account_io_complete(rq, rqi->data_size, rqi->pio);
|
||||
}
|
||||
}
|
||||
|
||||
static void ssg_set_shallow_depth(struct ssg_data *ssg, struct blk_mq_tags *tags)
|
||||
{
|
||||
unsigned int depth = tags->bitmap_tags.sb.depth;
|
||||
unsigned int map_nr = tags->bitmap_tags.sb.map_nr;
|
||||
|
||||
ssg->max_async_write_rqs =
|
||||
max_t(int, depth * max_async_write_ratio / 100U, 1);
|
||||
ssg->max_async_write_rqs =
|
||||
min_t(int, ssg->max_async_write_rqs, MAX_ASYNC_WRITE_RQS);
|
||||
ssg->async_write_shallow_depth =
|
||||
max_t(unsigned int, ssg->max_async_write_rqs / map_nr, 1);
|
||||
|
||||
ssg->max_tgroup_rqs =
|
||||
max_t(int, depth * max_tgroup_io_ratio / 100U, 1);
|
||||
ssg->tgroup_shallow_depth =
|
||||
max_t(unsigned int, ssg->max_tgroup_rqs / map_nr, 1);
|
||||
}
|
||||
|
||||
static void ssg_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
struct blk_mq_tags *tags = hctx->sched_tags;
|
||||
unsigned int depth = tags->bitmap_tags.sb.depth;
|
||||
|
||||
ssg->congestion_threshold_rqs = depth * congestion_threshold / 100U;
|
||||
|
||||
kfree(ssg->rq_info);
|
||||
ssg->rq_info = kmalloc_array(depth, sizeof(struct ssg_request_info),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (ZERO_OR_NULL_PTR(ssg->rq_info))
|
||||
ssg->rq_info = NULL;
|
||||
|
||||
ssg_set_shallow_depth(ssg, tags);
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags,
|
||||
ssg->async_write_shallow_depth);
|
||||
|
||||
ssg_blkcg_depth_updated(hctx);
|
||||
ssg_wb_depth_updated(hctx);
|
||||
}
|
||||
|
||||
static inline bool ssg_op_is_async_write(unsigned int op)
|
||||
{
|
||||
return (op & REQ_OP_MASK) == REQ_OP_WRITE && !op_is_sync(op);
|
||||
}
|
||||
|
||||
static unsigned int ssg_async_write_shallow_depth(unsigned int op,
|
||||
struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct ssg_data *ssg = data->q->elevator->elevator_data;
|
||||
|
||||
if (!ssg_op_is_async_write(op))
|
||||
return 0;
|
||||
|
||||
if (atomic_read(&ssg->async_write_rqs) < ssg->max_async_write_rqs)
|
||||
return 0;
|
||||
|
||||
return ssg->async_write_shallow_depth;
|
||||
}
|
||||
|
||||
static unsigned int ssg_tgroup_shallow_depth(struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct ssg_data *ssg = data->q->elevator->elevator_data;
|
||||
pid_t tgid = task_tgid_nr(current->group_leader);
|
||||
int nr_requests = data->q->nr_requests;
|
||||
int tgroup_rqs = 0;
|
||||
int i;
|
||||
|
||||
if (unlikely(!ssg->rq_info))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < nr_requests; i++)
|
||||
if (tgid == ssg->rq_info[i].tgid)
|
||||
tgroup_rqs++;
|
||||
|
||||
if (tgroup_rqs < ssg->max_tgroup_rqs)
|
||||
return 0;
|
||||
|
||||
return ssg->tgroup_shallow_depth;
|
||||
}
|
||||
|
||||
static void ssg_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct ssg_data *ssg = data->q->elevator->elevator_data;
|
||||
unsigned int shallow_depth = ssg_blkcg_shallow_depth(data->q);
|
||||
|
||||
shallow_depth = min_not_zero(shallow_depth,
|
||||
ssg_async_write_shallow_depth(op, data));
|
||||
|
||||
if (atomic_read(&ssg->allocated_rqs) > ssg->congestion_threshold_rqs)
|
||||
shallow_depth = min_not_zero(shallow_depth,
|
||||
ssg_tgroup_shallow_depth(data));
|
||||
|
||||
data->shallow_depth = shallow_depth;
|
||||
}
|
||||
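In ssg_limit_depth() a shallow depth of 0 means "no restriction", so the individual limits are combined with min_not_zero(): the tightest non-zero limit wins, and the per-thread-group limit is only consulted once the queue is past the congestion threshold. A tiny standalone illustration of that combination rule (min_not_zero() is re-implemented here purely for the example):

	#include <stdio.h>

	/* 0 means "unlimited"; otherwise the smaller limit wins. */
	static unsigned int min_not_zero(unsigned int a, unsigned int b)
	{
		if (a == 0)
			return b;
		if (b == 0)
			return a;
		return a < b ? a : b;
	}

	int main(void)
	{
		unsigned int cgroup_limit = 0;	/* cgroup under its quota: no limit */
		unsigned int async_limit = 2;	/* too many async writes in flight */

		printf("%u\n", min_not_zero(cgroup_limit, async_limit));	/* -> 2 */
		return 0;
	}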
|
||||
static int ssg_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||
{
|
||||
struct ssg_data *ssg = hctx->queue->elevator->elevator_data;
|
||||
struct blk_mq_tags *tags = hctx->sched_tags;
|
||||
|
||||
ssg_set_shallow_depth(ssg, tags);
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags,
|
||||
ssg->async_write_shallow_depth);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ssg_exit_queue(struct elevator_queue *e)
|
||||
{
|
||||
struct ssg_data *ssg = e->elevator_data;
|
||||
|
||||
ssg_blkcg_deactivate(ssg->queue);
|
||||
|
||||
BUG_ON(!list_empty(&ssg->fifo_list[READ]));
|
||||
BUG_ON(!list_empty(&ssg->fifo_list[WRITE]));
|
||||
BUG_ON(!list_empty(&ssg->fifo_list[BOOST]));
|
||||
|
||||
ssg_stat_exit(ssg);
|
||||
ssg_wb_exit(ssg);
|
||||
blk_sec_stat_account_exit(e);
|
||||
blk_stat_disable_accounting(ssg->queue);
|
||||
|
||||
kfree(ssg->rq_info);
|
||||
kfree(ssg);
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize elevator private data (ssg_data).
|
||||
*/
|
||||
static int ssg_init_queue(struct request_queue *q, struct elevator_type *e)
|
||||
{
|
||||
struct ssg_data *ssg;
|
||||
struct elevator_queue *eq;
|
||||
|
||||
if (blk_queue_is_zoned(q))
|
||||
return -EINVAL;
|
||||
|
||||
eq = elevator_alloc(q, e);
|
||||
if (!eq)
|
||||
return -ENOMEM;
|
||||
|
||||
ssg = kzalloc_node(sizeof(*ssg), GFP_KERNEL, q->node);
|
||||
if (!ssg) {
|
||||
kobject_put(&eq->kobj);
|
||||
return -ENOMEM;
|
||||
}
|
||||
eq->elevator_data = ssg;
|
||||
|
||||
ssg->queue = q;
|
||||
INIT_LIST_HEAD(&ssg->fifo_list[READ]);
|
||||
INIT_LIST_HEAD(&ssg->fifo_list[WRITE]);
|
||||
INIT_LIST_HEAD(&ssg->fifo_list[BOOST]);
|
||||
ssg->sort_list[READ] = RB_ROOT;
|
||||
ssg->sort_list[WRITE] = RB_ROOT;
|
||||
ssg->sort_list[BOOST] = RB_ROOT;
|
||||
ssg->fifo_expire[READ] = read_expire;
|
||||
ssg->fifo_expire[WRITE] = write_expire;
|
||||
ssg->fifo_expire[BOOST] = boost_expire;
|
||||
ssg->max_normal_starvation = max_normal_starvation;
|
||||
ssg->max_write_starvation = max_write_starvation;
|
||||
ssg->front_merges = 1;
|
||||
|
||||
atomic_set(&ssg->allocated_rqs, 0);
|
||||
atomic_set(&ssg->async_write_rqs, 0);
|
||||
ssg->congestion_threshold_rqs =
|
||||
q->nr_requests * congestion_threshold / 100U;
|
||||
ssg->rq_info = kmalloc_array(q->nr_requests,
|
||||
sizeof(struct ssg_request_info),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (ZERO_OR_NULL_PTR(ssg->rq_info))
|
||||
ssg->rq_info = NULL;
|
||||
|
||||
spin_lock_init(&ssg->lock);
|
||||
INIT_LIST_HEAD(&ssg->dispatch);
|
||||
|
||||
ssg_blkcg_activate(q);
|
||||
|
||||
q->elevator = eq;
|
||||
|
||||
ssg_stat_init(ssg);
|
||||
blk_stat_enable_accounting(q);
|
||||
blk_sec_stat_account_init(q);
|
||||
ssg_wb_init(ssg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ssg_request_merge(struct request_queue *q, struct request **rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
sector_t sector = bio_end_sector(bio);
|
||||
struct request *__rq;
|
||||
|
||||
if (!ssg->front_merges)
|
||||
return ELEVATOR_NO_MERGE;
|
||||
|
||||
__rq = elv_rb_find(&ssg->sort_list[bio_data_dir(bio)], sector);
|
||||
if (__rq) {
|
||||
BUG_ON(sector != blk_rq_pos(__rq));
|
||||
|
||||
if (elv_bio_merge_ok(__rq, bio)) {
|
||||
*rq = __rq;
|
||||
return ELEVATOR_FRONT_MERGE;
|
||||
}
|
||||
}
|
||||
|
||||
return ELEVATOR_NO_MERGE;
|
||||
}
|
||||
|
||||
static bool ssg_bio_merge(struct request_queue *q, struct bio *bio,
|
||||
unsigned int nr_segs)
|
||||
{
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
struct request *free = NULL;
|
||||
bool ret;
|
||||
|
||||
spin_lock(&ssg->lock);
|
||||
ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
|
||||
spin_unlock(&ssg->lock);
|
||||
|
||||
if (free)
|
||||
blk_mq_free_request(free);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* add rq to rbtree and fifo
|
||||
*/
|
||||
static void ssg_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
const int data_dir = ssg_rq_data_dir(ssg, rq);
|
||||
|
||||
LIST_HEAD(free);
|
||||
|
||||
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
|
||||
blk_mq_free_requests(&free);
|
||||
return;
|
||||
}
|
||||
|
||||
trace_block_rq_insert(rq);
|
||||
|
||||
if ((flags & BLK_MQ_INSERT_AT_HEAD) || blk_rq_is_passthrough(rq)) {
|
||||
if (flags & BLK_MQ_INSERT_AT_HEAD)
|
||||
list_add(&rq->queuelist, &ssg->dispatch);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &ssg->dispatch);
|
||||
} else {
|
||||
ssg_add_rq_rb(ssg, rq);
|
||||
|
||||
if (rq_mergeable(rq)) {
|
||||
elv_rqhash_add(q, rq);
|
||||
if (!q->last_merge)
|
||||
q->last_merge = rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* set expire time and add to fifo list
|
||||
*/
|
||||
rq->fifo_time = jiffies + ssg->fifo_expire[data_dir];
|
||||
list_add_tail(&rq->queuelist, &ssg->fifo_list[data_dir]);
|
||||
}
|
||||
}
|
||||
|
||||
static void ssg_insert_requests(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list, blk_insert_t flags)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
|
||||
spin_lock(&ssg->lock);
|
||||
while (!list_empty(list)) {
|
||||
struct request *rq;
|
||||
|
||||
rq = list_first_entry(list, struct request, queuelist);
|
||||
list_del_init(&rq->queuelist);
|
||||
ssg_insert_request(hctx, rq, flags);
|
||||
}
|
||||
spin_unlock(&ssg->lock);
|
||||
}
|
||||
|
||||
/*
 * Per-request setup at allocation time: account the allocating thread
 * group and blk-cgroup, kick the write booster control work, and count
 * async writes. Undone in ssg_finish_request().
 */
static void ssg_prepare_request(struct request *rq)
|
||||
{
|
||||
struct ssg_data *ssg = rq->q->elevator->elevator_data;
|
||||
struct ssg_request_info *rqi;
|
||||
|
||||
atomic_inc(&ssg->allocated_rqs);
|
||||
|
||||
ssg_wb_run_ctrl_work(ssg, rq);
|
||||
|
||||
rqi = ssg_rq_info(ssg, rq);
|
||||
if (likely(rqi)) {
|
||||
rqi->tgid = task_tgid_nr(current->group_leader);
|
||||
|
||||
rcu_read_lock();
|
||||
rqi->blkg = blkg_lookup(css_to_blkcg(curr_css()), rq->q);
|
||||
ssg_blkcg_inc_rq(rqi->blkg);
|
||||
|
||||
if (ssg_blkcg_check_boost(rqi->blkg))
|
||||
rqi->blkcg_boost = 1;
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
blk_sec_stat_account_io_prepare(rq, &rqi->pio);
|
||||
}
|
||||
|
||||
if (ssg_op_is_async_write(rq->cmd_flags))
|
||||
atomic_inc(&ssg->async_write_rqs);
|
||||
}
|
||||
|
||||
static void ssg_finish_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
struct ssg_request_info *rqi;
|
||||
|
||||
if (unlikely(!(rq->rq_flags & RQF_USE_SCHED)))
|
||||
return;
|
||||
|
||||
atomic_dec(&ssg->allocated_rqs);
|
||||
|
||||
rqi = ssg_rq_info(ssg, rq);
|
||||
if (likely(rqi)) {
|
||||
rqi->tgid = 0;
|
||||
|
||||
ssg_blkcg_dec_rq(rqi->blkg);
|
||||
rqi->blkg = NULL;
|
||||
|
||||
rqi->blkcg_boost = 0;
|
||||
|
||||
blk_sec_stat_account_io_finish(rq, &rqi->pio);
|
||||
}
|
||||
|
||||
if (ssg_op_is_async_write(rq->cmd_flags))
|
||||
atomic_dec(&ssg->async_write_rqs);
|
||||
}
|
||||
|
||||
static bool ssg_has_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct ssg_data *ssg = hctx->queue->elevator->elevator_data;
|
||||
|
||||
return !list_empty_careful(&ssg->dispatch) ||
|
||||
!list_empty_careful(&ssg->fifo_list[READ]) ||
|
||||
!list_empty_careful(&ssg->fifo_list[WRITE]) ||
|
||||
!list_empty_careful(&ssg->fifo_list[BOOST]);
|
||||
}
|
||||
|
||||
/*
|
||||
* sysfs parts below
|
||||
*/
|
||||
static ssize_t ssg_var_show(int var, char *page)
|
||||
{
|
||||
return sprintf(page, "%d\n", var);
|
||||
}
|
||||
|
||||
static void ssg_var_store(int *var, const char *page)
|
||||
{
|
||||
long val;
|
||||
|
||||
if (!kstrtol(page, 10, &val))
|
||||
*var = val;
|
||||
}
|
||||
|
||||
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
|
||||
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
|
||||
{ \
|
||||
struct ssg_data *ssg = e->elevator_data; \
|
||||
int __data = __VAR; \
|
||||
if (__CONV) \
|
||||
__data = jiffies_to_msecs(__data); \
|
||||
return ssg_var_show(__data, (page)); \
|
||||
}
|
||||
SHOW_FUNCTION(ssg_read_expire_show, ssg->fifo_expire[READ], 1);
|
||||
SHOW_FUNCTION(ssg_write_expire_show, ssg->fifo_expire[WRITE], 1);
|
||||
SHOW_FUNCTION(ssg_max_write_starvation_show, ssg->max_write_starvation, 0);
|
||||
SHOW_FUNCTION(ssg_front_merges_show, ssg->front_merges, 0);
|
||||
SHOW_FUNCTION(ssg_max_tgroup_rqs_show, ssg->max_tgroup_rqs, 0);
|
||||
SHOW_FUNCTION(ssg_max_async_write_rqs_show, ssg->max_async_write_rqs, 0);
|
||||
SHOW_FUNCTION(ssg_tgroup_shallow_depth_show, ssg->tgroup_shallow_depth, 0);
|
||||
SHOW_FUNCTION(ssg_async_write_shallow_depth_show, ssg->async_write_shallow_depth, 0);
|
||||
#undef SHOW_FUNCTION
|
||||
|
||||
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
|
||||
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
|
||||
{ \
|
||||
struct ssg_data *ssg = e->elevator_data; \
|
||||
int __data; \
|
||||
ssg_var_store(&__data, (page)); \
|
||||
if (__data < (MIN)) \
|
||||
__data = (MIN); \
|
||||
else if (__data > (MAX)) \
|
||||
__data = (MAX); \
|
||||
if (__CONV) \
|
||||
*(__PTR) = msecs_to_jiffies(__data); \
|
||||
else \
|
||||
*(__PTR) = __data; \
|
||||
return count; \
|
||||
}
|
||||
STORE_FUNCTION(ssg_read_expire_store, &ssg->fifo_expire[READ], 0, INT_MAX, 1);
|
||||
STORE_FUNCTION(ssg_write_expire_store, &ssg->fifo_expire[WRITE], 0, INT_MAX, 1);
|
||||
STORE_FUNCTION(ssg_max_write_starvation_store, &ssg->max_write_starvation, INT_MIN, INT_MAX, 0);
|
||||
STORE_FUNCTION(ssg_front_merges_store, &ssg->front_merges, 0, 1, 0);
|
||||
#undef STORE_FUNCTION
|
||||
|
||||
#define SSG_ATTR(name) \
|
||||
__ATTR(name, 0644, ssg_##name##_show, ssg_##name##_store)
|
||||
#define SSG_ATTR_RO(name) \
|
||||
__ATTR(name, 0444, ssg_##name##_show, NULL)
|
||||
#define SSG_STAT_ATTR_RO(name) \
|
||||
__ATTR(name, 0444, ssg_stat_##name##_show, NULL)
|
||||
|
||||
static struct elv_fs_entry ssg_attrs[] = {
|
||||
SSG_ATTR(read_expire),
|
||||
SSG_ATTR(write_expire),
|
||||
SSG_ATTR(max_write_starvation),
|
||||
SSG_ATTR(front_merges),
|
||||
SSG_ATTR_RO(max_tgroup_rqs),
|
||||
SSG_ATTR_RO(max_async_write_rqs),
|
||||
SSG_ATTR_RO(tgroup_shallow_depth),
|
||||
SSG_ATTR_RO(async_write_shallow_depth),
|
||||
|
||||
SSG_STAT_ATTR_RO(read_latency),
|
||||
SSG_STAT_ATTR_RO(write_latency),
|
||||
SSG_STAT_ATTR_RO(flush_latency),
|
||||
SSG_STAT_ATTR_RO(discard_latency),
|
||||
SSG_STAT_ATTR_RO(inflight),
|
||||
SSG_STAT_ATTR_RO(rqs_info),
|
||||
|
||||
#if IS_ENABLED(CONFIG_MQ_IOSCHED_SSG_WB)
|
||||
SSG_ATTR(wb_on_rqs),
|
||||
SSG_ATTR(wb_off_rqs),
|
||||
SSG_ATTR(wb_on_dirty_bytes),
|
||||
SSG_ATTR(wb_off_dirty_bytes),
|
||||
SSG_ATTR(wb_on_sync_write_bytes),
|
||||
SSG_ATTR(wb_off_sync_write_bytes),
|
||||
SSG_ATTR(wb_on_dirty_busy_written_bytes),
|
||||
SSG_ATTR(wb_on_dirty_busy_msecs),
|
||||
SSG_ATTR(wb_off_delay_msecs),
|
||||
SSG_ATTR_RO(wb_triggered),
|
||||
#endif
|
||||
|
||||
__ATTR_NULL
|
||||
};
|
||||
|
||||
static struct elevator_type ssg_iosched = {
|
||||
.ops = {
|
||||
.insert_requests = ssg_insert_requests,
|
||||
.dispatch_request = ssg_dispatch_request,
|
||||
.completed_request = ssg_completed_request,
|
||||
.prepare_request = ssg_prepare_request,
|
||||
.finish_request = ssg_finish_request,
|
||||
.next_request = elv_rb_latter_request,
|
||||
.former_request = elv_rb_former_request,
|
||||
.bio_merge = ssg_bio_merge,
|
||||
.request_merge = ssg_request_merge,
|
||||
.requests_merged = ssg_merged_requests,
|
||||
.request_merged = ssg_request_merged,
|
||||
.has_work = ssg_has_work,
|
||||
.limit_depth = ssg_limit_depth,
|
||||
.depth_updated = ssg_depth_updated,
|
||||
.init_hctx = ssg_init_hctx,
|
||||
.init_sched = ssg_init_queue,
|
||||
.exit_sched = ssg_exit_queue,
|
||||
},
|
||||
|
||||
.elevator_attrs = ssg_attrs,
|
||||
.elevator_name = "ssg",
|
||||
.elevator_alias = "ssg",
|
||||
.elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
|
||||
.elevator_owner = THIS_MODULE,
|
||||
};
|
||||
MODULE_ALIAS("ssg");
|
||||
|
||||
static int __init ssg_iosched_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = elv_register(&ssg_iosched);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = ssg_blkcg_init();
|
||||
if (ret) {
|
||||
elv_unregister(&ssg_iosched);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit ssg_iosched_exit(void)
|
||||
{
|
||||
ssg_blkcg_exit();
|
||||
elv_unregister(&ssg_iosched);
|
||||
}
|
||||
|
||||
module_init(ssg_iosched_init);
|
||||
module_exit(ssg_iosched_exit);
|
||||
|
||||
MODULE_AUTHOR("Jisoo Oh");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SSG IO Scheduler");
|
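For reference, nothing ssg-specific is needed beyond the "ssg" name registered above: once the module is loaded, the scheduler can be selected per device by writing "ssg" to the scheduler attribute under /sys/block/<dev>/queue/.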
298
block/ssg-stat.c
Normal file
@@ -0,0 +1,298 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Statistics of SamSung Generic I/O scheduler
|
||||
*
|
||||
* Copyright (C) 2021 Changheun Lee <nanich.lee@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/sbitmap.h>
|
||||
|
||||
#include "elevator.h"
|
||||
#include "blk-mq.h"
|
||||
#include "ssg.h"
|
||||
|
||||
#define IO_TYPES (REQ_OP_DISCARD + 1)

static unsigned int byte_table[] = {
	4096,		// 4KB
	32768,		// 32KB
	65536,		// 64KB
	131072,		// 128KB
	524288,		// 512KB
	1048576,	// 1MB

	UINT_MAX	// should be last in this array
};
#define BYTE_TABLE_SIZE (sizeof(byte_table)/sizeof(unsigned int))

static u64 nsec_table[] = {
	500000,		// 0.5ms
	1000000,	// 1ms
	2000000,	// 2ms
	3000000,	// 3ms
	4000000,	// 4ms
	5000000,	// 5ms
	10000000,	// 10ms
	20000000,	// 20ms

	ULLONG_MAX	// should be last in this array
};
#define NSEC_TABLE_SIZE (sizeof(nsec_table)/sizeof(u64))

struct ssg_stats {
|
||||
u64 io_latency_cnt[IO_TYPES][BYTE_TABLE_SIZE][NSEC_TABLE_SIZE];
|
||||
};
|
||||
|
||||
struct ssg_bt_tags_iter_data {
|
||||
struct blk_mq_tags *tags;
|
||||
void *data;
|
||||
bool reserved;
|
||||
};
|
||||
|
||||
typedef bool (ssg_tag_iter_fn)(struct sbitmap *, unsigned int, void *);
|
||||
|
||||
static unsigned int byte_to_index(unsigned int byte)
|
||||
{
|
||||
unsigned int idx;
|
||||
|
||||
for (idx = 0; idx < BYTE_TABLE_SIZE; idx++)
|
||||
if (byte <= byte_table[idx])
|
||||
return idx;
|
||||
|
||||
return BYTE_TABLE_SIZE - 1;
|
||||
}
|
||||
|
||||
static unsigned int nsec_to_index(u64 nsec)
|
||||
{
|
||||
unsigned int idx;
|
||||
|
||||
for (idx = 0; idx < NSEC_TABLE_SIZE; idx++)
|
||||
if (nsec <= nsec_table[idx])
|
||||
return idx;
|
||||
|
||||
return NSEC_TABLE_SIZE - 1;
|
||||
}
|
||||
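The two lookup helpers above turn a request size and a completion latency into histogram indices by scanning their table until the first bucket boundary that is not exceeded. A standalone sketch with the same boundaries, fed one made-up sample (illustration only, not kernel code):

	#include <stdio.h>

	/* Same bucket boundaries as byte_table[] / nsec_table[] above,
	 * widened to one type so a single helper can scan either table. */
	static const unsigned long long byte_table[] = {
		4096, 32768, 65536, 131072, 524288, 1048576, ~0ULL
	};
	static const unsigned long long nsec_table[] = {
		500000, 1000000, 2000000, 3000000, 4000000,
		5000000, 10000000, 20000000, ~0ULL
	};

	#define TABLE_SIZE(t) (sizeof(t) / sizeof((t)[0]))

	static unsigned int to_index(unsigned long long v,
				     const unsigned long long *table, unsigned int n)
	{
		unsigned int i;

		for (i = 0; i < n; i++)
			if (v <= table[i])
				return i;
		return n - 1;
	}

	int main(void)
	{
		/* hypothetical sample: a 48KiB request that completed in 1.3ms */
		unsigned int bi = to_index(48 * 1024, byte_table, TABLE_SIZE(byte_table));
		unsigned int ni = to_index(1300000, nsec_table, TABLE_SIZE(nsec_table));

		printf("byte bucket %u, nsec bucket %u\n", bi, ni);	/* -> 2 and 2 */
		return 0;
	}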
|
||||
static void update_io_latency(struct ssg_data *ssg, struct request *rq,
|
||||
unsigned int data_size, u64 now)
|
||||
{
|
||||
struct ssg_stats *stats;
|
||||
int type = req_op(rq);
|
||||
int byte_idx, ns_idx;
|
||||
|
||||
if (type > REQ_OP_DISCARD)
|
||||
return;
|
||||
|
||||
if (rq->io_start_time_ns > now)
|
||||
return;
|
||||
|
||||
byte_idx = byte_to_index(data_size);
|
||||
ns_idx = nsec_to_index(now - rq->io_start_time_ns);
|
||||
|
||||
stats = get_cpu_ptr(ssg->stats);
|
||||
stats->io_latency_cnt[type][byte_idx][ns_idx]++;
|
||||
put_cpu_ptr(stats);
|
||||
}
|
||||
|
||||
void ssg_stat_account_io_done(struct ssg_data *ssg, struct request *rq,
|
||||
unsigned int data_size, u64 now)
|
||||
{
|
||||
if (unlikely(!ssg->stats))
|
||||
return;
|
||||
|
||||
update_io_latency(ssg, rq, data_size, now);
|
||||
}
|
||||
|
||||
static int print_io_latency(struct ssg_stats __percpu *stats, int io_type,
|
||||
char *buf, int buf_size)
|
||||
{
|
||||
u64 sum[BYTE_TABLE_SIZE][NSEC_TABLE_SIZE] = { 0, };
|
||||
int cpu;
|
||||
int len = 0;
|
||||
int byte_idx, ns_idx;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct ssg_stats *s = per_cpu_ptr(stats, cpu);
|
||||
|
||||
for (byte_idx = 0; byte_idx < BYTE_TABLE_SIZE; byte_idx++)
|
||||
for (ns_idx = 0; ns_idx < NSEC_TABLE_SIZE; ns_idx++)
|
||||
sum[byte_idx][ns_idx] +=
|
||||
s->io_latency_cnt[io_type][byte_idx][ns_idx];
|
||||
}
|
||||
|
||||
for (byte_idx = 0; byte_idx < BYTE_TABLE_SIZE; byte_idx++) {
|
||||
len += snprintf(buf + len, buf_size - len, "%u:",
|
||||
byte_table[byte_idx] / 1024);
|
||||
for (ns_idx = 0; ns_idx < NSEC_TABLE_SIZE; ns_idx++)
|
||||
len += snprintf(buf + len, buf_size - len, " %llu",
|
||||
sum[byte_idx][ns_idx]);
|
||||
len += snprintf(buf + len, buf_size - len, "\n");
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
#define IO_LATENCY_SHOW_FUNC(__FUNC, __IO_TYPE) \
|
||||
ssize_t __FUNC(struct elevator_queue *e, char *page) \
|
||||
{ \
|
||||
struct ssg_data *ssg = e->elevator_data; \
|
||||
if (unlikely(!ssg->stats)) \
|
||||
return 0; \
|
||||
return print_io_latency(ssg->stats, \
|
||||
__IO_TYPE, page, PAGE_SIZE); \
|
||||
}
|
||||
IO_LATENCY_SHOW_FUNC(ssg_stat_read_latency_show, REQ_OP_READ);
|
||||
IO_LATENCY_SHOW_FUNC(ssg_stat_write_latency_show, REQ_OP_WRITE);
|
||||
IO_LATENCY_SHOW_FUNC(ssg_stat_flush_latency_show, REQ_OP_FLUSH);
|
||||
IO_LATENCY_SHOW_FUNC(ssg_stat_discard_latency_show, REQ_OP_DISCARD);
|
||||
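For reference, print_io_latency() emits one line per size bucket: the bucket's upper bound in KiB, a colon, then one counter per latency bucket (0.5ms, 1ms, 2ms, 3ms, 4ms, 5ms, 10ms, 20ms and overflow, left to right). Reading e.g. the read_latency attribute therefore looks roughly like this (the counts are invented; the last row's bound is UINT_MAX/1024):

	4: 1532 410 96 12 3 1 0 0 0
	32: 208 77 31 9 2 0 0 0 0
	64: 96 40 18 5 1 0 0 0 0
	...
	4194303: 0 0 0 0 0 0 0 0 0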
|
||||
static void ssg_all_tag_iter(struct blk_mq_tags *tags, ssg_tag_iter_fn *fn, struct ssg_bt_tags_iter_data *iter_data)
|
||||
{
|
||||
iter_data->tags = tags;
|
||||
|
||||
if (tags->nr_reserved_tags) {
|
||||
iter_data->reserved = true;
|
||||
sbitmap_for_each_set(&tags->breserved_tags.sb, fn, iter_data);
|
||||
}
|
||||
|
||||
iter_data->reserved = false;
|
||||
sbitmap_for_each_set(&tags->bitmap_tags.sb, fn, iter_data);
|
||||
}
|
||||
|
||||
static bool ssg_count_inflight(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct ssg_bt_tags_iter_data *iter_data = data;
|
||||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
unsigned int *inflight = iter_data->data;
|
||||
bool reserved = iter_data->reserved;
|
||||
struct request *rq;
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
|
||||
rq = tags->static_rqs[bitnr];
|
||||
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
if (req_op(rq) < IO_TYPES)
|
||||
inflight[req_op(rq)]++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void get_ssg_inflight(struct request_queue *q, unsigned int *inflight)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
struct ssg_bt_tags_iter_data iter_data = {
|
||||
.data = inflight,
|
||||
};
|
||||
|
||||
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
|
||||
ssg_all_tag_iter(q->sched_shared_tags, ssg_count_inflight, &iter_data);
|
||||
} else {
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
/*
|
||||
* If no software queues are currently mapped to this
|
||||
* hardware queue, there's nothing to check
|
||||
*/
|
||||
if (!blk_mq_hw_queue_mapped(hctx))
|
||||
continue;
|
||||
|
||||
ssg_all_tag_iter(hctx->sched_tags, ssg_count_inflight, &iter_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t ssg_stat_inflight_show(struct elevator_queue *e, char *page)
|
||||
{
|
||||
struct ssg_data *ssg = e->elevator_data;
|
||||
unsigned int inflight[IO_TYPES] = {0, };
|
||||
|
||||
if (unlikely(!ssg->stats))
|
||||
return 0;
|
||||
|
||||
get_ssg_inflight(ssg->queue, inflight);
|
||||
|
||||
return snprintf(page, PAGE_SIZE, "%u %u %u\n", inflight[REQ_OP_READ],
|
||||
inflight[REQ_OP_WRITE], inflight[REQ_OP_DISCARD]);
|
||||
}
|
||||
|
||||
static bool print_ssg_rq_info(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct ssg_bt_tags_iter_data *iter_data = data;
|
||||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
bool reserved = iter_data->reserved;
|
||||
char *page = iter_data->data;
|
||||
struct request *rq;
|
||||
int len = strlen(page);
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
|
||||
rq = tags->static_rqs[bitnr];
|
||||
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
scnprintf(page + len, PAGE_SIZE - len, "%d %d %x %x %llu %u %llu %d\n",
|
||||
rq->tag, rq->internal_tag, req_op(rq), rq->rq_flags,
|
||||
blk_rq_pos(rq), blk_rq_bytes(rq), rq->start_time_ns, rq->state);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void print_ssg_rqs(struct request_queue *q, char *page)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
struct ssg_bt_tags_iter_data iter_data = {
|
||||
.data = page,
|
||||
};
|
||||
|
||||
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
|
||||
ssg_all_tag_iter(q->sched_shared_tags, print_ssg_rq_info, &iter_data);
|
||||
} else {
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
/*
|
||||
* If no software queues are currently mapped to this
|
||||
* hardware queue, there's nothing to check
|
||||
*/
|
||||
if (!blk_mq_hw_queue_mapped(hctx))
|
||||
continue;
|
||||
|
||||
ssg_all_tag_iter(hctx->sched_tags, print_ssg_rq_info, &iter_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t ssg_stat_rqs_info_show(struct elevator_queue *e, char *page)
|
||||
{
|
||||
struct ssg_data *ssg = e->elevator_data;
|
||||
|
||||
if (unlikely(!ssg->stats))
|
||||
return 0;
|
||||
|
||||
print_ssg_rqs(ssg->queue, page);
|
||||
|
||||
return strlen(page);
|
||||
}
|
||||
|
||||
int ssg_stat_init(struct ssg_data *ssg)
|
||||
{
|
||||
ssg->stats = alloc_percpu_gfp(struct ssg_stats,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!ssg->stats)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ssg_stat_exit(struct ssg_data *ssg)
|
||||
{
|
||||
if (ssg->stats)
|
||||
free_percpu(ssg->stats);
|
||||
}
|
372
block/ssg-wb.c
Normal file
@@ -0,0 +1,372 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Write Booster of SamSung Generic I/O scheduler
|
||||
*
|
||||
* Copyright (C) 2022 Jisoo Oh <jisoo2146.oh@samsung.com>
|
||||
* Copyright (C) 2023 Changheun Lee <nanich.lee@samsung.com>
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/sbitmap.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "elevator.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-sec.h"
|
||||
#include "ssg.h"
|
||||
|
||||
struct wb_params {
|
||||
int on_rqs;
|
||||
int off_rqs;
|
||||
int on_dirty_bytes;
|
||||
int off_dirty_bytes;
|
||||
int on_sync_write_bytes;
|
||||
int off_sync_write_bytes;
|
||||
int on_dirty_busy_written_pages;
|
||||
int on_dirty_busy_jiffies;
|
||||
int off_delay_jiffies;
|
||||
unsigned long dirty_busy_start_jiffies;
|
||||
unsigned long dirty_busy_start_written_pages;
|
||||
};
|
||||
|
||||
struct ssg_wb_data {
|
||||
struct wb_params params;
|
||||
struct request_queue *queue;
|
||||
struct delayed_work wb_ctrl_work;
|
||||
struct delayed_work wb_deferred_off_work;
|
||||
|
||||
atomic_t wb_triggered;
|
||||
};
|
||||
|
||||
struct io_amount_data {
|
||||
unsigned int allocated_rqs;
|
||||
unsigned int sync_write_bytes;
|
||||
unsigned long dirty_bytes;
|
||||
};
|
||||
|
||||
struct ssg_wb_iter_data {
|
||||
struct blk_mq_tags *tags;
|
||||
void *data;
|
||||
bool reserved;
|
||||
};
|
||||
|
||||
static const int _on_rqs_ratio = 90;
static const int _off_rqs_ratio = 40;
static const int _on_dirty_bytes = 50*1024*1024;
static const int _off_dirty_bytes = 25*1024*1024;
static const int _on_sync_write_bytes = 2*1024*1024;
static const int _off_sync_write_bytes = 1*1024*1024;
static const int _on_dirty_busy_written_bytes = 100*1024*1024;
static const int _on_dirty_busy_msecs = 1000;
static const int _off_delay_msecs = 5000;

#define may_wb_on(io_amount, params) \
|
||||
((io_amount).allocated_rqs >= params.on_rqs || \
|
||||
(io_amount).dirty_bytes >= params.on_dirty_bytes || \
|
||||
(io_amount).sync_write_bytes >= params.on_sync_write_bytes || \
|
||||
(params.dirty_busy_start_written_pages && \
|
||||
(global_node_page_state(NR_WRITTEN) - params.dirty_busy_start_written_pages) \
|
||||
> params.on_dirty_busy_written_pages))
|
||||
|
||||
#define may_wb_off(io_amount, params) \
|
||||
((io_amount).allocated_rqs < params.off_rqs && \
|
||||
(io_amount).dirty_bytes < params.off_dirty_bytes && \
|
||||
(io_amount).sync_write_bytes < params.off_sync_write_bytes)
|
||||
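may_wb_on()/may_wb_off() implement a hysteresis: the "on" thresholds sit well above the "off" thresholds (90% vs 40% of the queue depth, 50MiB vs 25MiB of dirty data, and so on), so the booster does not flap when a metric hovers around a single cut-off. A standalone sketch of the on/off behaviour using just the request-count pair (illustration only; the load curve is invented, and the real off path is additionally gated by the dirty and sync-write limits and deferred by off_delay_jiffies):

	#include <stdio.h>
	#include <stdbool.h>

	int main(void)
	{
		/* e.g. nr_requests = 64: on at >= 57 allocated rqs, off below 25 */
		const int on_rqs = 64 * 90 / 100, off_rqs = 64 * 40 / 100;
		const int load[] = { 10, 30, 58, 60, 40, 30, 20, 59, 10 };
		bool wb_on = false;

		for (unsigned int i = 0; i < sizeof(load) / sizeof(load[0]); i++) {
			if (!wb_on && load[i] >= on_rqs)
				wb_on = true;		/* trigger_wb_on() */
			else if (wb_on && load[i] < off_rqs)
				wb_on = false;		/* (deferred) trigger_wb_off() */
			printf("%d rqs -> WB %s\n", load[i], wb_on ? "on" : "off");
		}
		return 0;
	}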
|
||||
static void trigger_wb_on(struct ssg_wb_data *ssg_wb)
|
||||
{
|
||||
cancel_delayed_work_sync(&ssg_wb->wb_deferred_off_work);
|
||||
blk_sec_wb_ctrl(true, WB_REQ_IOSCHED);
|
||||
atomic_set(&ssg_wb->wb_triggered, true);
|
||||
}
|
||||
|
||||
static void wb_off_work(struct work_struct *work)
|
||||
{
|
||||
blk_sec_wb_ctrl(false, WB_REQ_IOSCHED);
|
||||
}
|
||||
|
||||
static void trigger_wb_off(struct ssg_wb_data *ssg_wb)
|
||||
{
|
||||
queue_delayed_work(blk_sec_common_wq,
|
||||
&ssg_wb->wb_deferred_off_work, ssg_wb->params.off_delay_jiffies);
|
||||
|
||||
atomic_set(&ssg_wb->wb_triggered, false);
|
||||
}
|
||||
|
||||
static void init_wb_params(struct ssg_wb_data *ssg_wb)
|
||||
{
|
||||
ssg_wb->params.on_rqs = ssg_wb->queue->nr_requests * _on_rqs_ratio / 100U;
|
||||
ssg_wb->params.off_rqs = ssg_wb->queue->nr_requests * _off_rqs_ratio / 100U;
|
||||
ssg_wb->params.on_dirty_bytes = _on_dirty_bytes;
|
||||
ssg_wb->params.off_dirty_bytes = _off_dirty_bytes;
|
||||
ssg_wb->params.on_sync_write_bytes = _on_sync_write_bytes;
|
||||
ssg_wb->params.off_sync_write_bytes = _off_sync_write_bytes;
|
||||
ssg_wb->params.on_dirty_busy_written_pages = _on_dirty_busy_written_bytes / PAGE_SIZE;
|
||||
ssg_wb->params.on_dirty_busy_jiffies = msecs_to_jiffies(_on_dirty_busy_msecs);
|
||||
ssg_wb->params.off_delay_jiffies = msecs_to_jiffies(_off_delay_msecs);
|
||||
ssg_wb->params.dirty_busy_start_written_pages = 0;
|
||||
ssg_wb->params.dirty_busy_start_jiffies = 0;
|
||||
}
|
||||
|
||||
static bool wb_count_io(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct ssg_wb_iter_data *iter_data = data;
|
||||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
struct io_amount_data *io_amount = iter_data->data;
|
||||
bool reserved = iter_data->reserved;
|
||||
struct request *rq;
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
|
||||
rq = tags->static_rqs[bitnr];
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
io_amount->allocated_rqs++;
|
||||
if (req_op(rq) == REQ_OP_WRITE && rq->cmd_flags & REQ_SYNC)
|
||||
io_amount->sync_write_bytes += blk_rq_bytes(rq);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void wb_all_tag_iter(struct blk_mq_tags *tags, struct io_amount_data *io_amount)
|
||||
{
|
||||
struct ssg_wb_iter_data iter_data = {
|
||||
.data = io_amount,
|
||||
.tags = tags,
|
||||
};
|
||||
|
||||
if (tags->nr_reserved_tags) {
|
||||
iter_data.reserved = true;
|
||||
sbitmap_for_each_set(&tags->breserved_tags.sb, wb_count_io, &iter_data);
|
||||
}
|
||||
|
||||
iter_data.reserved = false;
|
||||
sbitmap_for_each_set(&tags->bitmap_tags.sb, wb_count_io, &iter_data);
|
||||
}
|
||||
|
||||
static void wb_get_io_amount(struct request_queue *q, struct io_amount_data *io_amount)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
unsigned long i;
|
||||
|
||||
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
|
||||
wb_all_tag_iter(q->sched_shared_tags, io_amount);
|
||||
} else {
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
/*
|
||||
* If no software queues are currently mapped to this
|
||||
* hardware queue, there's nothing to check
|
||||
*/
|
||||
if (!blk_mq_hw_queue_mapped(hctx))
|
||||
continue;
|
||||
|
||||
wb_all_tag_iter(hctx->sched_tags, io_amount);
|
||||
}
|
||||
}
|
||||
|
||||
io_amount->dirty_bytes = (global_node_page_state(NR_FILE_DIRTY) +
|
||||
global_node_page_state(NR_WRITEBACK)) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static void update_dirty_busy_status(struct ssg_wb_data *ssg_wb, struct io_amount_data *io_amount)
|
||||
{
|
||||
if (time_after(jiffies, ssg_wb->params.dirty_busy_start_jiffies + ssg_wb->params.on_dirty_busy_jiffies)) {
|
||||
ssg_wb->params.dirty_busy_start_jiffies = 0;
|
||||
ssg_wb->params.dirty_busy_start_written_pages = 0;
|
||||
}
|
||||
|
||||
if (!ssg_wb->params.dirty_busy_start_jiffies && io_amount->dirty_bytes >= ssg_wb->params.off_dirty_bytes) {
|
||||
ssg_wb->params.dirty_busy_start_jiffies = jiffies;
|
||||
ssg_wb->params.dirty_busy_start_written_pages = global_node_page_state(NR_WRITTEN);
|
||||
}
|
||||
}
|
||||
|
||||
static void wb_ctrl_work(struct work_struct *work)
|
||||
{
|
||||
struct ssg_wb_data *ssg_wb = container_of(to_delayed_work(work),
|
||||
struct ssg_wb_data, wb_ctrl_work);
|
||||
struct io_amount_data io_amount = {
|
||||
.allocated_rqs = 0,
|
||||
.sync_write_bytes = 0,
|
||||
};
|
||||
|
||||
wb_get_io_amount(ssg_wb->queue, &io_amount);
|
||||
update_dirty_busy_status(ssg_wb, &io_amount);
|
||||
|
||||
if (atomic_read(&ssg_wb->wb_triggered)) {
|
||||
if (may_wb_off(io_amount, ssg_wb->params))
|
||||
trigger_wb_off(ssg_wb);
|
||||
} else {
|
||||
if (may_wb_on(io_amount, ssg_wb->params))
|
||||
trigger_wb_on(ssg_wb);
|
||||
}
|
||||
|
||||
if (atomic_read(&ssg_wb->wb_triggered))
|
||||
queue_delayed_work(blk_sec_common_wq, &ssg_wb->wb_ctrl_work,
|
||||
ssg_wb->params.off_delay_jiffies);
|
||||
}
|
||||
|
||||
void ssg_wb_run_ctrl_work(struct ssg_data *ssg, struct request *rq)
|
||||
{
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data;
|
||||
|
||||
if (!ssg_wb)
|
||||
return;
|
||||
|
||||
if (atomic_read(&ssg_wb->wb_triggered))
|
||||
return;
|
||||
|
||||
if (((rq->cmd_flags & REQ_OP_MASK) == REQ_OP_READ)
|
||||
&& atomic_read(&ssg->allocated_rqs) < ssg_wb->params.on_rqs)
|
||||
return;
|
||||
|
||||
if (!work_busy(&ssg_wb->wb_ctrl_work.work))
|
||||
queue_delayed_work(blk_sec_common_wq, &ssg_wb->wb_ctrl_work, 0);
|
||||
}
|
||||
|
||||
void ssg_wb_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct ssg_data *ssg = q->elevator->elevator_data;
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data;
|
||||
int nr_rqs;
|
||||
|
||||
if (!ssg_wb)
|
||||
return;
|
||||
|
||||
nr_rqs = hctx->sched_tags->bitmap_tags.sb.depth;
|
||||
ssg_wb->params.on_rqs = nr_rqs * _on_rqs_ratio / 100U;
|
||||
ssg_wb->params.off_rqs = nr_rqs * _off_rqs_ratio / 100U;
|
||||
}
|
||||
|
||||
void ssg_wb_init(struct ssg_data *ssg)
|
||||
{
|
||||
struct ssg_wb_data *ssg_wb;
|
||||
struct gendisk *gd = ssg->queue->disk;
|
||||
|
||||
if (!gd)
|
||||
return;
|
||||
|
||||
if (!blk_sec_wb_is_supported(gd))
|
||||
return;
|
||||
|
||||
ssg_wb = kzalloc(sizeof(*ssg_wb), GFP_KERNEL);
|
||||
if (!ssg_wb)
|
||||
return;
|
||||
|
||||
ssg_wb->queue = ssg->queue;
|
||||
|
||||
INIT_DELAYED_WORK(&ssg_wb->wb_ctrl_work, wb_ctrl_work);
|
||||
INIT_DELAYED_WORK(&ssg_wb->wb_deferred_off_work, wb_off_work);
|
||||
|
||||
init_wb_params(ssg_wb);
|
||||
|
||||
atomic_set(&ssg_wb->wb_triggered, false);
|
||||
ssg->wb_data = ssg_wb;
|
||||
}
|
||||
|
||||
void ssg_wb_exit(struct ssg_data *ssg)
|
||||
{
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data;
|
||||
|
||||
if (!ssg_wb)
|
||||
return;
|
||||
|
||||
cancel_delayed_work_sync(&ssg_wb->wb_ctrl_work);
|
||||
cancel_delayed_work_sync(&ssg_wb->wb_deferred_off_work);
|
||||
|
||||
if (atomic_read(&ssg_wb->wb_triggered))
|
||||
blk_sec_wb_ctrl(false, WB_REQ_IOSCHED);
|
||||
|
||||
ssg->wb_data = NULL;
|
||||
kfree(ssg_wb);
|
||||
}
|
||||
|
||||
/* sysfs */
|
||||
#define SHOW_FUNC(__NAME, __VAR, __CONV) \
|
||||
ssize_t ssg_wb_##__NAME##_show(struct elevator_queue *e, char *page) \
|
||||
{ \
|
||||
struct ssg_data *ssg = e->elevator_data; \
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data; \
|
||||
int val; \
|
||||
\
|
||||
if (!ssg_wb) \
|
||||
return 0; \
|
||||
\
|
||||
if (__CONV == 1) \
|
||||
val = jiffies_to_msecs(__VAR); \
|
||||
else if (__CONV == 2) \
|
||||
val = __VAR * PAGE_SIZE; \
|
||||
else \
|
||||
val = __VAR; \
|
||||
\
|
||||
return snprintf(page, PAGE_SIZE, "%d\n", val); \
|
||||
}
|
||||
SHOW_FUNC(on_rqs, ssg_wb->params.on_rqs, 0);
|
||||
SHOW_FUNC(off_rqs, ssg_wb->params.off_rqs, 0);
|
||||
SHOW_FUNC(on_dirty_bytes, ssg_wb->params.on_dirty_bytes, 0);
|
||||
SHOW_FUNC(off_dirty_bytes, ssg_wb->params.off_dirty_bytes, 0);
|
||||
SHOW_FUNC(on_sync_write_bytes, ssg_wb->params.on_sync_write_bytes, 0);
|
||||
SHOW_FUNC(off_sync_write_bytes, ssg_wb->params.off_sync_write_bytes, 0);
|
||||
SHOW_FUNC(on_dirty_busy_written_bytes, ssg_wb->params.on_dirty_busy_written_pages, 2);
|
||||
SHOW_FUNC(on_dirty_busy_msecs, ssg_wb->params.on_dirty_busy_jiffies, 1);
|
||||
SHOW_FUNC(off_delay_msecs, ssg_wb->params.off_delay_jiffies, 1);
|
||||
#undef SHOW_FUNC
|
||||
|
||||
#define STORE_FUNC(__NAME, __PTR, __VAR, __COND, __CONV) \
|
||||
ssize_t ssg_wb_##__NAME##_store(struct elevator_queue *e, \
|
||||
const char *page, size_t count) \
|
||||
{ \
|
||||
struct ssg_data *ssg = e->elevator_data; \
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data; \
|
||||
int __VAR; \
|
||||
\
|
||||
if (!ssg_wb) \
|
||||
return count; \
|
||||
\
|
||||
if (kstrtoint(page, 10, &__VAR)) \
|
||||
return count; \
|
||||
\
|
||||
if (!(__COND)) \
|
||||
return count; \
|
||||
\
|
||||
if (__CONV == 1) \
|
||||
*(__PTR) = msecs_to_jiffies(__VAR); \
|
||||
else if (__CONV == 2) \
|
||||
*(__PTR) = __VAR / PAGE_SIZE; \
|
||||
else \
|
||||
*(__PTR) = __VAR; \
|
||||
\
|
||||
return count; \
|
||||
}
|
||||
STORE_FUNC(on_rqs, &ssg_wb->params.on_rqs, val,
|
||||
val >= ssg_wb->params.off_rqs, 0);
|
||||
STORE_FUNC(off_rqs, &ssg_wb->params.off_rqs, val,
|
||||
val >= 0 && val <= ssg_wb->params.on_rqs, 0);
|
||||
STORE_FUNC(on_dirty_bytes, &ssg_wb->params.on_dirty_bytes, val,
|
||||
val >= ssg_wb->params.off_dirty_bytes, 0);
|
||||
STORE_FUNC(off_dirty_bytes, &ssg_wb->params.off_dirty_bytes, val,
|
||||
val >= 0 && val <= ssg_wb->params.on_dirty_bytes, 0);
|
||||
STORE_FUNC(on_sync_write_bytes, &ssg_wb->params.on_sync_write_bytes, val,
|
||||
val >= ssg_wb->params.off_sync_write_bytes, 0);
|
||||
STORE_FUNC(off_sync_write_bytes, &ssg_wb->params.off_sync_write_bytes, val,
|
||||
val >= 0 && val <= ssg_wb->params.on_sync_write_bytes, 0);
|
||||
STORE_FUNC(on_dirty_busy_written_bytes, &ssg_wb->params.on_dirty_busy_written_pages, val,
|
||||
val >= 0, 2);
|
||||
STORE_FUNC(on_dirty_busy_msecs, &ssg_wb->params.on_dirty_busy_jiffies, val,
|
||||
val >= 0, 1);
|
||||
STORE_FUNC(off_delay_msecs, &ssg_wb->params.off_delay_jiffies, val, val >= 0, 1);
|
||||
#undef STORE_FUNC
|
||||
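The __CONV argument in the SHOW/STORE macros above selects a unit conversion: 1 converts between milliseconds in sysfs and jiffies internally, 2 between bytes in sysfs and pages internally. A small standalone illustration of the round trips, assuming HZ=250 and a 4KiB page purely for the example (the conversion helpers are simplified; the kernel's round differently):

	#include <stdio.h>

	#define HZ		250	/* assumption for the example */
	#define PAGE_SIZE	4096	/* assumption for the example */

	static int msecs_to_jiffies(int ms)	{ return ms * HZ / 1000; }
	static int jiffies_to_msecs(int j)	{ return j * 1000 / HZ; }

	int main(void)
	{
		int stored_jiffies = msecs_to_jiffies(5000);	  /* off_delay_msecs=5000 -> 1250 */
		int stored_pages = 100 * 1024 * 1024 / PAGE_SIZE; /* 100MiB -> 25600 pages */

		printf("shown back as %d msecs and %d bytes\n",
		       jiffies_to_msecs(stored_jiffies), stored_pages * PAGE_SIZE);
		return 0;
	}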
|
||||
ssize_t ssg_wb_triggered_show(struct elevator_queue *e, char *page)
|
||||
{
|
||||
struct ssg_data *ssg = e->elevator_data;
|
||||
struct ssg_wb_data *ssg_wb = ssg->wb_data;
|
||||
|
||||
if (!ssg_wb)
|
||||
return 0;
|
||||
|
||||
return snprintf(page, PAGE_SIZE, "%d\n", atomic_read(&ssg_wb->wb_triggered));
|
||||
}
|
199
block/ssg.h
Normal file
@@ -0,0 +1,199 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef SSG_H
|
||||
#define SSG_H
|
||||
|
||||
#include "blk-cgroup.h"
|
||||
|
||||
struct ssg_request_info {
|
||||
pid_t tgid;
|
||||
|
||||
sector_t sector;
|
||||
unsigned int data_size;
|
||||
unsigned int blkcg_boost;
|
||||
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
void *pio;
|
||||
};
|
||||
|
||||
struct ssg_data {
|
||||
struct request_queue *queue;
|
||||
|
||||
/*
|
||||
* requests are present on both sort_list and fifo_list
|
||||
*/
|
||||
struct rb_root sort_list[3];
|
||||
struct list_head fifo_list[3];
|
||||
|
||||
	/*
	 * next request in sort order for each direction (read, write,
	 * boost); NULL when that direction has no next request
	 */
struct request *next_rq[3];
|
||||
unsigned int starved_writes; /* times reads have starved writes */
|
||||
unsigned int starved_normal; /* times boosts have starved normal reads or writes */
|
||||
|
||||
/*
|
||||
* settings that change how the i/o scheduler behaves
|
||||
*/
|
||||
int fifo_expire[3];
|
||||
int max_write_starvation;
|
||||
int max_normal_starvation;
|
||||
int front_merges;
|
||||
|
||||
/*
|
||||
* to control request allocation
|
||||
*/
|
||||
atomic_t allocated_rqs;
|
||||
atomic_t async_write_rqs;
|
||||
int congestion_threshold_rqs;
|
||||
int max_tgroup_rqs;
|
||||
int max_async_write_rqs;
|
||||
unsigned int tgroup_shallow_depth; /* thread group shallow depth for each tag map */
|
||||
unsigned int async_write_shallow_depth; /* async write shallow depth for each tag map */
|
||||
|
||||
/*
|
||||
* I/O context information for each request
|
||||
*/
|
||||
struct ssg_request_info *rq_info;
|
||||
|
||||
/*
|
||||
* Statistics
|
||||
*/
|
||||
void __percpu *stats;
|
||||
|
||||
spinlock_t lock;
|
||||
struct list_head dispatch;
|
||||
|
||||
/*
|
||||
* Write booster
|
||||
*/
|
||||
void *wb_data;
|
||||
};
|
||||
|
||||
static inline struct cgroup_subsys_state *curr_css(void)
|
||||
{
|
||||
return task_css(current, io_cgrp_id);
|
||||
}
|
||||
|
||||
/* ssg-stat.c */
|
||||
extern int ssg_stat_init(struct ssg_data *ssg);
|
||||
extern void ssg_stat_exit(struct ssg_data *ssg);
|
||||
extern void ssg_stat_account_io_done(struct ssg_data *ssg,
|
||||
struct request *rq, unsigned int data_size, u64 now);
|
||||
extern ssize_t ssg_stat_read_latency_show(struct elevator_queue *e, char *page);
|
||||
extern ssize_t ssg_stat_write_latency_show(struct elevator_queue *e, char *page);
|
||||
extern ssize_t ssg_stat_flush_latency_show(struct elevator_queue *e, char *page);
|
||||
extern ssize_t ssg_stat_discard_latency_show(struct elevator_queue *e, char *page);
|
||||
extern ssize_t ssg_stat_inflight_show(struct elevator_queue *e, char *page);
|
||||
extern ssize_t ssg_stat_rqs_info_show(struct elevator_queue *e, char *page);
|
||||
|
||||
/* ssg-cgroup.c */
|
||||
#if IS_ENABLED(CONFIG_MQ_IOSCHED_SSG_CGROUP)
|
||||
struct ssg_blkcg {
|
||||
struct blkcg_policy_data cpd; /* must be the first member */
|
||||
|
||||
int max_available_ratio;
|
||||
int boost_on;
|
||||
};
|
||||
|
||||
struct ssg_blkg {
|
||||
struct blkg_policy_data pd; /* must be the first member */
|
||||
|
||||
atomic_t current_rqs;
|
||||
int max_available_rqs;
|
||||
unsigned int shallow_depth; /* shallow depth for each tag map to get sched tag */
|
||||
};
|
||||
|
||||
extern int ssg_blkcg_init(void);
|
||||
extern void ssg_blkcg_exit(void);
|
||||
extern int ssg_blkcg_activate(struct request_queue *q);
|
||||
extern void ssg_blkcg_deactivate(struct request_queue *q);
|
||||
extern unsigned int ssg_blkcg_shallow_depth(struct request_queue *q);
|
||||
extern void ssg_blkcg_depth_updated(struct blk_mq_hw_ctx *hctx);
|
||||
extern void ssg_blkcg_inc_rq(struct blkcg_gq *blkg);
|
||||
extern void ssg_blkcg_dec_rq(struct blkcg_gq *blkg);
|
||||
extern int ssg_blkcg_check_boost(struct blkcg_gq *blkg);
|
||||
#else
|
||||
static inline int ssg_blkcg_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void ssg_blkcg_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int ssg_blkcg_activate(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void ssg_blkcg_deactivate(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
static inline unsigned int ssg_blkcg_shallow_depth(struct request_queue *q)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void ssg_blkcg_depth_updated(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void ssg_blkcg_inc_rq(struct blkcg_gq *blkg)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void ssg_blkcg_dec_rq(struct blkcg_gq *blkg)
|
||||
{
|
||||
}
|
||||
static inline int ssg_blkcg_check_boost(struct blkcg_gq *blkg)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
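The "must be the first member" comments matter because blk-cgroup hands the scheduler plain struct blkcg_policy_data / struct blkg_policy_data pointers, and the policy code converts them back with container_of(). A minimal sketch of the conversion helpers one would expect in ssg-cgroup.c, assuming the registered policy object is named ssg_blkcg_policy (a hypothetical name; it is not visible in this hunk):

/* Sketch: conversion helpers implied by the embedded-first-member layout. */
extern struct blkcg_policy ssg_blkcg_policy;    /* assumed to live in ssg-cgroup.c */

static inline struct ssg_blkcg *cpd_to_ssg_blkcg(struct blkcg_policy_data *cpd)
{
        return cpd ? container_of(cpd, struct ssg_blkcg, cpd) : NULL;
}

static inline struct ssg_blkg *pd_to_ssg_blkg(struct blkg_policy_data *pd)
{
        return pd ? container_of(pd, struct ssg_blkg, pd) : NULL;
}

static inline struct ssg_blkg *blkg_to_ssg_blkg(struct blkcg_gq *blkg)
{
        /* blkg_to_pd() returns the per-policy data attached to this blkg */
        return pd_to_ssg_blkg(blkg_to_pd(blkg, &ssg_blkcg_policy));
}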
/* ssg-wb.c */
#if IS_ENABLED(CONFIG_MQ_IOSCHED_SSG_WB)
extern void ssg_wb_run_ctrl_work(struct ssg_data *ssg, struct request *rq);
extern void ssg_wb_depth_updated(struct blk_mq_hw_ctx *hctx);
extern void ssg_wb_init(struct ssg_data *ssg);
extern void ssg_wb_exit(struct ssg_data *ssg);
extern ssize_t ssg_wb_on_rqs_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_on_rqs_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_off_rqs_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_off_rqs_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_on_dirty_bytes_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_on_dirty_bytes_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_off_dirty_bytes_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_off_dirty_bytes_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_on_sync_write_bytes_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_on_sync_write_bytes_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_off_sync_write_bytes_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_off_sync_write_bytes_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_on_dirty_busy_written_bytes_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_on_dirty_busy_written_bytes_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_on_dirty_busy_msecs_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_on_dirty_busy_msecs_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_off_delay_msecs_show(struct elevator_queue *e, char *page);
extern ssize_t ssg_wb_off_delay_msecs_store(struct elevator_queue *e, const char *page, size_t count);
extern ssize_t ssg_wb_triggered_show(struct elevator_queue *e, char *page);
#else
static inline void ssg_wb_run_ctrl_work(struct ssg_data *ssg, struct request *rq)
{
}

static inline void ssg_wb_depth_updated(struct blk_mq_hw_ctx *hctx)
{
}

static inline void ssg_wb_init(struct ssg_data *ssg)
{
}

static inline void ssg_wb_exit(struct ssg_data *ssg)
{
}
#endif

#endif // SSG_H
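Because the header provides empty inline stubs when CONFIG_MQ_IOSCHED_SSG_CGROUP or CONFIG_MQ_IOSCHED_SSG_WB is disabled, the core scheduler can invoke these hooks unconditionally and the calls simply compile away. A sketch of a caller in ssg-iosched.c under that assumption (ssg_completed_request is a hypothetical name; the commit's actual completion path is not shown here):

/* Sketch only: hypothetical request-completion hook in ssg-iosched.c. */
static void ssg_completed_request(struct request *rq, u64 now)
{
        struct ssg_data *ssg = rq->q->elevator->elevator_data;

        /* Always compiled; becomes a no-op stub without CONFIG_MQ_IOSCHED_SSG_WB. */
        ssg_wb_run_ctrl_work(ssg, rq);

        /* Statistics hook from ssg-stat.c, which is always built into the ssg module. */
        ssg_stat_account_io_done(ssg, rq, blk_rq_bytes(rq), now);
}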