forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
189 lines
6.0 KiB
189 lines
6.0 KiB
#ifndef BLK_THROTTLE_H |
|
#define BLK_THROTTLE_H |
|
|
|
#include "blk-cgroup-rwstat.h" |
|
|
|
/* |
|
* To implement hierarchical throttling, throtl_grps form a tree and bios |
|
* are dispatched upwards level by level until they reach the top and get |
|
* issued. When dispatching bios from the children and local group at each |
|
* level, if the bios are dispatched into a single bio_list, there's a risk |
|
* of a local or child group which can queue many bios at once filling up |
|
* the list starving others. |
|
* |
|
* To avoid such starvation, dispatched bios are queued separately |
|
* according to where they came from. When they are again dispatched to |
|
* the parent, they're popped in round-robin order so that no single source |
|
* hogs the dispatch window. |
|
* |
|
* throtl_qnode is used to keep the queued bios separated by their sources. |
|
* Bios are queued to throtl_qnode which in turn is queued to |
|
* throtl_service_queue and then dispatched in round-robin order. |
|
* |
|
* It's also used to track the reference counts on blkg's. A qnode always |
|
* belongs to a throtl_grp and gets queued on itself or the parent, so |
|
* incrementing the reference of the associated throtl_grp when a qnode is |
|
* queued and decrementing when dequeued is enough to keep the whole blkg |
|
* tree pinned while bios are in flight. |
|
*/ |
|
struct throtl_qnode { |
|
struct list_head node; /* service_queue->queued[] */ |
|
struct bio_list bios; /* queued bios */ |
|
struct throtl_grp *tg; /* tg this qnode belongs to */ |
|
}; |
|
|
|
struct throtl_service_queue { |
|
struct throtl_service_queue *parent_sq; /* the parent service_queue */ |
|
|
|
/* |
|
* Bios queued directly to this service_queue or dispatched from |
|
* children throtl_grp's. |
|
*/ |
|
struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ |
|
unsigned int nr_queued[2]; /* number of queued bios */ |
|
|
|
/* |
|
* RB tree of active children throtl_grp's, which are sorted by |
|
* their ->disptime. |
|
*/ |
|
struct rb_root_cached pending_tree; /* RB tree of active tgs */ |
|
unsigned int nr_pending; /* # queued in the tree */ |
|
unsigned long first_pending_disptime; /* disptime of the first tg */ |
|
struct timer_list pending_timer; /* fires on first_pending_disptime */ |
|
}; |
|
|
|
/* State bits; blk_throtl_bio() tests them against throtl_grp->flags. */
enum tg_state_flags {
	THROTL_TG_PENDING	 = 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	 = 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_HAS_IOPS_LIMIT = 1 << 2,	/* tg has iops limit */
	THROTL_TG_CANCELING	 = 1 << 3,	/* starts to cancel bio */
};
|
|
|
/*
 * Limit kinds.  LIMIT_CNT is the number of kinds and is used as the size of
 * the second dimension of the per-group limit tables in struct throtl_grp.
 */
enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};
|
|
|
struct throtl_grp { |
|
/* must be the first member */ |
|
struct blkg_policy_data pd; |
|
|
|
/* active throtl group service_queue member */ |
|
struct rb_node rb_node; |
|
|
|
/* throtl_data this group belongs to */ |
|
struct throtl_data *td; |
|
|
|
/* this group's service queue */ |
|
struct throtl_service_queue service_queue; |
|
|
|
/* |
|
* qnode_on_self is used when bios are directly queued to this |
|
* throtl_grp so that local bios compete fairly with bios |
|
* dispatched from children. qnode_on_parent is used when bios are |
|
* dispatched from this throtl_grp into its parent and will compete |
|
* with the sibling qnode_on_parents and the parent's |
|
* qnode_on_self. |
|
*/ |
|
struct throtl_qnode qnode_on_self[2]; |
|
struct throtl_qnode qnode_on_parent[2]; |
|
|
|
/* |
|
* Dispatch time in jiffies. This is the estimated time when group |
|
* will unthrottle and is ready to dispatch more bio. It is used as |
|
* key to sort active groups in service tree. |
|
*/ |
|
unsigned long disptime; |
|
|
|
unsigned int flags; |
|
|
|
/* are there any throtl rules between this group and td? */ |
|
bool has_rules[2]; |
|
|
|
/* internally used bytes per second rate limits */ |
|
uint64_t bps[2][LIMIT_CNT]; |
|
/* user configured bps limits */ |
|
uint64_t bps_conf[2][LIMIT_CNT]; |
|
|
|
/* internally used IOPS limits */ |
|
unsigned int iops[2][LIMIT_CNT]; |
|
/* user configured IOPS limits */ |
|
unsigned int iops_conf[2][LIMIT_CNT]; |
|
|
|
/* Number of bytes dispatched in current slice */ |
|
uint64_t bytes_disp[2]; |
|
/* Number of bio's dispatched in current slice */ |
|
unsigned int io_disp[2]; |
|
|
|
unsigned long last_low_overflow_time[2]; |
|
|
|
uint64_t last_bytes_disp[2]; |
|
unsigned int last_io_disp[2]; |
|
|
|
unsigned long last_check_time; |
|
|
|
unsigned long latency_target; /* us */ |
|
unsigned long latency_target_conf; /* us */ |
|
/* When did we start a new slice */ |
|
unsigned long slice_start[2]; |
|
unsigned long slice_end[2]; |
|
|
|
unsigned long last_finish_time; /* ns / 1024 */ |
|
unsigned long checked_last_finish_time; /* ns / 1024 */ |
|
unsigned long avg_idletime; /* ns / 1024 */ |
|
unsigned long idletime_threshold; /* us */ |
|
unsigned long idletime_threshold_conf; /* us */ |
|
|
|
unsigned int bio_cnt; /* total bios */ |
|
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ |
|
unsigned long bio_cnt_reset_time; |
|
|
|
struct blkg_rwstat stat_bytes; |
|
struct blkg_rwstat stat_ios; |
|
}; |
|
|
|
/* The throttling blkcg policy; blkg_to_tg() passes it to blkg_to_pd(). */
extern struct blkcg_policy blkcg_policy_throtl;
|
|
|
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) |
|
{ |
|
return pd ? container_of(pd, struct throtl_grp, pd) : NULL; |
|
} |
|
|
|
static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) |
|
{ |
|
return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); |
|
} |
|
|
|
/*
 * Internal throttling interface
 */

/* Only pointers are used below, so forward declarations are sufficient. */
struct bio;
struct request_queue;

#ifndef CONFIG_BLK_DEV_THROTTLING
/*
 * Throttling is compiled out: init reports success, the other hooks do
 * nothing, and blk_throtl_bio() always returns false.
 */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct request_queue *q) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct request_queue *q);

/*
 * Inline fast path in front of __blk_throtl_bio().
 *
 * Returns false without calling __blk_throtl_bio() when either
 *  - @bio already carries BIO_THROTTLED and its group has no iops limit, or
 *  - the group has no rules for @bio's data direction.
 * Otherwise the result of __blk_throtl_bio() is returned.
 *
 * NOTE(review): tg is dereferenced unchecked although pd_to_tg() can return
 * NULL; presumably callers guarantee bio->bi_blkg has throttle policy data
 * attached -- confirm.
 */
static inline bool blk_throtl_bio(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);

	/* no need to throttle bps any more if the bio has been throttled */
	if (bio_flagged(bio, BIO_THROTTLED) &&
	    !(tg->flags & THROTL_TG_HAS_IOPS_LIMIT))
		return false;

	if (!tg->has_rules[bio_data_dir(bio)])
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */
|
|
|
#endif
|
|
|