forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
358 lines
8.3 KiB
358 lines
8.3 KiB
// SPDX-License-Identifier: GPL-2.0-only |
|
/* |
|
* TCP Illinois congestion control. |
|
* Home page: |
|
* http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html |
|
* |
|
* The algorithm is described in: |
|
* "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm |
|
* for High-Speed Networks" |
|
* http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf |
|
* |
|
* Implemented from description in paper and ns-2 simulation. |
|
* Copyright (C) 2007 Stephen Hemminger <[email protected]> |
|
*/ |
|
|
|
#include <linux/module.h> |
|
#include <linux/skbuff.h> |
|
#include <linux/inet_diag.h> |
|
#include <asm/div64.h> |
|
#include <net/tcp.h> |
|
|
|
/*
 * Additive-increase factor (alpha) is kept in fixed point with ALPHA_SHIFT
 * fractional bits, so ALPHA_SCALE represents 1.0.
 */
#define ALPHA_SHIFT	7
#define ALPHA_SCALE	(1u << ALPHA_SHIFT)
#define ALPHA_MIN	((3 * ALPHA_SCALE) / 10)	/* ~0.3 */
#define ALPHA_MAX	(10 * ALPHA_SCALE)		/* 10.0 */
#define ALPHA_BASE	ALPHA_SCALE			/* 1.0 */

/*
 * Upper bound applied to RTT samples (about 3.3 secs) so that a delay
 * multiplied by ALPHA_MAX still fits in 32 bits.
 */
#define RTT_MAX		(U32_MAX / ALPHA_MAX)
|
|
|
/*
 * Multiplicative-decrease factor (beta) is kept in fixed point with
 * BETA_SHIFT fractional bits, so BETA_SCALE represents 1.0.
 */
#define BETA_SHIFT	6
#define BETA_SCALE	(1u << BETA_SHIFT)
#define BETA_MIN	(BETA_SCALE / 8)	/* 0.125 */
#define BETA_MAX	(BETA_SCALE / 2)	/* 0.5 */
#define BETA_BASE	BETA_MAX
|
|
|
static int win_thresh __read_mostly = 15; |
|
module_param(win_thresh, int, 0); |
|
MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); |
|
|
|
static int theta __read_mostly = 5; |
|
module_param(theta, int, 0); |
|
MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); |
|
|
|
/* TCP Illinois Parameters */ |
|
struct illinois { |
|
u64 sum_rtt; /* sum of rtt's measured within last rtt */ |
|
u16 cnt_rtt; /* # of rtts measured within last rtt */ |
|
u32 base_rtt; /* min of all rtt in usec */ |
|
u32 max_rtt; /* max of all rtt in usec */ |
|
u32 end_seq; /* right edge of current RTT */ |
|
u32 alpha; /* Additive increase */ |
|
u32 beta; /* Muliplicative decrease */ |
|
u16 acked; /* # packets acked by current ACK */ |
|
u8 rtt_above; /* average rtt has gone above threshold */ |
|
u8 rtt_low; /* # of rtts measurements below threshold */ |
|
}; |
|
|
|
static void rtt_reset(struct sock *sk) |
|
{ |
|
struct tcp_sock *tp = tcp_sk(sk); |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
ca->end_seq = tp->snd_nxt; |
|
ca->cnt_rtt = 0; |
|
ca->sum_rtt = 0; |
|
|
|
/* TODO: age max_rtt? */ |
|
} |
|
|
|
static void tcp_illinois_init(struct sock *sk) |
|
{ |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
ca->alpha = ALPHA_MAX; |
|
ca->beta = BETA_BASE; |
|
ca->base_rtt = 0x7fffffff; |
|
ca->max_rtt = 0; |
|
|
|
ca->acked = 0; |
|
ca->rtt_low = 0; |
|
ca->rtt_above = 0; |
|
|
|
rtt_reset(sk); |
|
} |
|
|
|
/* Measure RTT for each ack. */ |
|
static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample) |
|
{ |
|
struct illinois *ca = inet_csk_ca(sk); |
|
s32 rtt_us = sample->rtt_us; |
|
|
|
ca->acked = sample->pkts_acked; |
|
|
|
/* dup ack, no rtt sample */ |
|
if (rtt_us < 0) |
|
return; |
|
|
|
/* ignore bogus values, this prevents wraparound in alpha math */ |
|
if (rtt_us > RTT_MAX) |
|
rtt_us = RTT_MAX; |
|
|
|
/* keep track of minimum RTT seen so far */ |
|
if (ca->base_rtt > rtt_us) |
|
ca->base_rtt = rtt_us; |
|
|
|
/* and max */ |
|
if (ca->max_rtt < rtt_us) |
|
ca->max_rtt = rtt_us; |
|
|
|
++ca->cnt_rtt; |
|
ca->sum_rtt += rtt_us; |
|
} |
|
|
|
/* Maximum queuing delay */ |
|
static inline u32 max_delay(const struct illinois *ca) |
|
{ |
|
return ca->max_rtt - ca->base_rtt; |
|
} |
|
|
|
/* Average queuing delay */ |
|
static inline u32 avg_delay(const struct illinois *ca) |
|
{ |
|
u64 t = ca->sum_rtt; |
|
|
|
do_div(t, ca->cnt_rtt); |
|
return t - ca->base_rtt; |
|
} |
|
|
|
/* |
|
* Compute value of alpha used for additive increase. |
|
* If small window then use 1.0, equivalent to Reno. |
|
* |
|
* For larger windows, adjust based on average delay. |
|
* A. If average delay is at minimum (we are uncongested), |
|
* then use large alpha (10.0) to increase faster. |
|
* B. If average delay is at maximum (getting congested) |
|
* then use small alpha (0.3) |
|
* |
|
* The result is a convex window growth curve. |
|
*/ |
|
static u32 alpha(struct illinois *ca, u32 da, u32 dm) |
|
{ |
|
u32 d1 = dm / 100; /* Low threshold */ |
|
|
|
if (da <= d1) { |
|
/* If never got out of low delay zone, then use max */ |
|
if (!ca->rtt_above) |
|
return ALPHA_MAX; |
|
|
|
/* Wait for 5 good RTT's before allowing alpha to go alpha max. |
|
* This prevents one good RTT from causing sudden window increase. |
|
*/ |
|
if (++ca->rtt_low < theta) |
|
return ca->alpha; |
|
|
|
ca->rtt_low = 0; |
|
ca->rtt_above = 0; |
|
return ALPHA_MAX; |
|
} |
|
|
|
ca->rtt_above = 1; |
|
|
|
/* |
|
* Based on: |
|
* |
|
* (dm - d1) amin amax |
|
* k1 = ------------------- |
|
* amax - amin |
|
* |
|
* (dm - d1) amin |
|
* k2 = ---------------- - d1 |
|
* amax - amin |
|
* |
|
* k1 |
|
* alpha = ---------- |
|
* k2 + da |
|
*/ |
|
|
|
dm -= d1; |
|
da -= d1; |
|
return (dm * ALPHA_MAX) / |
|
(dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); |
|
} |
|
|
|
/* |
|
* Beta used for multiplicative decrease. |
|
* For small window sizes returns same value as Reno (0.5) |
|
* |
|
* If delay is small (10% of max) then beta = 1/8 |
|
* If delay is up to 80% of max then beta = 1/2 |
|
* In between is a linear function |
|
*/ |
|
static u32 beta(u32 da, u32 dm) |
|
{ |
|
u32 d2, d3; |
|
|
|
d2 = dm / 10; |
|
if (da <= d2) |
|
return BETA_MIN; |
|
|
|
d3 = (8 * dm) / 10; |
|
if (da >= d3 || d3 <= d2) |
|
return BETA_MAX; |
|
|
|
/* |
|
* Based on: |
|
* |
|
* bmin d3 - bmax d2 |
|
* k3 = ------------------- |
|
* d3 - d2 |
|
* |
|
* bmax - bmin |
|
* k4 = ------------- |
|
* d3 - d2 |
|
* |
|
* b = k3 + k4 da |
|
*/ |
|
return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) |
|
/ (d3 - d2); |
|
} |
|
|
|
/* Update alpha and beta values once per RTT */ |
|
static void update_params(struct sock *sk) |
|
{ |
|
struct tcp_sock *tp = tcp_sk(sk); |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
if (tp->snd_cwnd < win_thresh) { |
|
ca->alpha = ALPHA_BASE; |
|
ca->beta = BETA_BASE; |
|
} else if (ca->cnt_rtt > 0) { |
|
u32 dm = max_delay(ca); |
|
u32 da = avg_delay(ca); |
|
|
|
ca->alpha = alpha(ca, da, dm); |
|
ca->beta = beta(da, dm); |
|
} |
|
|
|
rtt_reset(sk); |
|
} |
|
|
|
/* |
|
* In case of loss, reset to default values |
|
*/ |
|
static void tcp_illinois_state(struct sock *sk, u8 new_state) |
|
{ |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
if (new_state == TCP_CA_Loss) { |
|
ca->alpha = ALPHA_BASE; |
|
ca->beta = BETA_BASE; |
|
ca->rtt_low = 0; |
|
ca->rtt_above = 0; |
|
rtt_reset(sk); |
|
} |
|
} |
|
|
|
/* |
|
* Increase window in response to successful acknowledgment. |
|
*/ |
|
static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
|
{ |
|
struct tcp_sock *tp = tcp_sk(sk); |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
if (after(ack, ca->end_seq)) |
|
update_params(sk); |
|
|
|
/* RFC2861 only increase cwnd if fully utilized */ |
|
if (!tcp_is_cwnd_limited(sk)) |
|
return; |
|
|
|
/* In slow start */ |
|
if (tcp_in_slow_start(tp)) |
|
tcp_slow_start(tp, acked); |
|
|
|
else { |
|
u32 delta; |
|
|
|
/* snd_cwnd_cnt is # of packets since last cwnd increment */ |
|
tp->snd_cwnd_cnt += ca->acked; |
|
ca->acked = 1; |
|
|
|
/* This is close approximation of: |
|
* tp->snd_cwnd += alpha/tp->snd_cwnd |
|
*/ |
|
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; |
|
if (delta >= tp->snd_cwnd) { |
|
tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, |
|
(u32)tp->snd_cwnd_clamp); |
|
tp->snd_cwnd_cnt = 0; |
|
} |
|
} |
|
} |
|
|
|
static u32 tcp_illinois_ssthresh(struct sock *sk) |
|
{ |
|
struct tcp_sock *tp = tcp_sk(sk); |
|
struct illinois *ca = inet_csk_ca(sk); |
|
|
|
/* Multiplicative decrease */ |
|
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); |
|
} |
|
|
|
/* Extract info for Tcp socket info provided via netlink. */ |
|
static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, |
|
union tcp_cc_info *info) |
|
{ |
|
const struct illinois *ca = inet_csk_ca(sk); |
|
|
|
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { |
|
info->vegas.tcpv_enabled = 1; |
|
info->vegas.tcpv_rttcnt = ca->cnt_rtt; |
|
info->vegas.tcpv_minrtt = ca->base_rtt; |
|
info->vegas.tcpv_rtt = 0; |
|
|
|
if (info->vegas.tcpv_rttcnt > 0) { |
|
u64 t = ca->sum_rtt; |
|
|
|
do_div(t, info->vegas.tcpv_rttcnt); |
|
info->vegas.tcpv_rtt = t; |
|
} |
|
*attr = INET_DIAG_VEGASINFO; |
|
return sizeof(struct tcpvegas_info); |
|
} |
|
return 0; |
|
} |
|
|
|
/* Congestion control operations registered under the name "illinois" */
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
	.name		= "illinois",
	.owner		= THIS_MODULE,

	.init		= tcp_illinois_init,
	.set_state	= tcp_illinois_state,
	.pkts_acked	= tcp_illinois_acked,
	.cong_avoid	= tcp_illinois_cong_avoid,
	.ssthresh	= tcp_illinois_ssthresh,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.get_info	= tcp_illinois_info,
};
|
|
|
/*
 * Module init: verify at build time that struct illinois fits in the
 * ICSK_CA_PRIV_SIZE private area, then register the algorithm.
 * Returns the result of tcp_register_congestion_control().
 */
static int __init tcp_illinois_register(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);

	err = tcp_register_congestion_control(&tcp_illinois);
	return err;
}
|
|
|
/* Module exit: remove the "illinois" algorithm registered at init. */
static void __exit tcp_illinois_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_illinois);
}
|
|
|
module_init(tcp_illinois_register); |
|
module_exit(tcp_illinois_unregister); |
|
|
|
MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); |
|
MODULE_LICENSE("GPL"); |
|
MODULE_DESCRIPTION("TCP Illinois"); |
|
MODULE_VERSION("1.0");
|
|
|