diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 7 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_xmit.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_recent.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_core.c | 3 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_rule.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_standalone.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 4 | ||||
-rw-r--r-- | net/ipv4/route.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 48 | ||||
-rw-r--r-- | net/ipv4/tcp_htcp.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_illinois.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 74 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_lp.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.h | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_veno.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_westwood.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_yeah.c | 4 |
25 files changed, 123 insertions, 107 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 06c08e5740fb..e68103475cca 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -831,7 +831,7 @@ const struct proto_ops inet_stream_ops = { .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, + .sendmsg = tcp_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = tcp_sendpage, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index abf6352f990f..5b77bdaa57dd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1056,10 +1056,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); + if (!in_dev) + return notifier_from_errno(-ENOMEM); if (dev == &loopback_dev) { - if (!in_dev) - panic("devinet: " - "Failed to create loopback\n"); IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 251346828cb4..2f14745a9e1f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -513,11 +513,8 @@ void ip_options_undo(struct ip_options * opt) static struct ip_options *ip_options_get_alloc(const int optlen) { - struct ip_options *opt = kmalloc(sizeof(*opt) + ((optlen + 3) & ~3), - GFP_KERNEL); - if (opt) - memset(opt, 0, sizeof(*opt)); - return opt; + return kzalloc(sizeof(struct ip_options) + ((optlen + 3) & ~3), + GFP_KERNEL); } static int ip_options_get_finish(struct ip_options **optp, diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 900ce29db382..666e080a74a3 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -128,7 +128,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ (skb)->ipvs_property = 1; \ - (skb)->ip_summed = CHECKSUM_NONE; \ + skb_forward_csum(skb); \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ } while (0) diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 321804315659..6d0c0f7364ad 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -387,12 +387,17 @@ static int recent_seq_open(struct inode *inode, struct file *file) st = kzalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) return -ENOMEM; + ret = seq_open(file, &recent_seq_ops); - if (ret) + if (ret) { kfree(st); + goto out; + } + st->table = pde->data; seq = file->private_data; seq->private = st; +out: return ret; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 64552afd01cb..d9b5177989c6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -509,3 +509,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) module_init(nf_conntrack_l3proto_ipv4_init); module_exit(nf_conntrack_l3proto_ipv4_fini); + +void need_ipv4_conntrack(void) +{ + return; +} +EXPORT_SYMBOL_GPL(need_ipv4_conntrack); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 27c7918e442a..b3dd5de9a258 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -294,15 +294,14 @@ static int exp_open(struct inode *inode, struct file *file) struct ct_expect_iter_state *st; int ret; - st = kmalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); - if (st == NULL) + st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL); + if (!st) return -ENOMEM; ret = seq_open(file, &exp_seq_ops); if (ret) goto out_free; seq = file->private_data; seq->private = st; - memset(st, 0, sizeof(struct ct_expect_iter_state)); return ret; out_free: kfree(st); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index e848d8d6292f..deab27facbad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -77,7 +77,8 @@ static inline unsigned int hash_by_src(const struct nf_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.u3.ip, + (__force u32)tuple->src.u.all, tuple->dst.protonum, 0) % nf_nat_htable_size; } diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 0f45427e5fdc..76ec59ae524d 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -192,7 +192,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); - u_int16_t all + __be16 all = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 332814dac503..46cc99def165 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -328,7 +328,7 @@ static int __init nf_nat_standalone_init(void) { int ret = 0; - need_conntrack(); + need_ipv4_conntrack(); #ifdef CONFIG_XFRM BUG_ON(ip_nat_decode_session != NULL); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 24d7c9f31918..c6d71526f625 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -900,8 +900,9 @@ static int raw_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct raw_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct raw_iter_state *s; + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; rc = seq_open(file, &raw_seq_ops); @@ -910,7 +911,6 @@ static int raw_seq_open(struct inode *inode, struct file *file) seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df42b7fb3268..c7ca94bd152c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -374,8 +374,9 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct rt_cache_iter_state *s; + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; rc = seq_open(file, &rt_cache_seq_ops); @@ -383,7 +384,6 @@ static int rt_cache_seq_open(struct inode *inode, struct file *file) goto out_kfree; seq = file->private_data; seq->private = s; - memset(s, 0, sizeof(*s)); out: return rc; out_kfree: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index da4c0b6ab79a..7e740112b238 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -658,9 +658,10 @@ static inline int select_size(struct sock *sk) return tmp; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { + struct sock *sk = sock->sk; struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 519de091a94d..4586211e3757 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) +static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index d17da30d82d6..485d7ea35f75 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -246,38 +246,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } - -/* Keep track of minimum rtt */ -static inline void measure_delay(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - u32 delay; - - /* No time stamp */ - if (!(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) || - /* Discard delay samples right after fast recovery */ - (s32)(tcp_time_stamp - ca->epoch_start) < HZ) - return; - - delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3; - if (delay == 0) - delay = 1; - - /* first time call or link delay decreases */ - if (ca->delay_min == 0 || ca->delay_min > delay) - ca->delay_min = delay; -} - static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - if (data_acked) - measure_delay(sk); - if (!tcp_is_cwnd_limited(sk, in_flight)) return; @@ -334,17 +308,33 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) +static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); + struct bictcp *ca = inet_csk_ca(sk); + u32 delay; if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { - struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } -} + /* Some calls are for duplicates without timetamps */ + if (rtt_us < 0) + return; + + /* Discard delay samples right after fast recovery */ + if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + return; + + delay = usecs_to_jiffies(rtt_us) << 3; + if (delay == 0) + delay = 1; + + /* first time call or link delay decreases */ + if (ca->delay_min == 0 || ca->delay_min > delay) + ca->delay_min = delay; +} static struct tcp_congestion_ops cubictcp = { .init = bictcp_init, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 08a02e6045c9..5215691f2760 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -76,20 +76,17 @@ static u32 htcp_cwnd_undo(struct sock *sk) return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } -static inline void measure_rtt(struct sock *sk) +static inline void measure_rtt(struct sock *sk, u32 srtt) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - u32 srtt = tp->srtt >> 3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ if (ca->minRTT > srtt || !ca->minRTT) ca->minRTT = srtt; /* max RTT */ - if (icsk->icsk_ca_state == TCP_CA_Open - && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt @@ -98,7 +95,7 @@ static inline void measure_rtt(struct sock *sk) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); @@ -108,6 +105,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t if (icsk->icsk_ca_state == TCP_CA_Open) ca->pkts_acked = pkts_acked; + if (rtt > 0) + measure_rtt(sk, usecs_to_jiffies(rtt)); + if (!use_bandwidth_switch) return; @@ -237,8 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); else { - measure_rtt(sk); - /* In dangerous area, increase slowly. * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd */ diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index cc5de6f69d46..64f1cbaf96e8 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -83,18 +83,16 @@ static void tcp_illinois_init(struct sock *sk) } /* Measure RTT for each ack. */ -static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) +static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt) { struct illinois *ca = inet_csk_ca(sk); - u32 rtt; ca->acked = pkts_acked; - if (ktime_equal(last, net_invalid_timestamp())) + /* dup ack, no rtt sample */ + if (rtt < 0) return; - rtt = ktime_to_us(net_timedelta(last)); - /* ignore bogus values, this prevents wraparound in alpha math */ if (rtt > RTT_MAX) rtt = RTT_MAX; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fec8a7a4dbaf..f030435e0eb4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -102,11 +102,14 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained DSACK info */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) +#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) @@ -964,12 +967,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* Check for D-SACK. */ if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { + flag |= FLAG_DSACKING_ACK; found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1 && !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { + flag |= FLAG_DSACKING_ACK; found_dup_sack = 1; tp->rx_opt.sack_ok |= 4; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); @@ -1851,19 +1856,22 @@ static inline u32 tcp_cwnd_min(const struct sock *sk) } /* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct sock *sk) +static void tcp_cwnd_down(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; - tp->snd_cwnd_cnt = decr&1; - decr >>= 1; + if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || + (IsReno(tp) && !(flag&FLAG_NOT_DUP))) { + tp->snd_cwnd_cnt = decr&1; + decr >>= 1; - if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) - tp->snd_cwnd -= decr; + if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) + tp->snd_cwnd -= decr; - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); - tp->snd_cwnd_stamp = tcp_time_stamp; + tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); + tp->snd_cwnd_stamp = tcp_time_stamp; + } } /* Nothing was retransmitted or returned timestamp is less @@ -2060,7 +2068,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) } tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(sk); + tcp_cwnd_down(sk, flag); } } @@ -2104,12 +2112,13 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) * tcp_xmit_retransmit_queue(). */ static void -tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, - int prior_packets, int flag) +tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); + int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); + int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && + (tp->fackets_out > tp->reordering)); /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ @@ -2186,14 +2195,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* F. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: - if (prior_snd_una == tp->snd_una) { + if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (IsReno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) tcp_remove_reno_sacks(sk, acked); - is_dupack = tcp_try_undo_partial(sk, acked); + do_lost = tcp_try_undo_partial(sk, acked); } break; case TCP_CA_Loss: @@ -2209,7 +2218,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Loss is undone; fall through to processing in Open state. */ default: if (IsReno(tp)) { - if (tp->snd_una != prior_snd_una) + if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); if (is_dupack) tcp_add_reno_sack(sk); @@ -2258,9 +2267,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_set_ca_state(sk, TCP_CA_Recovery); } - if (is_dupack || tcp_head_timedout(sk)) + if (do_lost || tcp_head_timedout(sk)) tcp_update_scoreboard(sk); - tcp_cwnd_down(sk); + tcp_cwnd_down(sk, flag); tcp_xmit_retransmit_queue(sk); } @@ -2490,12 +2499,23 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk); - /* Is the ACK triggering packet unambiguous? */ - if (acked & FLAG_RETRANS_DATA_ACKED) - last_ackt = net_invalid_timestamp(); + if (ca_ops->pkts_acked) { + s32 rtt_us = -1; + + /* Is the ACK triggering packet unambiguous? */ + if (!(acked & FLAG_RETRANS_DATA_ACKED)) { + /* High resolution needed and available? */ + if (ca_ops->flags & TCP_CONG_RTT_STAMP && + !ktime_equal(last_ackt, + net_invalid_timestamp())) + rtt_us = ktime_us_delta(ktime_get_real(), + last_ackt); + else if (seq_rtt > 0) + rtt_us = jiffies_to_usecs(seq_rtt); + } - if (ca_ops->pkts_acked) - ca_ops->pkts_acked(sk, pkts_acked, last_ackt); + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } } #if FASTRETRANS_DEBUG > 0 @@ -2667,7 +2687,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) * to prove that the RTO is indeed spurious. It transfers the control * from F-RTO to the conventional RTO recovery */ -static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) +static int tcp_process_frto(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2687,8 +2707,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate */ - if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) && - !(flag&FLAG_FORWARD_PROGRESS)) + if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP)) return 1; if (!(flag&FLAG_DATA_ACKED)) { @@ -2768,6 +2787,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) goto old_ack; + if (after(ack, prior_snd_una)) + flag |= FLAG_SND_UNA_ADVANCED; + if (sysctl_tcp_abc) { if (icsk->icsk_ca_state < TCP_CA_CWR) tp->bytes_acked += ack - prior_snd_una; @@ -2820,14 +2842,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, &seq_rtt); if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag); + frto_cwnd = tcp_process_frto(sk, flag); if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight, 0); - tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); + tcp_fastretrans_alert(sk, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight, 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3f5f7423b95c..9c94627c8c7e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2425,7 +2425,6 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, - .sendmsg = tcp_sendmsg, .recvmsg = tcp_recvmsg, .backlog_rcv = tcp_v4_do_rcv, .hash = tcp_v4_hash, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 80e140e3ec2d..e7f5ef92cbd8 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -260,13 +260,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) * newReno in increase case. * We work it out by following the idea from TCP-LP's paper directly */ -static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) +static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - if (!ktime_equal(last, net_invalid_timestamp())) - tcp_lp_rtt_sample(sk, ktime_to_us(net_timedelta(last))); + if (rtt_us > 0) + tcp_lp_rtt_sample(sk, rtt_us); /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 914e0307f7af..b49dedcda52d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -112,16 +112,16 @@ EXPORT_SYMBOL_GPL(tcp_vegas_init); * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; - if (ktime_equal(last, net_invalid_timestamp())) + if (rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = ktime_to_us(net_timedelta(last)) + 1; + vrtt = rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 502fa8183634..6c0eea2f8249 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -17,7 +17,7 @@ struct vegas { extern void tcp_vegas_init(struct sock *sk); extern void tcp_vegas_state(struct sock *sk, u8 ca_state); -extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last); +extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 7a55ddf86032..8fb2aee0b1a4 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -69,16 +69,16 @@ static void tcp_veno_init(struct sock *sk) } /* Do rtt sampling needed for Veno. */ -static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us) { struct veno *veno = inet_csk_ca(sk); u32 vrtt; - if (ktime_equal(last, net_invalid_timestamp())) + if (rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ - vrtt = ktime_to_us(net_timedelta(last)) + 1; + vrtt = rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index e61e09dd513e..20151d6a6241 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -100,11 +100,12 @@ static void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, s32 rtt) { struct westwood *w = inet_csk_ca(sk); - if (cnt > 0) - w->rtt = tcp_sk(sk)->srtt >> 3; + + if (rtt > 0) + w->rtt = usecs_to_jiffies(rtt); } /* diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index c04b7c6ec702..c107fba7430e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -58,7 +58,7 @@ static void tcp_yeah_init(struct sock *sk) } -static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) +static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) { const struct inet_connection_sock *icsk = inet_csk(sk); struct yeah *yeah = inet_csk_ca(sk); @@ -66,7 +66,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) if (icsk->icsk_ca_state == TCP_CA_Open) yeah->pkts_acked = pkts_acked; - tcp_vegas_pkts_acked(sk, pkts_acked, last); + tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, |