summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-10-11 21:19:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-10-11 21:19:00 -0400
commit ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch)
tree 0042e8674aff7ae5785db467836d8d4101906f70 /net
parent 052db7ec86dff26f734031c3ef5c2c03a94af0af (diff)
parent 01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
 "This set fixes a bunch of fallout from the changes that went in during
  this merge window, particularly:

   - Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
     failures.

   - Several networking drivers do atomic_set() on page counts where
     that's not exactly legal. From Eric Dumazet.

   - Make __skb_flow_get_ports() work cleanly with unaligned data, from
     Alexander Duyck.

   - Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
     Frederick.

   - Unbalanced enable_irq_wake usage in bcmgenet and systemport
     drivers, from Florian Fainelli.

   - pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.

   - Multi-dequeue in the qdisc layer severely bypasses the fairness
     limits the previous code used to enforce, reintroduce in a way that
     at the same time doesn't compromise bulk dequeue opportunities.
     From Jesper Dangaard Brouer.

   - macvlan receive path unnecessarily hops through a softirq by using
     netif_rx() instead of netif_receive_skb(). From Jason Baron"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
  net: systemport: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: avoid unbalanced enable_irq_wake calls
  net: bcmgenet: fix off-by-one in incrementing read pointer
  net: fix races in page->_count manipulation
  mlx4: fix race accessing page->_count
  ixgbe: fix race accessing page->_count
  igb: fix race accessing page->_count
  fm10k: fix race accessing page->_count
  net/phy: micrel: Add clock support for KSZ8021/KSZ8031
  flow-dissector: Fix alignment issue in __skb_flow_get_ports
  net: filter: fix the comments
  Documentation: replace __sk_run_filter with __bpf_prog_run
  macvlan: optimize the receive path
  macvlan: pass 'bool' type to macvlan_count_rx()
  drivers: net: xgene: Add 10GbE ethtool support
  drivers: net: xgene: Add 10GbE support
  drivers: net: xgene: Preparing for adding 10GbE support
  dtb: Add 10GbE node to APM X-Gene SoC device tree
  Documentation: dts: Update section header for APM X-Gene
  MAINTAINERS: Update APM X-Gene section
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/Kconfig                    1
-rw-r--r--  net/core/filter.c              9
-rw-r--r--  net/core/flow_dissector.c     36
-rw-r--r--  net/core/skbuff.c             35
-rw-r--r--  net/netfilter/nft_reject.c    10
-rw-r--r--  net/netlabel/netlabel_kapi.c   1
-rw-r--r--  net/rfkill/core.c              4
-rw-r--r--  net/sched/sch_generic.c       20
8 files changed, 69 insertions, 47 deletions
diff --git a/net/Kconfig b/net/Kconfig
index d6b138e2c263..6272420a721b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -6,6 +6,7 @@ menuconfig NET
bool "Networking support"
select NLATTR
select GENERIC_NET_UTILS
+ select ANON_INODES
---help---
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
* @skb: buffer to filter
*
* Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -566,11 +566,8 @@ err:
/* Security:
*
- * A BPF program is able to use 16 cells of memory to store intermediate
- * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
- *
* As we dont want to clear mem[] array for each packet going through
- * sk_run_filter(), we check that filter loaded by user never try to read
+ * __bpf_prog_run(), we check that filter loaded by user never try to read
* a cell if not previously written, and we check all branches to be sure
* a malicious user doesn't try to abuse us.
*/
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
if (ip_is_fragment(iph))
ip_proto = 0;
+ /* skip the address processing if skb is NULL. The assumption
+ * here is that if there is no skb we are not looking for flow
+ * info but lengths and protocols.
+ */
+ if (!skb)
+ break;
+
iph_to_flow_copy_addrs(flow, iph);
break;
}
@@ -114,17 +121,15 @@ ipv6:
return false;
ip_proto = iph->nexthdr;
- flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
- flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
nhoff += sizeof(struct ipv6hdr);
- /* skip the flow label processing if skb is NULL. The
- * assumption here is that if there is no skb we are not
- * looking for flow info as much as we are length.
- */
+ /* see comment above in IPv4 section */
if (!skb)
break;
+ flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+ flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
flow_label = ip6_flowlabel(iph);
if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
flow->n_proto = proto;
flow->ip_proto = ip_proto;
- flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
flow->thoff = (u16) nhoff;
+ /* unless skb is set we don't need to record port info */
+ if (skb)
+ flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+ data, hlen);
+
return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
switch (keys->ip_proto) {
case IPPROTO_TCP: {
- const struct tcphdr *tcph;
- struct tcphdr _tcph;
+ /* access doff as u8 to avoid unaligned access */
+ const u8 *doff;
+ u8 _doff;
- tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
- data, hlen, &_tcph);
- if (!tcph)
+ doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+ data, hlen, &_doff);
+ if (!doff)
return poff;
- poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+ poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
break;
}
case IPPROTO_UDP:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
goto end;
}
nc->frag.size = PAGE_SIZE << order;
-recycle:
- atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+ /* Even if we own the page, we do not use atomic_set().
+ * This would break get_page_unless_zero() users.
+ */
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+ &nc->frag.page->_count);
nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
nc->frag.offset = 0;
}
if (nc->frag.offset + fragsz > nc->frag.size) {
- /* avoid unnecessary locked operations if possible */
- if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
- atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
- goto recycle;
- goto refill;
+ if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+ if (!atomic_sub_and_test(nc->pagecnt_bias,
+ &nc->frag.page->_count))
+ goto refill;
+ /* OK, page count is 0, we can safely set it */
+ atomic_set(&nc->frag.page->_count,
+ NETDEV_PAGECNT_MAX_BIAS);
+ } else {
+ atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+ &nc->frag.page->_count);
+ }
+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+ nc->frag.offset = 0;
}
data = page_address(nc->frag.page) + nc->frag.offset;
@@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
/**
* alloc_skb_with_frags - allocate skb with page frags
*
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
*
* This can be used to allocate a paged skb, given a maximal order for frags.
*/
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a7..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_reject_dump);
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
[NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH,
[NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH,
[NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
int nft_reject_icmp_code(u8 code)
{
- if (code > NFT_REJECT_ICMPX_MAX)
- return -EINVAL;
+ BUG_ON(code > NFT_REJECT_ICMPX_MAX);
return icmp_code_v4[code];
}
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
[NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE,
[NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH,
[NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
int nft_reject_icmpv6_code(u8 code)
{
- if (code > NFT_REJECT_ICMPX_MAX)
- return -EINVAL;
+ BUG_ON(code > NFT_REJECT_ICMPX_MAX);
return icmp_code_v6[code];
}
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 0b4692dd1c5e..a845cd4cf21e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
* @addr: IP address in network byte order (struct in[6]_addr)
* @mask: address mask in network byte order (struct in[6]_addr)
* @family: address family
- * @secid: LSM secid value for the entry
* @audit_info: NetLabel audit information
*
* Description:
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b3b16c070a7f..fa7cd792791c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
/**
* __rfkill_switch_all - Toggle state of all switches of given type
* @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
*
* This function sets the state of all switches of given type,
* unless a specific switch is claimed by userspace (in which case,
@@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
/**
* rfkill_switch_all - Toggle state of all switches of given type
* @type: type of interfaces to be affected
- * @state: the new state
+ * @blocked: the new state
*
* Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state).
* Please refer to __rfkill_switch_all() for details.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38d58e6cef07..6efca30894aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
static void try_bulk_dequeue_skb(struct Qdisc *q,
struct sk_buff *skb,
- const struct netdev_queue *txq)
+ const struct netdev_queue *txq,
+ int *packets)
{
int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
@@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
bytelimit -= nskb->len; /* covers GSO len */
skb->next = nskb;
skb = nskb;
+ (*packets)++; /* GSO counts as one pkt */
}
skb->next = NULL;
}
@@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
* A requeued skb (via q->gso_skb) can also be a SKB list.
*/
-static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+ int *packets)
{
struct sk_buff *skb = q->gso_skb;
const struct netdev_queue *txq = q->dev_queue;
+ *packets = 1;
*validate = true;
if (unlikely(skb)) {
/* check the reason of requeuing without tx lock first */
@@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate)
!netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
if (skb && qdisc_may_bulk(q))
- try_bulk_dequeue_skb(q, skb, txq);
+ try_bulk_dequeue_skb(q, skb, txq, packets);
}
}
return skb;
@@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
* >0 - queue is not empty.
*
*/
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
{
struct netdev_queue *txq;
struct net_device *dev;
@@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q)
bool validate;
/* Dequeue packet */
- skb = dequeue_skb(q, &validate);
+ skb = dequeue_skb(q, &validate, packets);
if (unlikely(!skb))
return 0;
@@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q)
void __qdisc_run(struct Qdisc *q)
{
int quota = weight_p;
+ int packets;
- while (qdisc_restart(q)) {
+ while (qdisc_restart(q, &packets)) {
/*
* Ordered by possible occurrence: Postpone processing if
* 1. we've exceeded packet quota
* 2. another process needs the CPU;
*/
- if (--quota <= 0 || need_resched()) {
+ quota -= packets;
+ if (quota <= 0 || need_resched()) {
__netif_schedule(q);
break;
}