diff options
Diffstat (limited to 'net')
103 files changed, 3293 insertions, 2879 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 9012b1c922b6..75d427763992 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -114,8 +114,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_proto); static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (skb_cow(skb, skb_headroom(skb)) < 0) + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); return NULL; + } + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ad2ac3c00398..9d0223b16b46 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -385,6 +385,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: + case SIOCSHWTSTAMP: + case SIOCGHWTSTAMP: if (netif_device_present(real_dev) && ops->ndo_do_ioctl) err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); break; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 01a1082e02b3..8ceabc073658 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1808,7 +1808,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount = 0; if (skb) - amount = skb->len - sizeof(struct ddpehdr); + amount = skb->len - sizeof(struct ddpehdr); rc = put_user(amount, (int __user *)argp); break; } diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fc47baa888c5..f40cb0436eba 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -900,32 +900,24 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, bat_kobj = &bat_priv->soft_iface->dev.kobj; - uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + - strlen(batadv_uev_type_str[type]) + 1, - GFP_ATOMIC); + uevent_env[0] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_TYPE_VAR, + batadv_uev_type_str[type]); if (!uevent_env[0]) goto out; - sprintf(uevent_env[0], "%s%s", BATADV_UEV_TYPE_VAR, - batadv_uev_type_str[type]); - - uevent_env[1] = kmalloc(strlen(BATADV_UEV_ACTION_VAR) + - strlen(batadv_uev_action_str[action]) + 1, - GFP_ATOMIC); + uevent_env[1] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_ACTION_VAR, + batadv_uev_action_str[action]); if (!uevent_env[1]) goto out; - sprintf(uevent_env[1], "%s%s", BATADV_UEV_ACTION_VAR, - batadv_uev_action_str[action]); - /* If the event is DEL, ignore the data field */ if (action != BATADV_UEV_DEL) { - uevent_env[2] = kmalloc(strlen(BATADV_UEV_DATA_VAR) + - strlen(data) + 1, GFP_ATOMIC); + uevent_env[2] = kasprintf(GFP_ATOMIC, + "%s%s", BATADV_UEV_DATA_VAR, data); if (!uevent_env[2]) goto out; - - sprintf(uevent_env[2], "%s%s", BATADV_UEV_DATA_VAR, data); } ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b524c36c1273..0bb9d8b63dd2 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -93,7 +93,7 @@ static void fdb_rcu_free(struct rcu_head *head) static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr) { int err; - struct net_bridge_port *p, *tmp; + struct net_bridge_port *p; ASSERT_RTNL(); @@ -107,11 +107,9 @@ static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr) return; undo: - list_for_each_entry(tmp, &br->port_list, list) { - if (tmp == p) - break; - if (!br_promisc_port(tmp)) - dev_uc_del(tmp->dev, addr); + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); } } @@ -678,6 +676,7 @@ errout: int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct net_bridge *br = netdev_priv(dev); @@ -693,6 +692,19 @@ int br_fdb_dump(struct sk_buff *skb, if (idx < cb->args[0]) goto skip; + if (filter_dev && + (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev != dev) + goto skip; + /* !f->dst is a speacial case for bridge + * It means the MAC belongs to the bridge + * Therefore need a little more filtering + * we only want to dump the !f->dst case + */ + if (f->dst) + goto skip; + } + if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index abfa0b65a111..b4845f4b2bb4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2216,6 +2216,43 @@ unlock: EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); /** + * br_multicast_has_querier_anywhere - Checks for a querier on a bridge + * @dev: The bridge port providing the bridge on which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a valid querier exists anywhere on the bridged link layer. + * Otherwise returns false. + */ +bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct ethhdr eth; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + memset(ð, 0, sizeof(eth)); + eth.h_proto = htons(proto); + + ret = br_multicast_querier_exists(br, ð); + +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere); + +/** * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port * @dev: The bridge port adjacent to which to check for a querier * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 23caf5b0309e..62a7fa2e3569 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -399,7 +399,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx); + struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index e8437094d15f..43f750e88e19 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -908,8 +908,7 @@ static int caif_release(struct socket *sock) sock->sk = NULL; WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir)); - if (cf_sk->debugfs_socket_dir != NULL) - debugfs_remove_recursive(cf_sk->debugfs_socket_dir); + debugfs_remove_recursive(cf_sk->debugfs_socket_dir); lock_sock(&(cf_sk->sk)); sk->sk_state = CAIF_DISCONNECTED; diff --git a/net/core/dev.c b/net/core/dev.c index 30eedf677913..6e2a2cd82321 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2738,8 +2738,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. - * This permits __QDISC_STATE_RUNNING owner to get the lock more often - * and dequeue packets faster. + * This permits __QDISC___STATE_RUNNING owner to get the lock more + * often and dequeue packets faster. */ contended = qdisc_is_running(q); if (unlikely(contended)) @@ -5432,13 +5432,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ + if ((old_flags ^ flags) & IFF_UP) ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); - if (!ret) - dev_set_rx_mode(dev); - } - if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; unsigned int old_flags = dev->flags; @@ -7095,7 +7091,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) dev->rtnl_link_ops->dellink(dev, &dev_kill_list); else unregister_netdevice_queue(dev, &dev_kill_list); diff --git a/net/core/dst.c b/net/core/dst.c index 80d6286c8b62..a028409ee438 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -269,6 +269,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_destroy_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + call_rcu(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/filter.c b/net/core/filter.c index 735fad897496..87af1e3e56c0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -84,15 +84,6 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } -static inline void *load_pointer(const struct sk_buff *skb, int k, - unsigned int size, void *buffer) -{ - if (k >= 0) - return skb_header_pointer(skb, k, size, buffer); - - return bpf_internal_load_pointer_neg_helper(skb, k, size); -} - /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff @@ -537,7 +528,7 @@ load_word: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; @@ -547,7 +538,7 @@ load_word: LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be16(ptr); CONT; @@ -557,7 +548,7 @@ load_half: LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); + ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { BPF_R0 = *(u8 *)ptr; CONT; @@ -840,11 +831,11 @@ int sk_convert_filter(struct sock_filter *prog, int len, BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - if (len <= 0 || len >= BPF_MAXINSNS) + if (len <= 0 || len > BPF_MAXINSNS) return -EINVAL; if (new_prog) { - addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; } @@ -1101,7 +1092,7 @@ static int check_load_and_stores(struct sock_filter *filter, int flen) BUILD_BUG_ON(BPF_MEMWORDS > 16); - masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); + masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL); if (!masks) return -ENOMEM; @@ -1382,7 +1373,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp, fp_new = sock_kmalloc(sk, len, GFP_KERNEL); if (fp_new) { *fp_new = *fp; - /* As we're kepping orig_prog in fp_new along, + /* As we're keeping orig_prog in fp_new along, * we need to make sure we're not evicting it * from the old fp. */ @@ -1524,8 +1515,8 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, /** * sk_unattached_filter_create - create an unattached filter - * @fprog: the filter program * @pfp: the unattached filter that is created + * @fprog: the filter program * * Create a filter independent of any socket. We first run some * sanity checks on it to make sure it does not explode on us later. diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 107ed12a5323..5f362c1d0332 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -80,6 +80,8 @@ ip: case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; + __be32 flow_label; + ipv6: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) @@ -89,6 +91,21 @@ ipv6: flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + + flow_label = ip6_flowlabel(iph); + if (flow_label) { + /* Awesome, IPv6 packet has a flow label so we can + * use that to represent the ports without any + * further dissection. + */ + flow->n_proto = proto; + flow->ip_proto = ip_proto; + flow->ports = flow_label; + flow->thoff = (u16)nhoff; + + return true; + } + break; } case htons(ETH_P_8021AD): @@ -175,6 +192,7 @@ ipv6: break; } + flow->n_proto = proto; flow->ip_proto = ip_proto; flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; @@ -195,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) return jhash_3words(a, b, c, hashrnd); } -static __always_inline u32 __flow_hash_1word(u32 a) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys) { - __flow_hash_secret_init(); - return jhash_1word(a, hashrnd); + u32 hash; + + /* get a consistent hash (same value on both flow directions) */ + if (((__force u32)keys->dst < (__force u32)keys->src) || + (((__force u32)keys->dst == (__force u32)keys->src) && + ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) { + swap(keys->dst, keys->src); + swap(keys->port16[0], keys->port16[1]); + } + + hash = __flow_hash_3words((__force u32)keys->dst, + (__force u32)keys->src, + (__force u32)keys->ports); + if (!hash) + hash = 1; + + return hash; } +u32 flow_hash_from_keys(struct flow_keys *keys) +{ + return __flow_hash_from_keys(keys); +} +EXPORT_SYMBOL(flow_hash_from_keys); + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -210,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a) void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; - u32 hash; if (!skb_flow_dissect(skb, &keys)) return; @@ -218,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb) if (keys.ports) skb->l4_hash = 1; - /* get a consistent hash (same value on both flow directions) */ - if (((__force u32)keys.dst < (__force u32)keys.src) || - (((__force u32)keys.dst == (__force u32)keys.src) && - ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { - swap(keys.dst, keys.src); - swap(keys.port16[0], keys.port16[1]); - } - - hash = __flow_hash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports); - if (!hash) - hash = 1; + skb->sw_hash = 1; - skb->hash = hash; + skb->hash = __flow_hash_from_keys(&keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -240,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash); * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. */ -u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, +u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; @@ -260,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, qcount = dev->tc_to_txq[tc].count; } - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol; - hash = __flow_hash_1word(hash); - - return (u16) (((u64) hash * qcount) >> 32) + qoffset; + return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); @@ -338,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) if (map) { if (map->len == 1) queue_index = map->queues[0]; - else { - u32 hash; - if (skb->sk && skb->sk->sk_hash) - hash = skb->sk->sk_hash; - else - hash = (__force u16) skb->protocol ^ - skb->hash; - hash = __flow_hash_1word(hash); + else queue_index = map->queues[ - ((u64)hash * map->len) >> 32]; - } + ((u64)skb_get_hash(skb) * map->len) >> 32]; + if (unlikely(queue_index >= dev->real_num_tx_queues)) queue_index = -1; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e33937fb32a0..907fb5e36c02 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -822,7 +822,8 @@ void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); - } + } else + RCU_INIT_POINTER(np->dev->npinfo, NULL); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fc17a9d309ac..5b05e364b8ae 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -69,8 +69,9 @@ * for running devices in the if_list and sends packets until count is 0 it * also the thread checks the thread->control which is used for inter-process * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. + * way. + * The if_list is RCU protected, and the if_lock remains to protect updating + * of if_list, from "add_device" as it invoked from userspace (via proc write). * * By design there should only be *one* "controlling" process. In practice * multiple write accesses gives unpredictable result. Understood by "write" @@ -208,7 +209,7 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ -/* If lock -- can be removed after some work */ +/* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); #define if_unlock(t) spin_unlock(&(t->if_lock)); @@ -241,6 +242,7 @@ struct pktgen_dev { struct proc_dir_entry *entry; /* proc file */ struct pktgen_thread *pg_thread;/* the owner */ struct list_head list; /* chaining in the thread's run-queue */ + struct rcu_head rcu; /* freed by RCU */ int running; /* if false, the test will stop */ @@ -1737,14 +1739,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) seq_puts(seq, "Running: "); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); seq_puts(seq, "\nStopped: "); - list_for_each_entry(pkt_dev, &t->if_list, list) + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) if (!pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odevname); @@ -1753,7 +1755,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) else seq_puts(seq, "\nResult: NA\n"); - if_unlock(t); + rcu_read_unlock(); return 0; } @@ -1878,10 +1880,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn, pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } @@ -1931,7 +1931,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -1946,6 +1947,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } + rcu_read_unlock(); } } @@ -2997,8 +2999,8 @@ static void pktgen_run(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { /* * setup odev and create initial packet. @@ -3007,18 +3009,18 @@ static void pktgen_run(struct pktgen_thread *t) if (pkt_dev->odev) { pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); strcpy(pkt_dev->result, "Starting"); + pkt_dev->running = 1; /* Cranke yeself! */ started++; } else strcpy(pkt_dev->result, "Error starting"); } - if_unlock(t); + rcu_read_unlock(); if (started) t->control &= ~(T_STOP); } @@ -3041,27 +3043,25 @@ static int thread_is_running(const struct pktgen_thread *t) { const struct pktgen_dev *pkt_dev; - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) + if (pkt_dev->running) { + rcu_read_unlock(); return 1; + } + rcu_read_unlock(); return 0; } static int pktgen_wait_thread_run(struct pktgen_thread *t) { - if_lock(t); - while (thread_is_running(t)) { - if_unlock(t); - msleep_interruptible(100); if (signal_pending(current)) goto signal; - if_lock(t); } - if_unlock(t); return 1; signal: return 0; @@ -3166,10 +3166,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) return -EINVAL; } + pkt_dev->running = 0; kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; pkt_dev->stopped_at = ktime_get(); - pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3180,9 +3180,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev, *best = NULL; - if_lock(t); - - list_for_each_entry(pkt_dev, &t->if_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; if (best == NULL) @@ -3190,7 +3189,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } - if_unlock(t); + rcu_read_unlock(); + return best; } @@ -3200,13 +3200,13 @@ static void pktgen_stop(struct pktgen_thread *t) func_enter(); - if_lock(t); + rcu_read_lock(); - list_for_each_entry(pkt_dev, &t->if_list, list) { + list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); } - if_unlock(t); + rcu_read_unlock(); } /* @@ -3220,8 +3220,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) func_enter(); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3235,8 +3233,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } - - if_unlock(t); } static void pktgen_rem_all_ifs(struct pktgen_thread *t) @@ -3248,8 +3244,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3258,8 +3252,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -3407,10 +3399,10 @@ static int pktgen_thread_worker(void *arg) pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); - set_current_state(TASK_INTERRUPTIBLE); - set_freezable(); + __set_current_state(TASK_RUNNING); + while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3424,8 +3416,6 @@ static int pktgen_thread_worker(void *arg) continue; } - __set_current_state(TASK_RUNNING); - if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); @@ -3456,9 +3446,8 @@ static int pktgen_thread_worker(void *arg) } try_to_freeze(); - - set_current_state(TASK_INTERRUPTIBLE); } + set_current_state(TASK_INTERRUPTIBLE); pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); @@ -3485,8 +3474,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, struct pktgen_dev *p, *pkt_dev = NULL; size_t len = strlen(ifname); - if_lock(t); - list_for_each_entry(p, &t->if_list, list) + rcu_read_lock(); + list_for_each_entry_rcu(p, &t->if_list, list) if (strncmp(p->odevname, ifname, len) == 0) { if (p->odevname[len]) { if (exact || p->odevname[len] != '@') @@ -3496,7 +3485,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, break; } - if_unlock(t); + rcu_read_unlock(); pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3510,6 +3499,12 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; + /* This function cannot be called concurrently, as its called + * under pktgen_thread_lock mutex, but it can run from + * userspace on another CPU than the kthread. The if_lock() + * is used here to sync with concurrent instances of + * _rem_dev_from_if_list() invoked via kthread, which is also + * updating the if_list */ if_lock(t); if (pkt_dev->pg_thread) { @@ -3518,9 +3513,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, goto out; } - list_add(&pkt_dev->list, &t->if_list); - pkt_dev->pg_thread = t; pkt_dev->running = 0; + pkt_dev->pg_thread = t; + list_add_rcu(&pkt_dev->list, &t->if_list); out: if_unlock(t); @@ -3675,11 +3670,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t, struct list_head *q, *n; struct pktgen_dev *p; + if_lock(t); list_for_each_safe(q, n, &t->if_list) { p = list_entry(q, struct pktgen_dev, list); if (p == pkt_dev) - list_del(&p->list); + list_del_rcu(&p->list); } + if_unlock(t); } static int pktgen_remove_device(struct pktgen_thread *t, @@ -3699,20 +3696,22 @@ static int pktgen_remove_device(struct pktgen_thread *t, pkt_dev->odev = NULL; } - /* And update the thread if_list */ - - _rem_dev_from_if_list(t, pkt_dev); - + /* Remove proc before if_list entry, because add_device uses + * list to determine if interface already exist, avoid race + * with proc_create_data() */ if (pkt_dev->entry) proc_remove(pkt_dev->entry); + /* And update the thread if_list */ + _rem_dev_from_if_list(t, pkt_dev); + #ifdef CONFIG_XFRM free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); if (pkt_dev->page) put_page(pkt_dev->page); - kfree(pkt_dev); + kfree_rcu(pkt_dev, rcu); return 0; } @@ -3812,6 +3811,7 @@ static void __exit pg_cleanup(void) { unregister_netdevice_notifier(&pktgen_notifier_block); unregister_pernet_subsys(&pg_net_ops); + /* Don't need rcu_barrier() due to use of kfree_rcu() */ } module_init(pg_init); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index d3027a73fd4b..12ab7b4be609 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -52,14 +52,43 @@ * test_8021q: * jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ? * ldh [16] ; load inner type - * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ? + * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ? * ldb [18] ; load payload * and #0x8 ; as we don't have ports here, test * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x40 ; PTP_CLASS_V2_VLAN + * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * ret a ; return PTP class + * + * ; PTP over UDP over IPv4 over 802.1Q over Ethernet + * test_8021q_ipv4: + * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ? + * ldb [27] ; load proto + * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ? + * ldh [24] ; load frag offset field + * jset #0x1fff, drop_8021q_ipv4; don't allow fragments + * ldxb 4*([18]&0xf) ; load IP header len + * ldh [x + 20] ; load UDP dst port + * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? + * ldh [x + 26] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * ret a ; return PTP class + * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE + * + * ; PTP over UDP over IPv6 over 802.1Q over Ethernet + * test_8021q_ipv6: + * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ? + * ldb [24] ; load proto + * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ? + * ldh [60] ; load UDP dst port + * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? + * ldh [66] ; load payload + * and #0xf ; mask PTP_CLASS_VMASK + * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class + * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * * ; PTP over Ethernet * test_ieee1588: @@ -113,16 +142,39 @@ void __init ptp_classifier_init(void) { 0x44, 0, 0, 0x00000020 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, - { 0x15, 0, 9, 0x00008100 }, + { 0x15, 0, 32, 0x00008100 }, { 0x28, 0, 0, 0x00000010 }, - { 0x15, 0, 15, 0x000088f7 }, + { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x00000008 }, - { 0x15, 0, 12, 0x00000000 }, + { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000040 }, + { 0x44, 0, 0, 0x00000070 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x15, 0, 12, 0x00000800 }, + { 0x30, 0, 0, 0x0000001b }, + { 0x15, 0, 9, 0x00000011 }, + { 0x28, 0, 0, 0x00000018 }, + { 0x45, 7, 0, 0x00001fff }, + { 0xb1, 0, 0, 0x00000012 }, + { 0x48, 0, 0, 0x00000014 }, + { 0x15, 0, 4, 0x0000013f }, + { 0x48, 0, 0, 0x0000001a }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000050 }, + { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, + { 0x15, 0, 8, 0x000086dd }, + { 0x30, 0, 0, 0x00000018 }, + { 0x15, 0, 6, 0x00000011 }, + { 0x28, 0, 0, 0x0000003c }, + { 0x15, 0, 4, 0x0000013f }, + { 0x28, 0, 0, 0x00000042 }, + { 0x54, 0, 0, 0x0000000f }, + { 0x44, 0, 0, 0x00000060 }, { 0x16, 0, 0, 0x00000000 }, + { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, { 0x30, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x00000008 }, diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 467f326126e0..04db318e6218 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) { size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt; + struct listen_sock *lopt = NULL; nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); nr_table_entries = max_t(u32, nr_table_entries, 8); nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); lopt_size += nr_table_entries * sizeof(struct request_sock *); - if (lopt_size > PAGE_SIZE) + + if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + lopt = kzalloc(lopt_size, GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); + if (!lopt) lopt = vzalloc(lopt_size); - else - lopt = kzalloc(lopt_size, GFP_KERNEL); - if (lopt == NULL) + if (!lopt) return -ENOMEM; - for (lopt->max_qlen_log = 3; - (1 << lopt->max_qlen_log) < nr_table_entries; - lopt->max_qlen_log++); - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; + lopt->max_qlen_log = ilog2(nr_table_entries); write_lock_bh(&queue->syn_wait_lock); queue->listen_opt = lopt; @@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, void __reqsk_queue_destroy(struct request_sock_queue *queue) { - struct listen_sock *lopt; - size_t lopt_size; - - /* - * this is an error recovery path only - * no locking needed and the lopt is not NULL - */ - - lopt = queue->listen_opt; - lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); - - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + /* This is an error recovery path only, no locking needed */ + kvfree(queue->listen_opt); } static inline struct listen_sock *reqsk_queue_yank_listen_sk( @@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - size_t lopt_size = sizeof(struct listen_sock) + - lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { unsigned int i; @@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } WARN_ON(lopt->qlen != 0); - if (lopt_size > PAGE_SIZE) - vfree(lopt); - else - kfree(lopt); + kvfree(lopt); } /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1063996f8317..1f8a59e02c48 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops) if (rtnl_link_ops_get(ops->kind)) return -EEXIST; - if (!ops->dellink) + /* The check for setup is here because if ops + * does not have that filled up, it is not possible + * to use the ops for creating device. So do not + * fill up dellink as well. That disables rtnl_dellink. + */ + if (ops->setup && !ops->dellink) ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); @@ -1777,7 +1782,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) return -ENODEV; ops = dev->rtnl_link_ops; - if (!ops) + if (!ops || !ops->dellink) return -EOPNOTSUPP; ops->dellink(dev, &list_kill); @@ -2038,6 +2043,9 @@ replay: return -EOPNOTSUPP; } + if (!ops->setup) + return -EOPNOTSUPP; + if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); @@ -2509,6 +2517,7 @@ skip: int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { int err; @@ -2526,28 +2535,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - struct net *net = sock_net(skb->sk); struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *bdev = NULL; + struct net_device *br_dev = NULL; + const struct net_device_ops *ops = NULL; + const struct net_device_ops *cops = NULL; + struct ifinfomsg *ifm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + int brport_idx = 0; + int br_idx = 0; + int idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *br_dev; - const struct net_device_ops *ops; + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } + + brport_idx = ifm->ifi_index; + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); + if (!br_dev) + return -ENODEV; + + ops = br_dev->netdev_ops; + bdev = br_dev; + } + + for_each_netdev(net, dev) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + + bdev = dev; + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; - br_dev = netdev_master_upper_dev_get(dev); - ops = br_dev->netdev_ops; - if (ops->ndo_fdb_dump) - idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + + bdev = br_dev; + cops = ops; + } + + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) + idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + idx); } + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, + idx); + + cops = NULL; } - rcu_read_unlock(); cb->args[0] = idx; return skb->len; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9cd5344fad73..c1a33033cbe2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2993,7 +2993,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_put(nskb, len), len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + offset; + skb_headroom(nskb) + doffset; continue; } diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 6521dfd8b7c8..a8770391ea5b 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -43,31 +43,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) return; type = classify(skb); + if (type == PTP_CLASS_NONE) + return; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) - return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; - phydev->drv->txtstamp(phydev, clone, type); + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return; } - break; - default: - break; + + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); } } EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); @@ -114,20 +105,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) __skb_pull(skb, ETH_HLEN); - switch (type) { - case PTP_CLASS_V1_IPV4: - case PTP_CLASS_V1_IPV6: - case PTP_CLASS_V2_IPV4: - case PTP_CLASS_V2_IPV6: - case PTP_CLASS_V2_L2: - case PTP_CLASS_V2_VLAN: - phydev = skb->dev->phydev; - if (likely(phydev->drv->rxtstamp)) - return phydev->drv->rxtstamp(phydev, skb, type); - break; - default: - break; - } + if (type == PTP_CLASS_NONE) + return false; + + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return phydev->drv->rxtstamp(phydev, skb, type); return false; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4db3c2a1679c..04cb17d4b0ce 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -386,7 +386,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet6_reqsk_alloc(&dccp6_request_sock_ops); + req = inet_reqsk_alloc(&dccp6_request_sock_ops); if (req == NULL) goto drop; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c69eb9c4fbb8..b50dc436db1f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -55,11 +55,9 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - tw->tw_v6_daddr = sk->sk_v6_daddr; tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif /* Linkage updates. */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 5db37cef50a9..0a49632fac47 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -351,8 +351,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd) for (i = 0; i < pd->nr_chips; i++) { port_index = 0; while (port_index < DSA_MAX_PORTS) { - if (pd->chip[i].port_names[port_index]) - kfree(pd->chip[i].port_names[port_index]); + kfree(pd->chip[i].port_names[port_index]); port_index++; } kfree(pd->chip[i].rtable); diff --git a/net/hsr/Makefile b/net/hsr/Makefile index b68359f181cc..9ae972a820f4 100644 --- a/net/hsr/Makefile +++ b/net/hsr/Makefile @@ -4,4 +4,5 @@ obj-$(CONFIG_HSR) += hsr.o -hsr-y := hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o +hsr-y := hsr_main.o hsr_framereg.o hsr_device.o \ + hsr_netlink.o hsr_slave.o hsr_forward.o diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e5302b7f7ca9..a138d75751df 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * This file contains device methods for creating, using and destroying * virtual HSR devices. @@ -15,12 +15,13 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> -#include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include "hsr_device.h" +#include "hsr_slave.h" #include "hsr_framereg.h" #include "hsr_main.h" +#include "hsr_forward.h" static bool is_admin_up(struct net_device *dev) @@ -45,75 +46,108 @@ static void __hsr_set_operstate(struct net_device *dev, int transition) } } -void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2) +static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) { - if (!is_admin_up(hsr_dev)) { - __hsr_set_operstate(hsr_dev, IF_OPER_DOWN); + if (!is_admin_up(master->dev)) { + __hsr_set_operstate(master->dev, IF_OPER_DOWN); return; } - if (is_slave_up(slave1) || is_slave_up(slave2)) - __hsr_set_operstate(hsr_dev, IF_OPER_UP); + if (has_carrier) + __hsr_set_operstate(master->dev, IF_OPER_UP); else - __hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN); + __hsr_set_operstate(master->dev, IF_OPER_LOWERLAYERDOWN); } -void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2) +static bool hsr_check_carrier(struct hsr_port *master) { - if (is_slave_up(slave1) || is_slave_up(slave2)) - netif_carrier_on(hsr_dev); + struct hsr_port *port; + bool has_carrier; + + has_carrier = false; + + rcu_read_lock(); + hsr_for_each_port(master->hsr, port) + if ((port->type != HSR_PT_MASTER) && is_slave_up(port->dev)) { + has_carrier = true; + break; + } + rcu_read_unlock(); + + if (has_carrier) + netif_carrier_on(master->dev); else - netif_carrier_off(hsr_dev); + netif_carrier_off(master->dev); + + return has_carrier; } -void hsr_check_announce(struct net_device *hsr_dev, int old_operstate) +static void hsr_check_announce(struct net_device *hsr_dev, + unsigned char old_operstate) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) { /* Went up */ - hsr_priv->announce_count = 0; - hsr_priv->announce_timer.expires = jiffies + + hsr->announce_count = 0; + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr_priv->announce_timer); + add_timer(&hsr->announce_timer); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) /* Went down */ - del_timer(&hsr_priv->announce_timer); + del_timer(&hsr->announce_timer); } - -int hsr_get_max_mtu(struct hsr_priv *hsr_priv) +void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { - int mtu_max; - - if (hsr_priv->slave[0] && hsr_priv->slave[1]) - mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu); - else if (hsr_priv->slave[0]) - mtu_max = hsr_priv->slave[0]->mtu; - else if (hsr_priv->slave[1]) - mtu_max = hsr_priv->slave[1]->mtu; - else - mtu_max = HSR_TAGLEN; + struct hsr_port *master; + unsigned char old_operstate; + bool has_carrier; - return mtu_max - HSR_TAGLEN; + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + /* netif_stacked_transfer_operstate() cannot be used here since + * it doesn't set IF_OPER_LOWERLAYERDOWN (?) + */ + old_operstate = master->dev->operstate; + has_carrier = hsr_check_carrier(master); + hsr_set_operstate(master, has_carrier); + hsr_check_announce(master->dev, old_operstate); } +int hsr_get_max_mtu(struct hsr_priv *hsr) +{ + unsigned int mtu_max; + struct hsr_port *port; + + mtu_max = ETH_DATA_LEN; + rcu_read_lock(); + hsr_for_each_port(hsr, port) + if (port->type != HSR_PT_MASTER) + mtu_max = min(port->dev->mtu, mtu_max); + rcu_read_unlock(); + + if (mtu_max < HSR_HLEN) + return 0; + return mtu_max - HSR_HLEN; +} + + static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *master; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (new_mtu > hsr_get_max_mtu(hsr_priv)) { - netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", - HSR_TAGLEN); + if (new_mtu > hsr_get_max_mtu(hsr)) { + netdev_info(master->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n", + HSR_HLEN); return -EINVAL; } @@ -124,164 +158,95 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) static int hsr_dev_open(struct net_device *dev) { - struct hsr_priv *hsr_priv; - int i; - char *slave_name; + struct hsr_priv *hsr; + struct hsr_port *port; + char designation; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); + designation = '\0'; - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (hsr_priv->slave[i]) - slave_name = hsr_priv->slave[i]->name; - else - slave_name = "null"; - - if (!is_slave_up(hsr_priv->slave[i])) - netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n", - 'A' + i, slave_name); + rcu_read_lock(); + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + designation = 'A'; + break; + case HSR_PT_SLAVE_B: + designation = 'B'; + break; + default: + designation = '?'; + } + if (!is_slave_up(port->dev)) + netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n", + designation, port->dev->name); } + rcu_read_unlock(); + + if (designation == '\0') + netdev_warn(dev, "No slave devices configured\n"); return 0; } + static int hsr_dev_close(struct net_device *dev) { - /* Nothing to do here. We could try to restore the state of the slaves - * to what they were before being changed by the hsr master dev's state, - * but they might have been changed manually in the mean time too, so - * taking them up or down here might be confusing and is probably not a - * good idea. - */ + /* Nothing to do here. */ return 0; } -static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv) +static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr, + netdev_features_t features) { - unsigned long irqflags; + netdev_features_t mask; + struct hsr_port *port; - /* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values - * between 0001-1001 ("ring identifier", for regular HSR frames), - * or 1111 ("HSR management", supervision frames). Unfortunately, the - * spec writers forgot to explain what a "ring identifier" is, or - * how it is used. So we just set this to 0001 for regular frames, - * and 1111 for supervision frames. - */ - set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1); + mask = features; - /* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet - * frame is the content of the frame located between the Length/Type - * field and the Frame Check Sequence." + /* Mask out all features that, if supported by one device, should be + * enabled for all devices (see NETIF_F_ONE_FOR_ALL). * - * IEC 62439-3, p 48, specifies the "original LPDU" to include the - * original "LT" field (what "LT" means is not explained anywhere as - * far as I can see - perhaps "Length/Type"?). So LSDU_size might - * equal original length + 2. - * Also, the fact that this field is not used anywhere (might be used - * by a RedBox connecting HSR and PRP nets?) means I cannot test its - * correctness. Instead of guessing, I set this to 0 here, to make any - * problems immediately apparent. Anyone using this driver with PRP/HSR - * RedBoxes might need to fix this... + * Anything that's off in mask will not be enabled - so only things + * that were in features originally, and also is in NETIF_F_ONE_FOR_ALL, + * may become enabled. */ - set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0); - - spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); - hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr); - hsr_priv->sequence_nr++; - spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + features &= ~NETIF_F_ONE_FOR_ALL; + hsr_for_each_port(hsr, port) + features = netdev_increment_features(features, + port->dev->features, + mask); - hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; - - hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); + return features; } -static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv, - enum hsr_dev_idx dev_idx) +static netdev_features_t hsr_fix_features(struct net_device *dev, + netdev_features_t features) { - struct hsr_ethhdr *hsr_ethhdr; - - hsr_ethhdr = (struct hsr_ethhdr *) skb->data; + struct hsr_priv *hsr = netdev_priv(dev); - skb->dev = hsr_priv->slave[dev_idx]; - - hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx); - - /* Address substitution (IEC62439-3 pp 26, 50): replace mac - * address of outgoing frame with that of the outgoing slave's. - */ - ether_addr_copy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr); - - return dev_queue_xmit(skb); + return hsr_features_recompute(hsr, features); } static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { - struct hsr_priv *hsr_priv; - struct hsr_ethhdr *hsr_ethhdr; - struct sk_buff *skb2; - int res1, res2; - - hsr_priv = netdev_priv(dev); - hsr_ethhdr = (struct hsr_ethhdr *) skb->data; - - if ((skb->protocol != htons(ETH_P_PRP)) || - (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) { - hsr_fill_tag(hsr_ethhdr, hsr_priv); - skb->protocol = htons(ETH_P_PRP); - } - - skb2 = pskb_copy(skb, GFP_ATOMIC); - - res1 = NET_XMIT_DROP; - if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A])) - res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A); + struct hsr_priv *hsr = netdev_priv(dev); + struct hsr_port *master; - res2 = NET_XMIT_DROP; - if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B])) - res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B); - - if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN || - res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) { - hsr_priv->dev->stats.tx_packets++; - hsr_priv->dev->stats.tx_bytes += skb->len; - } else { - hsr_priv->dev->stats.tx_dropped++; - } + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + skb->dev = master->dev; + hsr_forward_skb(skb, master); return NETDEV_TX_OK; } -static int hsr_header_create(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *daddr, - const void *saddr, unsigned int len) -{ - int res; - - /* Make room for the HSR tag now. We will fill it in later (in - * hsr_dev_xmit) - */ - if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN) - return -ENOBUFS; - skb_push(skb, HSR_TAGLEN); - - /* To allow VLAN/HSR combos we should probably use - * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); - * here instead. It would require other changes too, though - e.g. - * separate headers for each slave etc... - */ - res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN); - if (res <= 0) - return res; - skb_reset_mac_header(skb); - - return res + HSR_TAGLEN; -} - - static const struct header_ops hsr_header_ops = { - .create = hsr_header_create, + .create = eth_header, .parse = eth_header_parse, }; @@ -291,67 +256,63 @@ static const struct header_ops hsr_header_ops = { */ static int hsr_pad(int size) { - const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN; + const int min_size = ETH_ZLEN - HSR_HLEN - ETH_HLEN; if (size >= min_size) return size; return min_size; } -static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type) +static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) { - struct hsr_priv *hsr_priv; struct sk_buff *skb; int hlen, tlen; struct hsr_sup_tag *hsr_stag; struct hsr_sup_payload *hsr_sp; unsigned long irqflags; - hlen = LL_RESERVED_SPACE(hsr_dev); - tlen = hsr_dev->needed_tailroom; + hlen = LL_RESERVED_SPACE(master->dev); + tlen = master->dev->needed_tailroom; skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen, GFP_ATOMIC); if (skb == NULL) return; - hsr_priv = netdev_priv(hsr_dev); - skb_reserve(skb, hlen); - skb->dev = hsr_dev; + skb->dev = master->dev; skb->protocol = htons(ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; if (dev_hard_header(skb, skb->dev, ETH_P_PRP, - hsr_priv->sup_multicast_addr, - skb->dev->dev_addr, skb->len) < 0) + master->hsr->sup_multicast_addr, + skb->dev->dev_addr, skb->len) <= 0) goto out; + skb_reset_mac_header(skb); - skb_pull(skb, sizeof(struct ethhdr)); - hsr_stag = (typeof(hsr_stag)) skb->data; + hsr_stag = (typeof(hsr_stag)) skb_put(skb, sizeof(*hsr_stag)); set_hsr_stag_path(hsr_stag, 0xf); set_hsr_stag_HSR_Ver(hsr_stag, 0); - spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags); - hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr); - hsr_priv->sequence_nr++; - spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags); + spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); + hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); + master->hsr->sequence_nr++; + spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_Type = type; hsr_stag->HSR_TLV_Length = 12; - skb_push(skb, sizeof(struct ethhdr)); - /* Payload: MacAddressA */ hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp)); - ether_addr_copy(hsr_sp->MacAddressA, hsr_dev->dev_addr); + ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); - dev_queue_xmit(skb); + hsr_forward_skb(skb, master); return; out: + WARN_ON_ONCE("HSR: Could not send supervision frame\n"); kfree_skb(skb); } @@ -360,59 +321,32 @@ out: */ static void hsr_announce(unsigned long data) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *master; - hsr_priv = (struct hsr_priv *) data; + hsr = (struct hsr_priv *) data; - if (hsr_priv->announce_count < 3) { - send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE); - hsr_priv->announce_count++; + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + + if (hsr->announce_count < 3) { + send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE); + hsr->announce_count++; } else { - send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK); + send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK); } - if (hsr_priv->announce_count < 3) - hsr_priv->announce_timer.expires = jiffies + + if (hsr->announce_count < 3) + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); else - hsr_priv->announce_timer.expires = jiffies + + hsr->announce_timer.expires = jiffies + msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); - if (is_admin_up(hsr_priv->dev)) - add_timer(&hsr_priv->announce_timer); -} - - -static void restore_slaves(struct net_device *hsr_dev) -{ - struct hsr_priv *hsr_priv; - int i; - int res; - - hsr_priv = netdev_priv(hsr_dev); - - rtnl_lock(); - - /* Restore promiscuity */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (!hsr_priv->slave[i]) - continue; - res = dev_set_promiscuity(hsr_priv->slave[i], -1); - if (res) - netdev_info(hsr_dev, - "Cannot restore slave promiscuity (%s, %d)\n", - hsr_priv->slave[i]->name, res); - } - - rtnl_unlock(); -} - -static void reclaim_hsr_dev(struct rcu_head *rh) -{ - struct hsr_priv *hsr_priv; + if (is_admin_up(master->dev)) + add_timer(&hsr->announce_timer); - hsr_priv = container_of(rh, struct hsr_priv, rcu_head); - free_netdev(hsr_priv->dev); + rcu_read_unlock(); } @@ -421,14 +355,18 @@ static void reclaim_hsr_dev(struct rcu_head *rh) */ static void hsr_dev_destroy(struct net_device *hsr_dev) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); + hsr_for_each_port(hsr, port) + hsr_del_port(port); - del_timer(&hsr_priv->announce_timer); - unregister_hsr_master(hsr_priv); /* calls list_del_rcu on hsr_priv */ - restore_slaves(hsr_dev); - call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev); /* reclaim hsr_priv */ + del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->announce_timer); + + synchronize_rcu(); + free_netdev(hsr_dev); } static const struct net_device_ops hsr_device_ops = { @@ -436,62 +374,51 @@ static const struct net_device_ops hsr_device_ops = { .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, + .ndo_fix_features = hsr_fix_features, }; +static struct device_type hsr_type = { + .name = "hsr", +}; void hsr_dev_setup(struct net_device *dev) { random_ether_addr(dev->dev_addr); ether_setup(dev); - dev->header_ops = &hsr_header_ops; - dev->netdev_ops = &hsr_device_ops; - dev->tx_queue_len = 0; + dev->header_ops = &hsr_header_ops; + dev->netdev_ops = &hsr_device_ops; + SET_NETDEV_DEVTYPE(dev, &hsr_type); + dev->tx_queue_len = 0; dev->destructor = hsr_dev_destroy; + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX; + + dev->features = dev->hw_features; + + /* Prevent recursive tx locking */ + dev->features |= NETIF_F_LLTX; + /* VLAN on top of HSR needs testing and probably some work on + * hsr_header_create() etc. + */ + dev->features |= NETIF_F_VLAN_CHALLENGED; + /* Not sure about this. Taken from bridge code. netdev_features.h says + * it means "Does not change network namespaces". + */ + dev->features |= NETIF_F_NETNS_LOCAL; } /* Return true if dev is a HSR master; return false otherwise. */ -bool is_hsr_master(struct net_device *dev) +inline bool is_hsr_master(struct net_device *dev) { return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); } -static int check_slave_ok(struct net_device *dev) -{ - /* Don't allow HSR on non-ethernet like devices */ - if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || - (dev->addr_len != ETH_ALEN)) { - netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); - return -EINVAL; - } - - /* Don't allow enslaving hsr devices */ - if (is_hsr_master(dev)) { - netdev_info(dev, "Cannot create trees of HSR devices.\n"); - return -EINVAL; - } - - if (is_hsr_slave(dev)) { - netdev_info(dev, "This device is already a HSR slave.\n"); - return -EINVAL; - } - - if (dev->priv_flags & IFF_802_1Q_VLAN) { - netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); - return -EINVAL; - } - - /* HSR over bonded devices has not been tested, but I'm not sure it - * won't work... - */ - - return 0; -} - - /* Default multicast address for HSR Supervision frames */ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x15, 0x4e, 0x00, 0x01, 0x00 @@ -500,97 +427,74 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec) { - struct hsr_priv *hsr_priv; - int i; + struct hsr_priv *hsr; + struct hsr_port *port; int res; - hsr_priv = netdev_priv(hsr_dev); - hsr_priv->dev = hsr_dev; - INIT_LIST_HEAD(&hsr_priv->node_db); - INIT_LIST_HEAD(&hsr_priv->self_node_db); - for (i = 0; i < HSR_MAX_SLAVE; i++) - hsr_priv->slave[i] = slave[i]; - - spin_lock_init(&hsr_priv->seqnr_lock); - /* Overflow soon to find bugs easier: */ - hsr_priv->sequence_nr = USHRT_MAX - 1024; - - init_timer(&hsr_priv->announce_timer); - hsr_priv->announce_timer.function = hsr_announce; - hsr_priv->announce_timer.data = (unsigned long) hsr_priv; + hsr = netdev_priv(hsr_dev); + INIT_LIST_HEAD(&hsr->ports); + INIT_LIST_HEAD(&hsr->node_db); + INIT_LIST_HEAD(&hsr->self_node_db); - ether_addr_copy(hsr_priv->sup_multicast_addr, def_multicast_addr); - hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; + ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); -/* FIXME: should I modify the value of these? - * - * - hsr_dev->flags - i.e. - * IFF_MASTER/SLAVE? - * - hsr_dev->priv_flags - i.e. - * IFF_EBRIDGE? - * IFF_TX_SKB_SHARING? - * IFF_HSR_MASTER/SLAVE? - */ + /* Make sure we recognize frames from ourselves in hsr_rcv() */ + res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr, + slave[1]->dev_addr); + if (res < 0) + return res; - for (i = 0; i < HSR_MAX_SLAVE; i++) { - res = check_slave_ok(slave[i]); - if (res) - return res; - } + spin_lock_init(&hsr->seqnr_lock); + /* Overflow soon to find bugs easier: */ + hsr->sequence_nr = HSR_SEQNR_START; - hsr_dev->features = slave[0]->features & slave[1]->features; - /* Prevent recursive tx locking */ - hsr_dev->features |= NETIF_F_LLTX; - /* VLAN on top of HSR needs testing and probably some work on - * hsr_header_create() etc. - */ - hsr_dev->features |= NETIF_F_VLAN_CHALLENGED; + init_timer(&hsr->announce_timer); + hsr->announce_timer.function = hsr_announce; + hsr->announce_timer.data = (unsigned long) hsr; - /* Set hsr_dev's MAC address to that of mac_slave1 */ - ether_addr_copy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr); + init_timer(&hsr->prune_timer); + hsr->prune_timer.function = hsr_prune_nodes; + hsr->prune_timer.data = (unsigned long) hsr; - /* Set required header length */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - if (slave[i]->hard_header_len + HSR_TAGLEN > - hsr_dev->hard_header_len) - hsr_dev->hard_header_len = - slave[i]->hard_header_len + HSR_TAGLEN; - } + ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); + hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; - /* MTU */ - for (i = 0; i < HSR_MAX_SLAVE; i++) - if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu) - hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN; + /* FIXME: should I modify the value of these? + * + * - hsr_dev->flags - i.e. + * IFF_MASTER/SLAVE? + * - hsr_dev->priv_flags - i.e. + * IFF_EBRIDGE? + * IFF_TX_SKB_SHARING? + * IFF_HSR_MASTER/SLAVE? + */ /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); - /* Promiscuity */ - for (i = 0; i < HSR_MAX_SLAVE; i++) { - res = dev_set_promiscuity(slave[i], 1); - if (res) { - netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n", - slave[i]->name, res); - goto fail; - } - } + res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); + if (res) + return res; - /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr_priv->self_node_db, - hsr_dev->dev_addr, - hsr_priv->slave[1]->dev_addr); - if (res < 0) + res = register_netdevice(hsr_dev); + if (res) goto fail; - res = register_netdevice(hsr_dev); + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A); + if (res) + goto fail; + res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B); if (res) goto fail; - register_hsr_master(hsr_priv); + hsr->prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); + add_timer(&hsr->prune_timer); return 0; fail: - restore_slaves(hsr_dev); + hsr_for_each_port(hsr, port) + hsr_del_port(port); + return res; } diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 2c7148e73914..108a5d59d2a6 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #ifndef __HSR_DEVICE_H @@ -18,12 +18,8 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec); -void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2); -void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1, - struct net_device *slave2); -void hsr_check_announce(struct net_device *hsr_dev, int old_operstate); +void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); -int hsr_get_max_mtu(struct hsr_priv *hsr_priv); +int hsr_get_max_mtu(struct hsr_priv *hsr); #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c new file mode 100644 index 000000000000..7871ed6d3825 --- /dev/null +++ b/net/hsr/hsr_forward.c @@ -0,0 +1,368 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#include "hsr_forward.h" +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include "hsr_main.h" +#include "hsr_framereg.h" + + +struct hsr_node; + +struct hsr_frame_info { + struct sk_buff *skb_std; + struct sk_buff *skb_hsr; + struct hsr_port *port_rcv; + struct hsr_node *node_src; + u16 sequence_nr; + bool is_supervision; + bool is_vlan; + bool is_local_dest; + bool is_local_exclusive; +}; + + +/* The uses I can see for these HSR supervision frames are: + * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = + * 22") to reset any sequence_nr counters belonging to that node. Useful if + * the other node's counter has been reset for some reason. + * -- + * Or not - resetting the counter and bridging the frame would create a + * loop, unfortunately. + * + * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck + * frame is received from a particular node, we know something is wrong. + * We just register these (as with normal frames) and throw them away. + * + * 3) Allow different MAC addresses for the two slave interfaces, using the + * MacAddressA field. + */ +static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) +{ + struct hsr_ethhdr_sp *hdr; + + WARN_ON_ONCE(!skb_mac_header_was_set(skb)); + hdr = (struct hsr_ethhdr_sp *) skb_mac_header(skb); + + if (!ether_addr_equal(hdr->ethhdr.h_dest, + hsr->sup_multicast_addr)) + return false; + + if (get_hsr_stag_path(&hdr->hsr_sup) != 0x0f) + return false; + if ((hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_ANNOUNCE) && + (hdr->hsr_sup.HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) + return false; + if (hdr->hsr_sup.HSR_TLV_Length != 12) + return false; + + return true; +} + + +static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, + struct hsr_frame_info *frame) +{ + struct sk_buff *skb; + int copylen; + unsigned char *dst, *src; + + skb_pull(skb_in, HSR_HLEN); + skb = __pskb_copy(skb_in, skb_headroom(skb_in) - HSR_HLEN, GFP_ATOMIC); + skb_push(skb_in, HSR_HLEN); + if (skb == NULL) + return NULL; + + skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start -= HSR_HLEN; + + copylen = 2*ETH_ALEN; + if (frame->is_vlan) + copylen += VLAN_HLEN; + src = skb_mac_header(skb_in); + dst = skb_mac_header(skb); + memcpy(dst, src, copylen); + + skb->protocol = eth_hdr(skb)->h_proto; + return skb; +} + +static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, + struct hsr_port *port) +{ + if (!frame->skb_std) + frame->skb_std = create_stripped_skb(frame->skb_hsr, frame); + return skb_clone(frame->skb_std, GFP_ATOMIC); +} + + +static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, + struct hsr_port *port) +{ + struct hsr_ethhdr *hsr_ethhdr; + int lane_id; + int lsdu_size; + + if (port->type == HSR_PT_SLAVE_A) + lane_id = 0; + else + lane_id = 1; + + lsdu_size = skb->len - 14; + if (frame->is_vlan) + lsdu_size -= 4; + + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + + set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id); + set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); + hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); + hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; + hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP); +} + +static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, + struct hsr_frame_info *frame, + struct hsr_port *port) +{ + int movelen; + unsigned char *dst, *src; + struct sk_buff *skb; + + /* Create the new skb with enough headroom to fit the HSR tag */ + skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC); + if (skb == NULL) + return NULL; + skb_reset_mac_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += HSR_HLEN; + + movelen = ETH_HLEN; + if (frame->is_vlan) + movelen += VLAN_HLEN; + + src = skb_mac_header(skb); + dst = skb_push(skb, HSR_HLEN); + memmove(dst, src, movelen); + skb_reset_mac_header(skb); + + hsr_fill_tag(skb, frame, port); + + return skb; +} + +/* If the original frame was an HSR tagged frame, just clone it to be sent + * unchanged. Otherwise, create a private frame especially tagged for 'port'. + */ +static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame, + struct hsr_port *port) +{ + if (frame->skb_hsr) + return skb_clone(frame->skb_hsr, GFP_ATOMIC); + + if ((port->type != HSR_PT_SLAVE_A) && (port->type != HSR_PT_SLAVE_B)) { + WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port"); + return NULL; + } + + return create_tagged_skb(frame->skb_std, frame, port); +} + + +static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, + struct hsr_node *node_src) +{ + bool was_multicast_frame; + int res; + + was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); + hsr_addr_subst_source(node_src, skb); + skb_pull(skb, ETH_HLEN); + res = netif_rx(skb); + if (res == NET_RX_DROP) { + dev->stats.rx_dropped++; + } else { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + if (was_multicast_frame) + dev->stats.multicast++; + } +} + +static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, + struct hsr_frame_info *frame) +{ + if (frame->port_rcv->type == HSR_PT_MASTER) { + hsr_addr_subst_dest(frame->node_src, skb, port); + + /* Address substitution (IEC62439-3 pp 26, 50): replace mac + * address of outgoing frame with that of the outgoing slave's. + */ + ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr); + } + return dev_queue_xmit(skb); +} + + +/* Forward the frame through all devices except: + * - Back through the receiving device + * - If it's a HSR frame: through a device where it has passed before + * - To the local HSR master only if the frame is directly addressed to it, or + * a non-supervision multicast or broadcast frame. + * + * HSR slave devices should insert a HSR tag into the frame, or forward the + * frame unchanged if it's already tagged. Interlink devices should strip HSR + * tags if they're of the non-HSR type (but only after duplicate discard). The + * master device always strips HSR tags. + */ +static void hsr_forward_do(struct hsr_frame_info *frame) +{ + struct hsr_port *port; + struct sk_buff *skb; + + hsr_for_each_port(frame->port_rcv->hsr, port) { + /* Don't send frame back the way it came */ + if (port == frame->port_rcv) + continue; + + /* Don't deliver locally unless we should */ + if ((port->type == HSR_PT_MASTER) && !frame->is_local_dest) + continue; + + /* Deliver frames directly addressed to us to master only */ + if ((port->type != HSR_PT_MASTER) && frame->is_local_exclusive) + continue; + + /* Don't send frame over port where it has been sent before */ + if (hsr_register_frame_out(port, frame->node_src, + frame->sequence_nr)) + continue; + + if (frame->is_supervision && (port->type == HSR_PT_MASTER)) { + hsr_handle_sup_frame(frame->skb_hsr, + frame->node_src, + frame->port_rcv); + continue; + } + + if (port->type != HSR_PT_MASTER) + skb = frame_get_tagged_skb(frame, port); + else + skb = frame_get_stripped_skb(frame, port); + if (skb == NULL) { + /* FIXME: Record the dropped frame? */ + continue; + } + + skb->dev = port->dev; + if (port->type == HSR_PT_MASTER) + hsr_deliver_master(skb, port->dev, frame->node_src); + else + hsr_xmit(skb, port, frame); + } +} + + +static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, + struct hsr_frame_info *frame) +{ + struct net_device *master_dev; + + master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev; + + if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { + frame->is_local_exclusive = true; + skb->pkt_type = PACKET_HOST; + } else { + frame->is_local_exclusive = false; + } + + if ((skb->pkt_type == PACKET_HOST) || + (skb->pkt_type == PACKET_MULTICAST) || + (skb->pkt_type == PACKET_BROADCAST)) { + frame->is_local_dest = true; + } else { + frame->is_local_dest = false; + } +} + + +static int hsr_fill_frame_info(struct hsr_frame_info *frame, + struct sk_buff *skb, struct hsr_port *port) +{ + struct ethhdr *ethhdr; + unsigned long irqflags; + + frame->is_supervision = is_supervision_frame(port->hsr, skb); + frame->node_src = hsr_get_node(&port->hsr->node_db, skb, + frame->is_supervision); + if (frame->node_src == NULL) + return -1; /* Unknown node and !is_supervision, or no mem */ + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + frame->is_vlan = false; + if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + frame->is_vlan = true; + /* FIXME: */ + WARN_ONCE(1, "HSR: VLAN not yet supported"); + } + if (ethhdr->h_proto == htons(ETH_P_PRP)) { + frame->skb_std = NULL; + frame->skb_hsr = skb; + frame->sequence_nr = hsr_get_skb_sequence_nr(skb); + } else { + frame->skb_std = skb; + frame->skb_hsr = NULL; + /* Sequence nr for the master node */ + spin_lock_irqsave(&port->hsr->seqnr_lock, irqflags); + frame->sequence_nr = port->hsr->sequence_nr; + port->hsr->sequence_nr++; + spin_unlock_irqrestore(&port->hsr->seqnr_lock, irqflags); + } + + frame->port_rcv = port; + check_local_dest(port->hsr, skb, frame); + + return 0; +} + +/* Must be called holding rcu read lock (because of the port parameter) */ +void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) +{ + struct hsr_frame_info frame; + + if (skb_mac_header(skb) != skb->data) { + WARN_ONCE(1, "%s:%d: Malformed frame (port_src %s)\n", + __FILE__, __LINE__, port->dev->name); + goto out_drop; + } + + if (hsr_fill_frame_info(&frame, skb, port) < 0) + goto out_drop; + hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); + hsr_forward_do(&frame); + + if (frame.skb_hsr != NULL) + kfree_skb(frame.skb_hsr); + if (frame.skb_std != NULL) + kfree_skb(frame.skb_std); + return; + +out_drop: + port->dev->stats.tx_dropped++; + kfree_skb(skb); +} diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h new file mode 100644 index 000000000000..5c5bc4b6b75f --- /dev/null +++ b/net/hsr/hsr_forward.h @@ -0,0 +1,20 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#ifndef __HSR_FORWARD_H +#define __HSR_FORWARD_H + +#include <linux/netdevice.h> +#include "hsr_main.h" + +void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port); + +#endif /* __HSR_FORWARD_H */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 83e58449366a..c709c13842cb 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * The HSR spec says never to forward the same frame twice on the same * interface. A frame is identified by its source MAC address and its HSR @@ -23,71 +23,68 @@ #include "hsr_netlink.h" -struct node_entry { - struct list_head mac_list; - unsigned char MacAddressA[ETH_ALEN]; - unsigned char MacAddressB[ETH_ALEN]; - enum hsr_dev_idx AddrB_if; /* The local slave through which AddrB - * frames are received from this node - */ - unsigned long time_in[HSR_MAX_SLAVE]; - bool time_in_stale[HSR_MAX_SLAVE]; - u16 seq_out[HSR_MAX_DEV]; - struct rcu_head rcu_head; +struct hsr_node { + struct list_head mac_list; + unsigned char MacAddressA[ETH_ALEN]; + unsigned char MacAddressB[ETH_ALEN]; + /* Local slave through which AddrB frames are received from this node */ + enum hsr_port_type AddrB_port; + unsigned long time_in[HSR_PT_PORTS]; + bool time_in_stale[HSR_PT_PORTS]; + u16 seq_out[HSR_PT_PORTS]; + struct rcu_head rcu_head; }; -/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ +/* TODO: use hash lists for mac addresses (linux/jhash.h)? */ -/* Search for mac entry. Caller must hold rcu read lock. +/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, + * false otherwise. */ -static struct node_entry *find_node_by_AddrA(struct list_head *node_db, - const unsigned char addr[ETH_ALEN]) +static bool seq_nr_after(u16 a, u16 b) { - struct node_entry *node; - - list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, addr)) - return node; - } + /* Remove inconsistency where + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ + if ((int) b - a == 32768) + return false; - return NULL; + return (((s16) (b - a)) < 0); } +#define seq_nr_before(a, b) seq_nr_after((b), (a)) +#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) +#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) -/* Search for mac entry. Caller must hold rcu read lock. - */ -static struct node_entry *find_node_by_AddrB(struct list_head *node_db, - const unsigned char addr[ETH_ALEN]) +bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr) { - struct node_entry *node; + struct hsr_node *node; - list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressB, addr)) - return node; + node = list_first_or_null_rcu(&hsr->self_node_db, struct hsr_node, + mac_list); + if (!node) { + WARN_ONCE(1, "HSR: No self node\n"); + return false; } - return NULL; -} + if (ether_addr_equal(addr, node->MacAddressA)) + return true; + if (ether_addr_equal(addr, node->MacAddressB)) + return true; + return false; +} /* Search for mac entry. Caller must hold rcu read lock. */ -struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb) +static struct hsr_node *find_node_by_AddrA(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) { - struct node_entry *node; - struct ethhdr *ethhdr; - - if (!skb_mac_header_was_set(skb)) - return NULL; - - ethhdr = (struct ethhdr *) skb_mac_header(skb); + struct hsr_node *node; list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) - return node; - if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + if (ether_addr_equal(node->MacAddressA, addr)) return node; } @@ -102,7 +99,7 @@ int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]) { - struct node_entry *node, *oldnode; + struct hsr_node *node, *oldnode; node = kmalloc(sizeof(*node), GFP_KERNEL); if (!node) @@ -113,7 +110,7 @@ int hsr_create_self_node(struct list_head *self_node_db, rcu_read_lock(); oldnode = list_first_or_null_rcu(self_node_db, - struct node_entry, mac_list); + struct hsr_node, mac_list); if (oldnode) { list_replace_rcu(&oldnode->mac_list, &node->mac_list); rcu_read_unlock(); @@ -128,135 +125,144 @@ int hsr_create_self_node(struct list_head *self_node_db, } -/* Add/merge node to the database of nodes. 'skb' must contain an HSR - * supervision frame. - * - If the supervision header's MacAddressA field is not yet in the database, - * this frame is from an hitherto unknown node - add it to the database. - * - If the sender's MAC address is not the same as its MacAddressA address, - * the node is using PICS_SUBS (address substitution). Record the sender's - * address as the node's MacAddressB. - * - * This function needs to work even if the sender node has changed one of its - * slaves' MAC addresses. In this case, there are four different cases described - * by (Addr-changed, received-from) pairs as follows. Note that changing the - * SlaveA address is equal to changing the node's own address: - * - * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since - * node == NULL. - * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected - * from this frame). - * - * - (AddrA, SlaveB): The old node will be found. We need to detect this and - * remove the node. - * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first). - * The old one will be pruned after HSR_NODE_FORGET_TIME. - * - * We also need to detect if the sender's SlaveA and SlaveB cables have been - * swapped. +/* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; + * seq_out is used to initialize filtering of outgoing duplicate frames + * originating from the newly added node. */ -struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, - struct node_entry *node, - struct sk_buff *skb, - enum hsr_dev_idx dev_idx) +struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], + u16 seq_out) { - struct hsr_sup_payload *hsr_sp; - struct hsr_ethhdr_sp *hsr_ethsup; - int i; + struct hsr_node *node; unsigned long now; - - hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb); - hsr_sp = (struct hsr_sup_payload *) skb->data; - - if (node && !ether_addr_equal(node->MacAddressA, hsr_sp->MacAddressA)) { - /* Node has changed its AddrA, frame was received from SlaveB */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node && (dev_idx == node->AddrB_if) && - !ether_addr_equal(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) { - /* Cables have been swapped */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node && (dev_idx != node->AddrB_if) && - (node->AddrB_if != HSR_DEV_NONE) && - !ether_addr_equal(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) { - /* Cables have been swapped */ - list_del_rcu(&node->mac_list); - kfree_rcu(node, rcu_head); - node = NULL; - } - - if (node) - return node; - - node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA); - if (node) { - /* Node is known, but frame was received from an unknown - * address. Node is PICS_SUBS capable; merge its AddrB. - */ - ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); - node->AddrB_if = dev_idx; - return node; - } + int i; node = kzalloc(sizeof(*node), GFP_ATOMIC); if (!node) return NULL; - ether_addr_copy(node->MacAddressA, hsr_sp->MacAddressA); - ether_addr_copy(node->MacAddressB, hsr_ethsup->ethhdr.h_source); - if (!ether_addr_equal(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source)) - node->AddrB_if = dev_idx; - else - node->AddrB_if = HSR_DEV_NONE; + ether_addr_copy(node->MacAddressA, addr); /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; - for (i = 0; i < HSR_MAX_SLAVE; i++) + for (i = 0; i < HSR_PT_PORTS; i++) node->time_in[i] = now; - for (i = 0; i < HSR_MAX_DEV; i++) - node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1; + for (i = 0; i < HSR_PT_PORTS; i++) + node->seq_out[i] = seq_out; - list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db); + list_add_tail_rcu(&node->mac_list, node_db); return node; } +/* Get the hsr_node from which 'skb' was sent. + */ +struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, + bool is_sup) +{ + struct hsr_node *node; + struct ethhdr *ethhdr; + u16 seq_out; + + if (!skb_mac_header_was_set(skb)) + return NULL; + + ethhdr = (struct ethhdr *) skb_mac_header(skb); + + list_for_each_entry_rcu(node, node_db, mac_list) { + if (ether_addr_equal(node->MacAddressA, ethhdr->h_source)) + return node; + if (ether_addr_equal(node->MacAddressB, ethhdr->h_source)) + return node; + } + + if (!is_sup) + return NULL; /* Only supervision frame may create node entry */ + + if (ethhdr->h_proto == htons(ETH_P_PRP)) { + /* Use the existing sequence_nr from the tag as starting point + * for filtering duplicate frames. + */ + seq_out = hsr_get_skb_sequence_nr(skb) - 1; + } else { + WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); + seq_out = 0; + } + + return hsr_add_node(node_db, ethhdr->h_source, seq_out); +} + +/* Use the Supervision frame's info about an eventual MacAddressB for merging + * nodes that has previously had their MacAddressB registered as a separate + * node. + */ +void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, + struct hsr_port *port_rcv) +{ + struct hsr_node *node_real; + struct hsr_sup_payload *hsr_sp; + struct list_head *node_db; + int i; + + skb_pull(skb, sizeof(struct hsr_ethhdr_sp)); + hsr_sp = (struct hsr_sup_payload *) skb->data; + + if (ether_addr_equal(eth_hdr(skb)->h_source, hsr_sp->MacAddressA)) + /* Not sent from MacAddressB of a PICS_SUBS capable node */ + goto done; + + /* Merge node_curr (registered on MacAddressB) into node_real */ + node_db = &port_rcv->hsr->node_db; + node_real = find_node_by_AddrA(node_db, hsr_sp->MacAddressA); + if (!node_real) + /* No frame received from AddrA of this node yet */ + node_real = hsr_add_node(node_db, hsr_sp->MacAddressA, + HSR_SEQNR_START - 1); + if (!node_real) + goto done; /* No mem */ + if (node_real == node_curr) + /* Node has already been merged */ + goto done; + + ether_addr_copy(node_real->MacAddressB, eth_hdr(skb)->h_source); + for (i = 0; i < HSR_PT_PORTS; i++) { + if (!node_curr->time_in_stale[i] && + time_after(node_curr->time_in[i], node_real->time_in[i])) { + node_real->time_in[i] = node_curr->time_in[i]; + node_real->time_in_stale[i] = node_curr->time_in_stale[i]; + } + if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) + node_real->seq_out[i] = node_curr->seq_out[i]; + } + node_real->AddrB_port = port_rcv->type; + + list_del_rcu(&node_curr->mac_list); + kfree_rcu(node_curr, rcu_head); + +done: + skb_push(skb, sizeof(struct hsr_ethhdr_sp)); +} + /* 'skb' is a frame meant for this host, that is to be passed to upper layers. * - * If the frame was sent by a node's B interface, replace the sender + * If the frame was sent by a node's B interface, replace the source * address with that node's "official" address (MacAddressA) so that upper * layers recognize where it came from. */ -void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) +void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb) { - struct ethhdr *ethhdr; - struct node_entry *node; - if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: Mac header not set\n", __func__); return; } - ethhdr = (struct ethhdr *) skb_mac_header(skb); - rcu_read_lock(); - node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source); - if (node) - ether_addr_copy(ethhdr->h_source, node->MacAddressA); - rcu_read_unlock(); + memcpy(ð_hdr(skb)->h_source, node->MacAddressA, ETH_ALEN); } - /* 'skb' is a frame meant for another host. - * 'hsr_dev_idx' is the HSR index of the outgoing device + * 'port' is the outgoing interface * * Substitute the target (dest) MAC address if necessary, so the it matches the * recipient interface MAC address, regardless of whether that is the @@ -264,47 +270,48 @@ void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb) * This is needed to keep the packets flowing through switches that learn on * which "side" the different interfaces are. */ -void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, - enum hsr_dev_idx dev_idx) +void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, + struct hsr_port *port) { - struct node_entry *node; + struct hsr_node *node_dst; - rcu_read_lock(); - node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest); - if (node && (node->AddrB_if == dev_idx)) - ether_addr_copy(ethhdr->h_dest, node->MacAddressB); - rcu_read_unlock(); -} + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: Mac header not set\n", __func__); + return; + } + if (!is_unicast_ether_addr(eth_hdr(skb)->h_dest)) + return; -/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, - * false otherwise. - */ -static bool seq_nr_after(u16 a, u16 b) -{ - /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) - */ - if ((int) b - a == 32768) - return false; + node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest); + if (!node_dst) { + WARN_ONCE(1, "%s: Unknown node\n", __func__); + return; + } + if (port->type != node_dst->AddrB_port) + return; + if (!node_dst->MacAddressB) { + WARN_ONCE(1, "%s: No MacAddressB\n", __func__); + return; + } - return (((s16) (b - a)) < 0); + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->MacAddressB); } -#define seq_nr_before(a, b) seq_nr_after((b), (a)) -#define seq_nr_after_or_eq(a, b) (!seq_nr_before((a), (b))) -#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) -void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) +void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, + u16 sequence_nr) { - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) { - WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); + /* Don't register incoming frames without a valid sequence number. This + * ensures entries of restarted nodes gets pruned so that they can + * re-register and resume communications. + */ + if (seq_nr_before(sequence_nr, node->seq_out[port->type])) return; - } - node->time_in[dev_idx] = jiffies; - node->time_in_stale[dev_idx] = false; -} + node->time_in[port->type] = jiffies; + node->time_in_stale[port->type] = false; +} /* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid * ethhdr->h_source address and skb->mac_header set. @@ -314,102 +321,87 @@ void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) * 0 otherwise, or * negative error code on error */ -int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, - struct sk_buff *skb) +int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, + u16 sequence_nr) { - struct hsr_ethhdr *hsr_ethhdr; - u16 sequence_nr; - - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { - WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); - return -EINVAL; - } - if (!skb_mac_header_was_set(skb)) { - WARN_ONCE(1, "%s: Mac header not set\n", __func__); - return -EINVAL; - } - hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); - - sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr); - if (seq_nr_before_or_eq(sequence_nr, node->seq_out[dev_idx])) + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type])) return 1; - node->seq_out[dev_idx] = sequence_nr; + node->seq_out[port->type] = sequence_nr; return 0; } - -static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx) +static struct hsr_port *get_late_port(struct hsr_priv *hsr, + struct hsr_node *node) { - enum hsr_dev_idx other; - - if (node->time_in_stale[dev_idx]) - return true; - - if (dev_idx == HSR_DEV_SLAVE_A) - other = HSR_DEV_SLAVE_B; - else - other = HSR_DEV_SLAVE_A; - - if (node->time_in_stale[other]) - return false; - - if (time_after(node->time_in[other], node->time_in[dev_idx] + - msecs_to_jiffies(MAX_SLAVE_DIFF))) - return true; + if (node->time_in_stale[HSR_PT_SLAVE_A]) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (node->time_in_stale[HSR_PT_SLAVE_B]) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + + if (time_after(node->time_in[HSR_PT_SLAVE_B], + node->time_in[HSR_PT_SLAVE_A] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (time_after(node->time_in[HSR_PT_SLAVE_A], + node->time_in[HSR_PT_SLAVE_B] + + msecs_to_jiffies(MAX_SLAVE_DIFF))) + return hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); - return false; + return NULL; } /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). */ -void hsr_prune_nodes(struct hsr_priv *hsr_priv) +void hsr_prune_nodes(unsigned long data) { - struct node_entry *node; + struct hsr_priv *hsr; + struct hsr_node *node; + struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; + hsr = (struct hsr_priv *) data; + rcu_read_lock(); - list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) { + list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { /* Shorthand */ - time_a = node->time_in[HSR_DEV_SLAVE_A]; - time_b = node->time_in[HSR_DEV_SLAVE_B]; + time_a = node->time_in[HSR_PT_SLAVE_A]; + time_b = node->time_in[HSR_PT_SLAVE_B]; /* Check for timestamps old enough to risk wrap-around */ if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2)) - node->time_in_stale[HSR_DEV_SLAVE_A] = true; + node->time_in_stale[HSR_PT_SLAVE_A] = true; if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2)) - node->time_in_stale[HSR_DEV_SLAVE_B] = true; + node->time_in_stale[HSR_PT_SLAVE_B] = true; /* Get age of newest frame from node. * At least one time_in is OK here; nodes get pruned long * before both time_ins can get stale */ timestamp = time_a; - if (node->time_in_stale[HSR_DEV_SLAVE_A] || - (!node->time_in_stale[HSR_DEV_SLAVE_B] && + if (node->time_in_stale[HSR_PT_SLAVE_A] || + (!node->time_in_stale[HSR_PT_SLAVE_B] && time_after(time_b, time_a))) timestamp = time_b; /* Warn of ring error only as long as we get frames at all */ if (time_is_after_jiffies(timestamp + msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) { - - if (is_late(node, HSR_DEV_SLAVE_A)) - hsr_nl_ringerror(hsr_priv, node->MacAddressA, - HSR_DEV_SLAVE_A); - else if (is_late(node, HSR_DEV_SLAVE_B)) - hsr_nl_ringerror(hsr_priv, node->MacAddressA, - HSR_DEV_SLAVE_B); + rcu_read_lock(); + port = get_late_port(hsr, node); + if (port != NULL) + hsr_nl_ringerror(hsr, node->MacAddressA, port); + rcu_read_unlock(); } /* Prune old entries */ if (time_is_before_jiffies(timestamp + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { - hsr_nl_nodedown(hsr_priv, node->MacAddressA); + hsr_nl_nodedown(hsr, node->MacAddressA); list_del_rcu(&node->mac_list); /* Note that we need to free this entry later: */ kfree_rcu(node, rcu_head); @@ -419,21 +411,21 @@ void hsr_prune_nodes(struct hsr_priv *hsr_priv) } -void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, +void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { - struct node_entry *node; + struct hsr_node *node; if (!_pos) { - node = list_first_or_null_rcu(&hsr_priv->node_db, - struct node_entry, mac_list); + node = list_first_or_null_rcu(&hsr->node_db, + struct hsr_node, mac_list); if (node) ether_addr_copy(addr, node->MacAddressA); return node; } node = _pos; - list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) { + list_for_each_entry_continue_rcu(node, &hsr->node_db, mac_list) { ether_addr_copy(addr, node->MacAddressA); return node; } @@ -442,7 +434,7 @@ void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, } -int hsr_get_node_data(struct hsr_priv *hsr_priv, +int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], unsigned int *addr_b_ifindex, @@ -451,12 +443,13 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, int *if2_age, u16 *if2_seq) { - struct node_entry *node; + struct hsr_node *node; + struct hsr_port *port; unsigned long tdiff; rcu_read_lock(); - node = find_node_by_AddrA(&hsr_priv->node_db, addr); + node = find_node_by_AddrA(&hsr->node_db, addr); if (!node) { rcu_read_unlock(); return -ENOENT; /* No such entry */ @@ -464,8 +457,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, ether_addr_copy(addr_b, node->MacAddressB); - tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A]; - if (node->time_in_stale[HSR_DEV_SLAVE_A]) + tdiff = jiffies - node->time_in[HSR_PT_SLAVE_A]; + if (node->time_in_stale[HSR_PT_SLAVE_A]) *if1_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) @@ -474,8 +467,8 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, else *if1_age = jiffies_to_msecs(tdiff); - tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B]; - if (node->time_in_stale[HSR_DEV_SLAVE_B]) + tdiff = jiffies - node->time_in[HSR_PT_SLAVE_B]; + if (node->time_in_stale[HSR_PT_SLAVE_B]) *if2_age = INT_MAX; #if HZ <= MSEC_PER_SEC else if (tdiff > msecs_to_jiffies(INT_MAX)) @@ -485,13 +478,15 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, *if2_age = jiffies_to_msecs(tdiff); /* Present sequence numbers as if they were incoming on interface */ - *if1_seq = node->seq_out[HSR_DEV_SLAVE_B]; - *if2_seq = node->seq_out[HSR_DEV_SLAVE_A]; + *if1_seq = node->seq_out[HSR_PT_SLAVE_B]; + *if2_seq = node->seq_out[HSR_PT_SLAVE_A]; - if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if]) - *addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex; - else + if (node->AddrB_port != HSR_PT_NONE) { + port = hsr_port_get_hsr(hsr, node->AddrB_port); + *addr_b_ifindex = port->dev->ifindex; + } else { *addr_b_ifindex = -1; + } rcu_read_unlock(); diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index e6c4022030ad..438b40f98f5a 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,42 +6,43 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ -#ifndef _HSR_FRAMEREG_H -#define _HSR_FRAMEREG_H +#ifndef __HSR_FRAMEREG_H +#define __HSR_FRAMEREG_H #include "hsr_main.h" -struct node_entry; +struct hsr_node; -struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb); +struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], + u16 seq_out); +struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, + bool is_sup); +void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, + struct hsr_port *port); +bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr); -struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv, - struct node_entry *node, - struct sk_buff *skb, - enum hsr_dev_idx dev_idx); +void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb); +void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, + struct hsr_port *port); -void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb); -void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, - enum hsr_dev_idx dev_idx); +void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, + u16 sequence_nr); +int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, + u16 sequence_nr); -void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx); - -int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx, - struct sk_buff *skb); - -void hsr_prune_nodes(struct hsr_priv *hsr_priv); +void hsr_prune_nodes(unsigned long data); int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], unsigned char addr_b[ETH_ALEN]); -void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos, +void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]); -int hsr_get_node_data(struct hsr_priv *hsr_priv, +int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], unsigned int *addr_b_ifindex, @@ -50,4 +51,4 @@ int hsr_get_node_data(struct hsr_priv *hsr_priv, int *if2_age, u16 *if2_seq); -#endif /* _HSR_FRAMEREG_H */ +#endif /* __HSR_FRAMEREG_H */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 3fee5218a691..779d28b65417 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,11 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com - * - * In addition to routines for registering and unregistering HSR support, this - * file also contains the receive routine that handles all incoming frames with - * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling. + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #include <linux/netdevice.h> @@ -21,154 +17,71 @@ #include "hsr_device.h" #include "hsr_netlink.h" #include "hsr_framereg.h" - - -/* List of all registered virtual HSR devices */ -static LIST_HEAD(hsr_list); - -void register_hsr_master(struct hsr_priv *hsr_priv) -{ - list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list); -} - -void unregister_hsr_master(struct hsr_priv *hsr_priv) -{ - struct hsr_priv *hsr_priv_it; - - list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list) - if (hsr_priv_it == hsr_priv) { - list_del_rcu(&hsr_priv_it->hsr_list); - return; - } -} - -bool is_hsr_slave(struct net_device *dev) -{ - struct hsr_priv *hsr_priv_it; - - list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) { - if (dev == hsr_priv_it->slave[0]) - return true; - if (dev == hsr_priv_it->slave[1]) - return true; - } - - return false; -} - - -/* If dev is a HSR slave device, return the virtual master device. Return NULL - * otherwise. - */ -static struct hsr_priv *get_hsr_master(struct net_device *dev) -{ - struct hsr_priv *hsr_priv; - - rcu_read_lock(); - list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) - if ((dev == hsr_priv->slave[0]) || - (dev == hsr_priv->slave[1])) { - rcu_read_unlock(); - return hsr_priv; - } - - rcu_read_unlock(); - return NULL; -} - - -/* If dev is a HSR slave device, return the other slave device. Return NULL - * otherwise. - */ -static struct net_device *get_other_slave(struct hsr_priv *hsr_priv, - struct net_device *dev) -{ - if (dev == hsr_priv->slave[0]) - return hsr_priv->slave[1]; - if (dev == hsr_priv->slave[1]) - return hsr_priv->slave[0]; - - return NULL; -} +#include "hsr_slave.h" static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, void *ptr) { - struct net_device *slave, *other_slave; - struct hsr_priv *hsr_priv; - int old_operstate; + struct net_device *dev; + struct hsr_port *port, *master; + struct hsr_priv *hsr; int mtu_max; int res; - struct net_device *dev; dev = netdev_notifier_info_to_dev(ptr); - - hsr_priv = get_hsr_master(dev); - if (hsr_priv) { - /* dev is a slave device */ - slave = dev; - other_slave = get_other_slave(hsr_priv, slave); - } else { + port = hsr_port_get_rtnl(dev); + if (port == NULL) { if (!is_hsr_master(dev)) - return NOTIFY_DONE; - hsr_priv = netdev_priv(dev); - slave = hsr_priv->slave[0]; - other_slave = hsr_priv->slave[1]; + return NOTIFY_DONE; /* Not an HSR device */ + hsr = netdev_priv(dev); + port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + } else { + hsr = port->hsr; } switch (event) { case NETDEV_UP: /* Administrative state DOWN */ case NETDEV_DOWN: /* Administrative state UP */ case NETDEV_CHANGE: /* Link (carrier) state changes */ - old_operstate = hsr_priv->dev->operstate; - hsr_set_carrier(hsr_priv->dev, slave, other_slave); - /* netif_stacked_transfer_operstate() cannot be used here since - * it doesn't set IF_OPER_LOWERLAYERDOWN (?) - */ - hsr_set_operstate(hsr_priv->dev, slave, other_slave); - hsr_check_announce(hsr_priv->dev, old_operstate); + hsr_check_carrier_and_operstate(hsr); break; case NETDEV_CHANGEADDR: - - /* This should not happen since there's no ndo_set_mac_address() - * for HSR devices - i.e. not supported. - */ - if (dev == hsr_priv->dev) + if (port->type == HSR_PT_MASTER) { + /* This should not happen since there's no + * ndo_set_mac_address() for HSR devices - i.e. not + * supported. + */ break; + } - if (dev == hsr_priv->slave[0]) - ether_addr_copy(hsr_priv->dev->dev_addr, - hsr_priv->slave[0]->dev_addr); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + + if (port->type == HSR_PT_SLAVE_A) { + ether_addr_copy(master->dev->dev_addr, dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, master->dev); + } /* Make sure we recognize frames from ourselves in hsr_rcv() */ - res = hsr_create_self_node(&hsr_priv->self_node_db, - hsr_priv->dev->dev_addr, - hsr_priv->slave[1] ? - hsr_priv->slave[1]->dev_addr : - hsr_priv->dev->dev_addr); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + res = hsr_create_self_node(&hsr->self_node_db, + master->dev->dev_addr, + port ? + port->dev->dev_addr : + master->dev->dev_addr); if (res) - netdev_warn(hsr_priv->dev, + netdev_warn(master->dev, "Could not update HSR node address.\n"); - - if (dev == hsr_priv->slave[0]) - call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev); break; case NETDEV_CHANGEMTU: - if (dev == hsr_priv->dev) + if (port->type == HSR_PT_MASTER) break; /* Handled in ndo_change_mtu() */ - mtu_max = hsr_get_max_mtu(hsr_priv); - if (hsr_priv->dev->mtu > mtu_max) - dev_set_mtu(hsr_priv->dev, mtu_max); + mtu_max = hsr_get_max_mtu(port->hsr); + master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); + master->dev->mtu = mtu_max; break; case NETDEV_UNREGISTER: - if (dev == hsr_priv->slave[0]) - hsr_priv->slave[0] = NULL; - if (dev == hsr_priv->slave[1]) - hsr_priv->slave[1] = NULL; - - /* There should really be a way to set a new slave device... */ - + hsr_del_port(port); break; case NETDEV_PRE_TYPE_CHANGE: /* HSR works only on Ethernet devices. Refuse slave to change @@ -181,255 +94,16 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, } -static struct timer_list prune_timer; - -static void prune_nodes_all(unsigned long data) -{ - struct hsr_priv *hsr_priv; - - rcu_read_lock(); - list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list) - hsr_prune_nodes(hsr_priv); - rcu_read_unlock(); - - prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); - add_timer(&prune_timer); -} - - -static struct sk_buff *hsr_pull_tag(struct sk_buff *skb) +struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) { - struct hsr_tag *hsr_tag; - struct sk_buff *skb2; - - skb2 = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb2)) - goto err_free; - skb = skb2; - - if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN))) - goto err_free; + struct hsr_port *port; - hsr_tag = (struct hsr_tag *) skb->data; - skb->protocol = hsr_tag->encap_proto; - skb_pull(skb, HSR_TAGLEN); - - return skb; - -err_free: - kfree_skb(skb); + hsr_for_each_port(hsr, port) + if (port->type == pt) + return port; return NULL; } - -/* The uses I can see for these HSR supervision frames are: - * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = - * 22") to reset any sequence_nr counters belonging to that node. Useful if - * the other node's counter has been reset for some reason. - * -- - * Or not - resetting the counter and bridging the frame would create a - * loop, unfortunately. - * - * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck - * frame is received from a particular node, we know something is wrong. - * We just register these (as with normal frames) and throw them away. - * - * 3) Allow different MAC addresses for the two slave interfaces, using the - * MacAddressA field. - */ -static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb) -{ - struct hsr_sup_tag *hsr_stag; - - if (!ether_addr_equal(eth_hdr(skb)->h_dest, - hsr_priv->sup_multicast_addr)) - return false; - - hsr_stag = (struct hsr_sup_tag *) skb->data; - if (get_hsr_stag_path(hsr_stag) != 0x0f) - return false; - if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) && - (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK)) - return false; - if (hsr_stag->HSR_TLV_Length != 12) - return false; - - return true; -} - - -/* Implementation somewhat according to IEC-62439-3, p. 43 - */ -static int hsr_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct hsr_priv *hsr_priv; - struct net_device *other_slave; - struct node_entry *node; - bool deliver_to_self; - struct sk_buff *skb_deliver; - enum hsr_dev_idx dev_in_idx, dev_other_idx; - bool dup_out; - int ret; - - hsr_priv = get_hsr_master(dev); - - if (!hsr_priv) { - /* Non-HSR-slave device 'dev' is connected to a HSR network */ - kfree_skb(skb); - dev->stats.rx_errors++; - return NET_RX_SUCCESS; - } - - if (dev == hsr_priv->slave[0]) { - dev_in_idx = HSR_DEV_SLAVE_A; - dev_other_idx = HSR_DEV_SLAVE_B; - } else { - dev_in_idx = HSR_DEV_SLAVE_B; - dev_other_idx = HSR_DEV_SLAVE_A; - } - - node = hsr_find_node(&hsr_priv->self_node_db, skb); - if (node) { - /* Always kill frames sent by ourselves */ - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* Is this frame a candidate for local reception? */ - deliver_to_self = false; - if ((skb->pkt_type == PACKET_HOST) || - (skb->pkt_type == PACKET_MULTICAST) || - (skb->pkt_type == PACKET_BROADCAST)) - deliver_to_self = true; - else if (ether_addr_equal(eth_hdr(skb)->h_dest, - hsr_priv->dev->dev_addr)) { - skb->pkt_type = PACKET_HOST; - deliver_to_self = true; - } - - - rcu_read_lock(); /* node_db */ - node = hsr_find_node(&hsr_priv->node_db, skb); - - if (is_supervision_frame(hsr_priv, skb)) { - skb_pull(skb, sizeof(struct hsr_sup_tag)); - node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx); - if (!node) { - rcu_read_unlock(); /* node_db */ - kfree_skb(skb); - hsr_priv->dev->stats.rx_dropped++; - return NET_RX_DROP; - } - skb_push(skb, sizeof(struct hsr_sup_tag)); - deliver_to_self = false; - } - - if (!node) { - /* Source node unknown; this might be a HSR frame from - * another net (different multicast address). Ignore it. - */ - rcu_read_unlock(); /* node_db */ - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* Register ALL incoming frames as outgoing through the other interface. - * This allows us to register frames as incoming only if they are valid - * for the receiving interface, without using a specific counter for - * incoming frames. - */ - dup_out = hsr_register_frame_out(node, dev_other_idx, skb); - if (!dup_out) - hsr_register_frame_in(node, dev_in_idx); - - /* Forward this frame? */ - if (!dup_out && (skb->pkt_type != PACKET_HOST)) - other_slave = get_other_slave(hsr_priv, dev); - else - other_slave = NULL; - - if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb)) - deliver_to_self = false; - - rcu_read_unlock(); /* node_db */ - - if (!deliver_to_self && !other_slave) { - kfree_skb(skb); - /* Circulated frame; silently remove it. */ - return NET_RX_SUCCESS; - } - - skb_deliver = skb; - if (deliver_to_self && other_slave) { - /* skb_clone() is not enough since we will strip the hsr tag - * and do address substitution below - */ - skb_deliver = pskb_copy(skb, GFP_ATOMIC); - if (!skb_deliver) { - deliver_to_self = false; - hsr_priv->dev->stats.rx_dropped++; - } - } - - if (deliver_to_self) { - bool multicast_frame; - - skb_deliver = hsr_pull_tag(skb_deliver); - if (!skb_deliver) { - hsr_priv->dev->stats.rx_dropped++; - goto forward; - } -#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - /* Move everything in the header that is after the HSR tag, - * to work around alignment problems caused by the 6-byte HSR - * tag. In practice, this removes/overwrites the HSR tag in - * the header and restores a "standard" packet. - */ - memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data, - skb_headlen(skb_deliver)); - - /* Adjust skb members so they correspond with the move above. - * This cannot possibly underflow skb->data since hsr_pull_tag() - * above succeeded. - * At this point in the protocol stack, the transport and - * network headers have not been set yet, and we haven't touched - * the mac header nor the head. So we only need to adjust data - * and tail: - */ - skb_deliver->data -= HSR_TAGLEN; - skb_deliver->tail -= HSR_TAGLEN; -#endif - skb_deliver->dev = hsr_priv->dev; - hsr_addr_subst_source(hsr_priv, skb_deliver); - multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST); - ret = netif_rx(skb_deliver); - if (ret == NET_RX_DROP) { - hsr_priv->dev->stats.rx_dropped++; - } else { - hsr_priv->dev->stats.rx_packets++; - hsr_priv->dev->stats.rx_bytes += skb->len; - if (multicast_frame) - hsr_priv->dev->stats.multicast++; - } - } - -forward: - if (other_slave) { - skb_push(skb, ETH_HLEN); - skb->dev = other_slave; - dev_queue_xmit(skb); - } - - return NET_RX_SUCCESS; -} - - -static struct packet_type hsr_pt __read_mostly = { - .type = htons(ETH_P_PRP), - .func = hsr_rcv, -}; - static struct notifier_block hsr_nb = { .notifier_call = hsr_netdev_notify, /* Slave event notifications */ }; @@ -439,18 +113,9 @@ static int __init hsr_init(void) { int res; - BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN); - - dev_add_pack(&hsr_pt); - - init_timer(&prune_timer); - prune_timer.function = prune_nodes_all; - prune_timer.data = 0; - prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD); - add_timer(&prune_timer); + BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN); register_netdevice_notifier(&hsr_nb); - res = hsr_netlink_init(); return res; @@ -459,9 +124,7 @@ static int __init hsr_init(void) static void __exit hsr_exit(void) { unregister_netdevice_notifier(&hsr_nb); - del_timer_sync(&prune_timer); hsr_netlink_exit(); - dev_remove_pack(&hsr_pt); } module_init(hsr_init); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 56fe060c0ab1..5a9c69962ded 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,11 +6,11 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ -#ifndef _HSR_PRIVATE_H -#define _HSR_PRIVATE_H +#ifndef __HSR_PRIVATE_H +#define __HSR_PRIVATE_H #include <linux/netdevice.h> #include <linux/list.h> @@ -29,6 +29,7 @@ * each node differ before we notify of communication problem? */ #define MAX_SLAVE_DIFF 3000 /* ms */ +#define HSR_SEQNR_START (USHRT_MAX - 1024) /* How often shall we check for broken ring and remove node entries older than @@ -46,16 +47,16 @@ * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr, * encapsulated protocol } instead. + * + * Field names as defined in the IEC:2010 standard for HSR. */ -#define HSR_TAGLEN 6 - -/* Field names below as defined in the IEC:2010 standard for HSR. */ struct hsr_tag { __be16 path_and_LSDU_size; __be16 sequence_nr; __be16 encap_proto; } __packed; +#define HSR_HLEN 6 /* The helper functions below assumes that 'path' occupies the 4 most * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or @@ -136,31 +137,47 @@ struct hsr_ethhdr_sp { } __packed; -enum hsr_dev_idx { - HSR_DEV_NONE = -1, - HSR_DEV_SLAVE_A = 0, - HSR_DEV_SLAVE_B, - HSR_DEV_MASTER, +enum hsr_port_type { + HSR_PT_NONE = 0, /* Must be 0, used by framereg */ + HSR_PT_SLAVE_A, + HSR_PT_SLAVE_B, + HSR_PT_INTERLINK, + HSR_PT_MASTER, + HSR_PT_PORTS, /* This must be the last item in the enum */ +}; + +struct hsr_port { + struct list_head port_list; + struct net_device *dev; + struct hsr_priv *hsr; + enum hsr_port_type type; }; -#define HSR_MAX_SLAVE (HSR_DEV_SLAVE_B + 1) -#define HSR_MAX_DEV (HSR_DEV_MASTER + 1) struct hsr_priv { - struct list_head hsr_list; /* List of hsr devices */ struct rcu_head rcu_head; - struct net_device *dev; - struct net_device *slave[HSR_MAX_SLAVE]; - struct list_head node_db; /* Other HSR nodes */ + struct list_head ports; + struct list_head node_db; /* Known HSR nodes */ struct list_head self_node_db; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ + struct timer_list prune_timer; int announce_count; u16 sequence_nr; spinlock_t seqnr_lock; /* locking for sequence_nr */ unsigned char sup_multicast_addr[ETH_ALEN]; }; -void register_hsr_master(struct hsr_priv *hsr_priv); -void unregister_hsr_master(struct hsr_priv *hsr_priv); -bool is_hsr_slave(struct net_device *dev); +#define hsr_for_each_port(hsr, port) \ + list_for_each_entry_rcu((port), &(hsr)->ports, port_list) + +struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt); + +/* Caller must ensure skb is a valid HSR frame */ +static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) +{ + struct hsr_ethhdr *hsr_ethhdr; + + hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb); + return ntohs(hsr_ethhdr->hsr_tag.sequence_nr); +} -#endif /* _HSR_PRIVATE_H */ +#endif /* __HSR_PRIVATE_H */ diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 01a5261ac7a5..a2c7e4c0ac1e 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * * Routines for handling Netlink messages for HSR. */ @@ -37,13 +37,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct net_device *link[2]; unsigned char multicast_spec; + if (!data) { + netdev_info(dev, "HSR: No slave devices specified\n"); + return -EINVAL; + } if (!data[IFLA_HSR_SLAVE1]) { - netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n"); + netdev_info(dev, "HSR: Slave1 device not specified\n"); return -EINVAL; } link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1])); if (!data[IFLA_HSR_SLAVE2]) { - netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n"); + netdev_info(dev, "HSR: Slave2 device not specified\n"); return -EINVAL; } link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2])); @@ -63,21 +67,33 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; + int res; - hsr_priv = netdev_priv(dev); + hsr = netdev_priv(dev); - if (hsr_priv->slave[0]) - if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) - goto nla_put_failure; + res = 0; - if (hsr_priv->slave[1]) - if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) - goto nla_put_failure; + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (port) + res = nla_put_u32(skb, IFLA_HSR_SLAVE1, port->dev->ifindex); + rcu_read_unlock(); + if (res) + goto nla_put_failure; + + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) + res = nla_put_u32(skb, IFLA_HSR_SLAVE2, port->dev->ifindex); + rcu_read_unlock(); + if (res) + goto nla_put_failure; if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, - hsr_priv->sup_multicast_addr) || - nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + hsr->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr)) goto nla_put_failure; return 0; @@ -128,13 +144,13 @@ static const struct genl_multicast_group hsr_mcgrps[] = { * over one of the slave interfaces. This would indicate an open network ring * (i.e. a link has failed somewhere). */ -void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], - enum hsr_dev_idx dev_idx) +void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN], + struct hsr_port *port) { struct sk_buff *skb; void *msg_head; + struct hsr_port *master; int res; - int ifindex; skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) @@ -148,11 +164,7 @@ void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[dev_idx]) - ifindex = hsr_priv->slave[dev_idx]->ifindex; - else - ifindex = -1; - res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex); + res = nla_put_u32(skb, HSR_A_IFINDEX, port->dev->ifindex); if (res < 0) goto nla_put_failure; @@ -165,16 +177,20 @@ nla_put_failure: kfree_skb(skb); fail: - netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n"); + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_warn(master->dev, "Could not send HSR ring error message\n"); + rcu_read_unlock(); } /* This is called when we haven't heard from the node with MAC address addr for * some time (just before the node is removed from the node table/list). */ -void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]) +void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]) { struct sk_buff *skb; void *msg_head; + struct hsr_port *master; int res; skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -199,7 +215,10 @@ nla_put_failure: kfree_skb(skb); fail: - netdev_warn(hsr_priv->dev, "Could not send HSR node down\n"); + rcu_read_lock(); + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_warn(master->dev, "Could not send HSR node down\n"); + rcu_read_unlock(); } @@ -220,7 +239,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) /* For sending */ struct sk_buff *skb_out; void *msg_head; - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; + struct hsr_port *port; unsigned char hsr_node_addr_b[ETH_ALEN]; int hsr_node_if1_age; u16 hsr_node_if1_seq; @@ -267,8 +287,8 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) if (res < 0) goto nla_put_failure; - hsr_priv = netdev_priv(hsr_dev); - res = hsr_get_node_data(hsr_priv, + hsr = netdev_priv(hsr_dev); + res = hsr_get_node_data(hsr, (unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]), hsr_node_addr_b, &addr_b_ifindex, @@ -301,9 +321,12 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[0]) + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); + if (port) res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, - hsr_priv->slave[0]->ifindex); + port->dev->ifindex); + rcu_read_unlock(); if (res < 0) goto nla_put_failure; @@ -313,9 +336,14 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); if (res < 0) goto nla_put_failure; - if (hsr_priv->slave[1]) + rcu_read_lock(); + port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); + if (port) res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, - hsr_priv->slave[1]->ifindex); + port->dev->ifindex); + rcu_read_unlock(); + if (res < 0) + goto nla_put_failure; genlmsg_end(skb_out, msg_head); genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); @@ -334,7 +362,7 @@ fail: return res; } -/* Get a list of MacAddressA of all nodes known to this node (other than self). +/* Get a list of MacAddressA of all nodes known to this node (including self). */ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) { @@ -345,7 +373,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) /* For sending */ struct sk_buff *skb_out; void *msg_head; - struct hsr_priv *hsr_priv; + struct hsr_priv *hsr; void *pos; unsigned char addr[ETH_ALEN]; int res; @@ -385,17 +413,17 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (res < 0) goto nla_put_failure; - hsr_priv = netdev_priv(hsr_dev); + hsr = netdev_priv(hsr_dev); rcu_read_lock(); - pos = hsr_get_next_node(hsr_priv, NULL, addr); + pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); if (res < 0) { rcu_read_unlock(); goto nla_put_failure; } - pos = hsr_get_next_node(hsr_priv, pos, addr); + pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h index d4579dcc3c7d..3f6b95b5b6b8 100644 --- a/net/hsr/hsr_netlink.h +++ b/net/hsr/hsr_netlink.h @@ -1,4 +1,4 @@ -/* Copyright 2011-2013 Autronica Fire and Security AS +/* Copyright 2011-2014 Autronica Fire and Security AS * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -6,7 +6,7 @@ * any later version. * * Author(s): - * 2011-2013 Arvid Brodin, arvid.brodin@xdin.com + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se */ #ifndef __HSR_NETLINK_H @@ -17,13 +17,14 @@ #include <uapi/linux/hsr_netlink.h> struct hsr_priv; +struct hsr_port; int __init hsr_netlink_init(void); void __exit hsr_netlink_exit(void); -void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN], - int dev_idx); -void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]); +void hsr_nl_ringerror(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN], + struct hsr_port *port); +void hsr_nl_nodedown(struct hsr_priv *hsr, unsigned char addr[ETH_ALEN]); void hsr_nl_framedrop(int dropcount, int dev_idx); void hsr_nl_linkdown(int dev_idx); diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c new file mode 100644 index 000000000000..a348dcbcd683 --- /dev/null +++ b/net/hsr/hsr_slave.c @@ -0,0 +1,196 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#include "hsr_slave.h" +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include "hsr_main.h" +#include "hsr_device.h" +#include "hsr_forward.h" +#include "hsr_framereg.h" + + +static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct hsr_port *port; + + if (!skb_mac_header_was_set(skb)) { + WARN_ONCE(1, "%s: skb invalid", __func__); + return RX_HANDLER_PASS; + } + + rcu_read_lock(); /* hsr->node_db, hsr->ports */ + port = hsr_port_get_rcu(skb->dev); + + if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { + /* Directly kill frames sent by ourselves */ + kfree_skb(skb); + goto finish_consume; + } + + if (eth_hdr(skb)->h_proto != htons(ETH_P_PRP)) + goto finish_pass; + + skb_push(skb, ETH_HLEN); + + hsr_forward_skb(skb, port); + +finish_consume: + rcu_read_unlock(); /* hsr->node_db, hsr->ports */ + return RX_HANDLER_CONSUMED; + +finish_pass: + rcu_read_unlock(); /* hsr->node_db, hsr->ports */ + return RX_HANDLER_PASS; +} + +bool hsr_port_exists(const struct net_device *dev) +{ + return rcu_access_pointer(dev->rx_handler) == hsr_handle_frame; +} + + +static int hsr_check_dev_ok(struct net_device *dev) +{ + /* Don't allow HSR on non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) || + (dev->addr_len != ETH_ALEN)) { + netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n"); + return -EINVAL; + } + + /* Don't allow enslaving hsr devices */ + if (is_hsr_master(dev)) { + netdev_info(dev, "Cannot create trees of HSR devices.\n"); + return -EINVAL; + } + + if (hsr_port_exists(dev)) { + netdev_info(dev, "This device is already a HSR slave.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n"); + return -EINVAL; + } + + if (dev->priv_flags & IFF_DONT_BRIDGE) { + netdev_info(dev, "This device does not support bridging.\n"); + return -EOPNOTSUPP; + } + + /* HSR over bonded devices has not been tested, but I'm not sure it + * won't work... + */ + + return 0; +} + + +/* Setup device to be added to the HSR bridge. */ +static int hsr_portdev_setup(struct net_device *dev, struct hsr_port *port) +{ + int res; + + dev_hold(dev); + res = dev_set_promiscuity(dev, 1); + if (res) + goto fail_promiscuity; + + /* FIXME: + * What does net device "adjacency" mean? Should we do + * res = netdev_master_upper_dev_link(port->dev, port->hsr->dev); ? + */ + + res = netdev_rx_handler_register(dev, hsr_handle_frame, port); + if (res) + goto fail_rx_handler; + dev_disable_lro(dev); + + return 0; + +fail_rx_handler: + dev_set_promiscuity(dev, -1); +fail_promiscuity: + dev_put(dev); + + return res; +} + +int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, + enum hsr_port_type type) +{ + struct hsr_port *port, *master; + int res; + + if (type != HSR_PT_MASTER) { + res = hsr_check_dev_ok(dev); + if (res) + return res; + } + + port = hsr_port_get_hsr(hsr, type); + if (port != NULL) + return -EBUSY; /* This port already exists */ + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (port == NULL) + return -ENOMEM; + + if (type != HSR_PT_MASTER) { + res = hsr_portdev_setup(dev, port); + if (res) + goto fail_dev_setup; + } + + port->hsr = hsr; + port->dev = dev; + port->type = type; + + list_add_tail_rcu(&port->port_list, &hsr->ports); + synchronize_rcu(); + + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + + return 0; + +fail_dev_setup: + kfree(port); + return res; +} + +void hsr_del_port(struct hsr_port *port) +{ + struct hsr_priv *hsr; + struct hsr_port *master; + + hsr = port->hsr; + master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + list_del_rcu(&port->port_list); + + if (port != master) { + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + netdev_rx_handler_unregister(port->dev); + dev_set_promiscuity(port->dev, -1); + } + + /* FIXME? + * netdev_upper_dev_unlink(port->dev, port->hsr->dev); + */ + + synchronize_rcu(); + dev_put(port->dev); +} diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h new file mode 100644 index 000000000000..3ccfbf71c92e --- /dev/null +++ b/net/hsr/hsr_slave.h @@ -0,0 +1,38 @@ +/* Copyright 2011-2014 Autronica Fire and Security AS + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Author(s): + * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + */ + +#ifndef __HSR_SLAVE_H +#define __HSR_SLAVE_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include "hsr_main.h" + +int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, + enum hsr_port_type pt); +void hsr_del_port(struct hsr_port *port); +bool hsr_port_exists(const struct net_device *dev); + +static inline struct hsr_port *hsr_port_get_rtnl(const struct net_device *dev) +{ + ASSERT_RTNL(); + return hsr_port_exists(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + +static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev) +{ + return hsr_port_exists(dev) ? + rcu_dereference(dev->rx_handler_data) : NULL; +} + +#endif /* __HSR_SLAVE_H */ diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c index 211b5686d719..511ddeee7353 100644 --- a/net/ieee802154/6lowpan_iphc.c +++ b/net/ieee802154/6lowpan_iphc.c @@ -3,8 +3,7 @@ * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ -/* - * Based on patches from Jon Smirl <jonsmirl@gmail.com> +/* Based on patches from Jon Smirl <jonsmirl@gmail.com> * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -58,16 +57,15 @@ #include <net/ipv6.h> #include <net/af_ieee802154.h> -/* - * Uncompress address function for source and +/* Uncompress address function for source and * destination address(non-multicast). * * address_mode is sam value or dam value. */ static int uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, const u8 address_mode, - const u8 *lladdr, const u8 addr_type, - const u8 addr_len) + struct in6_addr *ipaddr, const u8 address_mode, + const u8 *lladdr, const u8 addr_type, + const u8 addr_len) { bool fail; @@ -140,13 +138,12 @@ static int uncompress_addr(struct sk_buff *skb, return 0; } -/* - * Uncompress address function for source context +/* Uncompress address function for source context * based address(non-multicast). */ static int uncompress_context_based_src_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 sam) + struct in6_addr *ipaddr, + const u8 sam) { switch (sam) { case LOWPAN_IPHC_ADDR_00: @@ -175,13 +172,13 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb, } static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, - struct net_device *dev, skb_delivery_cb deliver_skb) + struct net_device *dev, skb_delivery_cb deliver_skb) { struct sk_buff *new; int stat; - new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb), - GFP_ATOMIC); + new = skb_copy_expand(skb, sizeof(struct ipv6hdr), + skb_tailroom(skb), GFP_ATOMIC); kfree_skb(skb); if (!new) @@ -196,7 +193,7 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, new->dev = dev; raw_dump_table(__func__, "raw skb data dump before receiving", - new->data, new->len); + new->data, new->len); stat = deliver_skb(new, dev); @@ -210,8 +207,8 @@ static int skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr, */ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, - struct in6_addr *ipaddr, - const u8 dam) + struct in6_addr *ipaddr, + const u8 dam) { bool fail; @@ -314,8 +311,7 @@ uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) fail |= lowpan_fetch_skb(skb, &uh->check, 2); } - /* - * UDP lenght needs to be infered from the lower layers + /* UDP lenght needs to be infered from the lower layers * here, we obtain the hint from the remaining size of the * frame */ @@ -338,16 +334,17 @@ err: static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, - const u8 *saddr, const u8 saddr_type, const u8 saddr_len, - const u8 *daddr, const u8 daddr_type, const u8 daddr_len, - u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) + const u8 *saddr, const u8 saddr_type, + const u8 saddr_len, const u8 *daddr, + const u8 daddr_type, const u8 daddr_len, + u8 iphc0, u8 iphc1, skb_delivery_cb deliver_skb) { struct ipv6hdr hdr = {}; u8 tmp, num_context = 0; int err; raw_dump_table(__func__, "raw skb data dump uncompressed", - skb->data, skb->len); + skb->data, skb->len); /* another if the CID flag is set */ if (iphc1 & LOWPAN_IPHC_CID) { @@ -360,8 +357,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, /* Traffic Class and Flow Label */ switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* - * Traffic Class and FLow Label carried in-line + /* Traffic Class and FLow Label carried in-line * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */ case 0: /* 00b */ @@ -374,8 +370,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | (hdr.flow_lbl[0] & 0x0f); break; - /* - * Traffic class carried in-line + /* Traffic class carried in-line * ECN + DSCP (1 byte), Flow Label is elided */ case 2: /* 10b */ @@ -385,8 +380,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, hdr.priority = ((tmp >> 2) & 0x0f); hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); break; - /* - * Flow Label carried in-line + /* Flow Label carried in-line * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided */ case 1: /* 01b */ @@ -415,9 +409,9 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, } /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) + if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) { hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; - else { + } else { if (lowpan_fetch_skb_u8(skb, &(hdr.hop_limit))) goto drop; } @@ -429,12 +423,12 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, /* Source address context based uncompression */ pr_debug("SAC bit is set. Handle context based source address.\n"); err = uncompress_context_based_src_addr( - skb, &hdr.saddr, tmp); + skb, &hdr.saddr, tmp); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, - saddr_type, saddr_len); + saddr_type, saddr_len); } /* Check on error of previous branch */ @@ -457,9 +451,9 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, } } else { err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, - daddr_type, daddr_len); + daddr_type, daddr_len); pr_debug("dest: stateless compression mode %d dest %pI6c\n", - tmp, &hdr.daddr); + tmp, &hdr.daddr); if (err) goto drop; } @@ -468,11 +462,11 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, if (iphc0 & LOWPAN_IPHC_NH_C) { struct udphdr uh; struct sk_buff *new; + if (uncompress_udp_header(skb, &uh)) goto drop; - /* - * replace the compressed UDP head by the uncompressed UDP + /* replace the compressed UDP head by the uncompressed UDP * header */ new = skb_copy_expand(skb, sizeof(struct udphdr), @@ -489,7 +483,7 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); raw_dump_table(__func__, "raw UDP header dump", - (u8 *)&uh, sizeof(uh)); + (u8 *)&uh, sizeof(uh)); hdr.nexthdr = UIP_PROTO_UDP; } @@ -504,8 +498,8 @@ int lowpan_process_data(struct sk_buff *skb, struct net_device *dev, hdr.version, ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit, &hdr.daddr); - raw_dump_table(__func__, "raw header dump", (u8 *)&hdr, - sizeof(hdr)); + raw_dump_table(__func__, "raw header dump", + (u8 *)&hdr, sizeof(hdr)); return skb_deliver(skb, &hdr, dev, deliver_skb); @@ -516,8 +510,8 @@ drop: EXPORT_SYMBOL_GPL(lowpan_process_data); static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, - const struct in6_addr *ipaddr, - const unsigned char *lladdr) + const struct in6_addr *ipaddr, + const unsigned char *lladdr) { u8 val = 0; @@ -530,14 +524,14 @@ static u8 lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, *hc06_ptr += 2; val = 2; /* 16-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", - *hc06_ptr - 2, 2); + *hc06_ptr - 2, 2); } else { /* do not compress IID => xxxx::IID */ memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8); *hc06_ptr += 8; val = 1; /* 64-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", - *hc06_ptr - 8, 8); + *hc06_ptr - 8, 8); } return rol8(val, shift); @@ -601,8 +595,8 @@ static void compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) } int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) { u8 tmp, iphc0, iphc1, *hc06_ptr; struct ipv6hdr *hdr; @@ -616,14 +610,13 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, pr_debug("IPv6 header dump:\n\tversion = %d\n\tlength = %d\n" "\tnexthdr = 0x%02x\n\thop_lim = %d\n\tdest = %pI6c\n", - hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, - hdr->hop_limit, &hdr->daddr); + hdr->version, ntohs(hdr->payload_len), hdr->nexthdr, + hdr->hop_limit, &hdr->daddr); raw_dump_table(__func__, "raw skb network header dump", - skb_network_header(skb), sizeof(struct ipv6hdr)); + skb_network_header(skb), sizeof(struct ipv6hdr)); - /* - * As we copy some bit-length fields, in the IPHC encoding bytes, + /* As we copy some bit-length fields, in the IPHC encoding bytes, * we sometimes use |= * If the field is 0, and the current bit value in memory is 1, * this does not work. We therefore reset the IPHC encoding here @@ -639,11 +632,10 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, (unsigned char *)_daddr, IEEE802154_ADDR_LEN); raw_dump_table(__func__, - "sending raw skb network uncompressed packet", - skb->data, skb->len); + "sending raw skb network uncompressed packet", + skb->data, skb->len); - /* - * Traffic class, flow label + /* Traffic class, flow label * If flow label is 0, compress it. If traffic class is 0, compress it * We have to process both in the same time as the offset of traffic * class depends on the presence of version and flow label @@ -654,11 +646,11 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, tmp = ((tmp & 0x03) << 6) | (tmp >> 2); if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { + (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { /* flow label can be compressed */ iphc0 |= LOWPAN_IPHC_FL_C; if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { + ((hdr->flow_lbl[0] & 0xF0) == 0)) { /* compress (elide) all */ iphc0 |= LOWPAN_IPHC_TC_C; } else { @@ -669,7 +661,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } else { /* Flow label cannot be compressed */ if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { + ((hdr->flow_lbl[0] & 0xF0) == 0)) { /* compress only traffic class */ iphc0 |= LOWPAN_IPHC_TC_C; *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); @@ -695,8 +687,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, hc06_ptr += 1; } - /* - * Hop limit + /* Hop limit * if 1: compress, encoding is 01 * if 64: compress, encoding is 10 * if 255: compress, encoding is 11 @@ -793,7 +784,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, pr_debug("header len %d skb %u\n", (int)(hc06_ptr - head), skb->len); raw_dump_table(__func__, "raw skb data dump compressed", - skb->data, skb->len); + skb->data, skb->len); return 0; } EXPORT_SYMBOL_GPL(lowpan_header_compress); diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c index fe6bd7a71081..016b77ee88f0 100644 --- a/net/ieee802154/6lowpan_rtnl.c +++ b/net/ieee802154/6lowpan_rtnl.c @@ -80,14 +80,14 @@ lowpan_dev_info *lowpan_dev_info(const struct net_device *dev) static inline void lowpan_address_flip(u8 *src, u8 *dest) { int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i]; } -static int lowpan_header_create(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) +static int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *_daddr, + const void *_saddr, unsigned int len) { const u8 *saddr = _saddr; const u8 *daddr = _daddr; @@ -144,7 +144,7 @@ static int lowpan_header_create(struct sk_buff *skb, } static int lowpan_give_skb_to_devices(struct sk_buff *skb, - struct net_device *dev) + struct net_device *dev) { struct lowpan_dev_record *entry; struct sk_buff *skb_cp; @@ -368,24 +368,28 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_phy(real_dev); } static __le16 lowpan_get_pan_id(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_pan_id(real_dev); } static __le16 lowpan_get_short_addr(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } static u8 lowpan_get_dsn(const struct net_device *dev) { struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); } @@ -454,7 +458,7 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) } static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { struct ieee802154_hdr hdr; int ret; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 351d9a94ec2f..29e0de63001b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -40,9 +40,7 @@ #include "af802154.h" -/* - * Utility function for families - */ +/* Utility function for families */ struct net_device* ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) { @@ -87,8 +85,8 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) rtnl_unlock(); break; default: - pr_warning("Unsupported ieee802154 address type: %d\n", - addr->mode); + pr_warn("Unsupported ieee802154 address type: %d\n", + addr->mode); break; } @@ -106,7 +104,7 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -114,7 +112,7 @@ static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, - int addr_len) + int addr_len) { struct sock *sk = sock->sk; @@ -125,7 +123,7 @@ static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, } static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) + int addr_len, int flags) { struct sock *sk = sock->sk; @@ -139,7 +137,7 @@ static int ieee802154_sock_connect(struct socket *sock, struct sockaddr *uaddr, } static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, - unsigned int cmd) + unsigned int cmd) { struct ifreq ifr; int ret = -ENOIOCTLCMD; @@ -167,7 +165,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, } static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) + unsigned long arg) { struct sock *sk = sock->sk; @@ -238,8 +236,7 @@ static const struct proto_ops ieee802154_dgram_ops = { }; -/* - * Create a socket. Initialise the socket, blank the addresses +/* Create a socket. Initialise the socket, blank the addresses * set the state. */ static int ieee802154_create(struct net *net, struct socket *sock, @@ -301,13 +298,14 @@ static const struct net_proto_family ieee802154_family_ops = { }; static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { if (!netif_running(dev)) goto drop; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); #ifdef DEBUG - print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); + print_hex_dump_bytes("ieee802154_rcv ", + DUMP_PREFIX_NONE, skb->data, skb->len); #endif if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 4f0ed8780194..ef2ad8aaef13 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -149,8 +149,7 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { - /* - * We will only return the amount + /* We will only return the amount * of this packet since that is all * that will be read. */ @@ -161,12 +160,13 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) } } + return -ENOIOCTLCMD; } /* FIXME: autobind */ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, - int len) + int len) { struct sockaddr_ieee802154 *addr = (struct sockaddr_ieee802154 *)uaddr; struct dgram_sock *ro = dgram_sk(sk); @@ -205,7 +205,7 @@ static int dgram_disconnect(struct sock *sk, int flags) } static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) + struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -248,8 +248,8 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, - &err); + msg->msg_flags & MSG_DONTWAIT, + &err); if (!skb) goto out_dev; @@ -262,7 +262,8 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, cb->ackreq = ro->want_ack; if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name); + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, + daddr, msg->msg_name); ieee802154_addr_from_sa(&dst_addr, &daddr->addr); } else { @@ -304,8 +305,8 @@ out: } static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, int flags, - int *addr_len) + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -398,6 +399,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) dgram_sk(sk))) { if (prev) { struct sk_buff *clone; + clone = skb_clone(skb, GFP_ATOMIC); if (clone) dgram_rcv_skb(prev, clone); @@ -407,9 +409,9 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) } } - if (prev) + if (prev) { dgram_rcv_skb(prev, skb); - else { + } else { kfree_skb(skb); ret = NET_RX_DROP; } @@ -419,7 +421,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) } static int dgram_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { struct dgram_sock *ro = dgram_sk(sk); @@ -463,7 +465,7 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, } static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + char __user *optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); struct net *net = sock_net(sk); diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index 8b83a231299e..5d352f86979e 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -43,7 +43,7 @@ struct genl_info; struct sk_buff *ieee802154_nl_create(int flags, u8 req); int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group); struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, - int flags, u8 req); + int flags, u8 req); int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info); extern struct genl_family nl802154_family; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 26efcf4fd2ff..9222966f5e6d 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -52,7 +52,7 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req) spin_lock_irqsave(&ieee802154_seq_lock, f); hdr = genlmsg_put(msg, 0, ieee802154_seq_num++, - &nl802154_family, flags, req); + &nl802154_family, flags, req); spin_unlock_irqrestore(&ieee802154_seq_lock, f); if (!hdr) { nlmsg_free(msg); @@ -86,7 +86,7 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, return NULL; hdr = genlmsg_put_reply(msg, info, - &nl802154_family, flags, req); + &nl802154_family, flags, req); if (!hdr) { nlmsg_free(msg); return NULL; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index a3281b8bfd5b..c6bfe22bfa5e 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -60,7 +60,8 @@ static __le16 nla_get_shortaddr(const struct nlattr *nla) } int ieee802154_nl_assoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 cap) + struct ieee802154_addr *addr, + u8 cap) { struct sk_buff *msg; @@ -93,7 +94,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_assoc_indic); int ieee802154_nl_assoc_confirm(struct net_device *dev, __le16 short_addr, - u8 status) + u8 status) { struct sk_buff *msg; @@ -119,7 +120,8 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_assoc_confirm); int ieee802154_nl_disassoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 reason) + struct ieee802154_addr *addr, + u8 reason) { struct sk_buff *msg; @@ -205,8 +207,9 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_beacon_indic); int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, u8 page, - u8 *edl/* , struct list_head *pan_desc_list */) + u8 status, u8 scan_type, + u32 unscanned, u8 page, + u8 *edl/* , struct list_head *pan_desc_list */) { struct sk_buff *msg; @@ -260,7 +263,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_start_confirm); static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, - u32 seq, int flags, struct net_device *dev) + u32 seq, int flags, struct net_device *dev) { void *hdr; struct wpan_phy *phy; @@ -270,7 +273,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, pr_debug("%s\n", __func__); hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, - IEEE802154_LIST_IFACE); + IEEE802154_LIST_IFACE); if (!hdr) goto out; @@ -330,14 +333,16 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { char name[IFNAMSIZ + 1]; + nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], - sizeof(name)); + sizeof(name)); dev = dev_get_by_name(&init_net, name); - } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) + } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) { dev = dev_get_by_index(&init_net, nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); - else + } else { return NULL; + } if (!dev) return NULL; @@ -435,7 +440,7 @@ int ieee802154_disassociate_req(struct sk_buff *skb, struct genl_info *info) int ret = -EOPNOTSUPP; if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && - !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || !info->attrs[IEEE802154_ATTR_REASON]) return -EINVAL; @@ -464,8 +469,7 @@ out: return ret; } -/* - * PANid, channel, beacon_order = 15, superframe_order = 15, +/* PANid, channel, beacon_order = 15, superframe_order = 15, * PAN_coordinator, battery_life_extension = 0, * coord_realignment = 0, security_enable = 0 */ @@ -559,8 +563,8 @@ int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) page = 0; - ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, - duration); + ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, + page, duration); out: dev_put(dev); @@ -570,7 +574,8 @@ out: int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, - PAN Id, short address */ + * PAN Id, short address + */ struct sk_buff *msg; struct net_device *dev = NULL; int rc = -ENOBUFS; @@ -586,7 +591,7 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info) goto out_dev; rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, - 0, dev); + 0, dev); if (rc < 0) goto out_free; @@ -598,7 +603,6 @@ out_free: out_dev: dev_put(dev); return rc; - } int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb) @@ -616,7 +620,8 @@ int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) + cb->nlh->nlmsg_seq, + NLM_F_MULTI, dev) < 0) break; cont: idx++; @@ -765,6 +770,7 @@ ieee802154_llsec_parse_key_id(struct genl_info *info, case IEEE802154_SCF_KEY_SHORT_INDEX: { u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]); + desc->short_source = cpu_to_le32(source); break; } @@ -842,7 +848,7 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info) goto out_dev; hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0, - IEEE802154_LLSEC_GETPARAMS); + IEEE802154_LLSEC_GETPARAMS); if (!hdr) goto out_free; @@ -946,7 +952,7 @@ struct llsec_dump_data { static int ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb, - int (*step)(struct llsec_dump_data*)) + int (*step)(struct llsec_dump_data *)) { struct net *net = sock_net(skb->sk); struct net_device *dev; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 89b265aea151..972baf83411a 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -36,7 +36,7 @@ #include "ieee802154.h" static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, - u32 seq, int flags, struct wpan_phy *phy) + u32 seq, int flags, struct wpan_phy *phy) { void *hdr; int i, pages = 0; @@ -48,7 +48,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, - IEEE802154_LIST_PHY); + IEEE802154_LIST_PHY); if (!hdr) goto out; @@ -80,7 +80,8 @@ out: int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info) { /* Request for interface name, index, type, IEEE address, - PAN Id, short address */ + * PAN Id, short address + */ struct sk_buff *msg; struct wpan_phy *phy; const char *name; @@ -105,7 +106,7 @@ int ieee802154_list_phy(struct sk_buff *skb, struct genl_info *info) goto out_dev; rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, - 0, phy); + 0, phy); if (rc < 0) goto out_free; @@ -117,7 +118,6 @@ out_free: out_dev: wpan_phy_put(phy); return rc; - } struct dump_phy_data { @@ -137,10 +137,10 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).portid, - data->cb->nlh->nlmsg_seq, - NLM_F_MULTI, - phy); + NETLINK_CB(data->cb->skb).portid, + data->cb->nlh->nlmsg_seq, + NLM_F_MULTI, + phy); if (rc < 0) { data->idx--; @@ -238,10 +238,9 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info) addr.sa_family = ARPHRD_IEEE802154; nla_memcpy(&addr.sa_data, info->attrs[IEEE802154_ATTR_HW_ADDR], - IEEE802154_ADDR_LEN); + IEEE802154_ADDR_LEN); - /* - * strangely enough, some callbacks (inetdev_event) from + /* strangely enough, some callbacks (inetdev_event) from * dev_set_mac_address require RTNL_LOCK */ rtnl_lock(); diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 74d54fae33d7..9d1f64806f02 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -96,7 +96,7 @@ out: } static int raw_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) + int addr_len) { return -ENOTSUPP; } @@ -106,8 +106,8 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -145,7 +145,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(sk, hlen + tlen + size, - msg->msg_flags & MSG_DONTWAIT, &err); + msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) goto out_dev; @@ -235,7 +235,6 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) bh_lock_sock(sk); if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) { - struct sk_buff *clone; clone = skb_clone(skb, GFP_ATOMIC); @@ -248,13 +247,13 @@ void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb) } static int raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + char __user *optval, unsigned int optlen) { return -EOPNOTSUPP; } @@ -274,4 +273,3 @@ struct proto ieee802154_raw_prot = { .getsockopt = raw_getsockopt, .setsockopt = raw_setsockopt, }; - diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c index 6f1428c4870b..b85bd3f7048e 100644 --- a/net/ieee802154/reassembly.c +++ b/net/ieee802154/reassembly.c @@ -378,6 +378,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) fq = fq_find(net, frag_info, &source, &dest); if (fq != NULL) { int ret; + spin_lock(&fq->q.lock); ret = lowpan_frag_queue(fq, skb, frag_type); spin_unlock(&fq->q.lock); diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index 8d6f6704da84..4955e0fe5883 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -48,7 +48,8 @@ MASTER_SHOW(transmit_power, "%d +- 1 dB"); MASTER_SHOW(cca_mode, "%d"); static ssize_t channels_supported_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); int ret; @@ -57,7 +58,7 @@ static ssize_t channels_supported_show(struct device *dev, mutex_lock(&phy->pib_lock); for (i = 0; i < 32; i++) { ret = snprintf(buf + len, PAGE_SIZE - len, - "%#09x\n", phy->channels_supported[i]); + "%#09x\n", phy->channels_supported[i]); if (ret < 0) break; len += ret; @@ -80,6 +81,7 @@ ATTRIBUTE_GROUPS(pmib); static void wpan_phy_release(struct device *d) { struct wpan_phy *phy = container_of(d, struct wpan_phy, dev); + kfree(phy); } @@ -121,11 +123,12 @@ static int wpan_phy_iter(struct device *dev, void *_data) { struct wpan_phy_iter_data *wpid = _data; struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + return wpid->fn(phy, wpid->data); } int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), - void *data) + void *data) { struct wpan_phy_iter_data wpid = { .fn = fn, @@ -197,6 +200,7 @@ EXPORT_SYMBOL(wpan_phy_free); static int __init wpan_phy_class_init(void) { int rc; + rc = class_register(&wpan_phy_class); if (rc) goto err; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index a3095fdefbed..90c0e8386116 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -76,6 +76,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; + inet_set_txhash(sk); inet->inet_id = jiffies; sk_dst_set(sk, &rt->dst); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 097b3e7c1e8f..54b6731dab55 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst, { struct dst_entry *old_dst; - if (dst) { - if (dst->flags & DST_NOCACHE) - dst = NULL; - else - dst_clone(dst); - } + dst_clone(dst); old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); dst_release(old_dst); } @@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) rcu_read_lock(); dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; if (dst) { if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { - rcu_read_unlock(); tunnel_dst_reset(t); - return NULL; + dst_release(dst); + dst = NULL; } - dst_hold(dst); } rcu_read_unlock(); return (struct rtable *)dst; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b3e86ea7b71b..5bbef4fdcb43 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -143,8 +143,6 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ -__be32 ic_dev_xid; /* Device under configuration */ - /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -654,6 +652,7 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; +static __be32 ic_dev_xid; /* Device under configuration */ /* * Initialize DHCP/BOOTP extension fields in the request. @@ -1218,10 +1217,10 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { +#ifdef IPCONFIG_BOOTP /* Track the device we are configuring */ ic_dev_xid = d->xid; -#ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c86624b36a62..c0c75688896e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -170,7 +170,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, + __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 62e48cf84e60..9771563ab564 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -131,7 +131,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 40661fc1e233..bb684967f7a7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1162,7 +1162,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, unsigned int new_len = (pkt_len / mss) * mss; if (!in_sack && new_len < pkt_len) { new_len += mss; - if (new_len > skb->len) + if (new_len >= skb->len) return 0; } pkt_len = new_len; @@ -5877,3 +5877,153 @@ discard: return 0; } EXPORT_SYMBOL(tcp_rcv_state_process); + +static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + if (family == AF_INET) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), + &ireq->ir_rmt_addr, port); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI6/%u\n"), + &ireq->ir_v6_rmt_addr, port); +#endif +} + +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb) +{ + struct tcp_options_received tmp_opt; + struct request_sock *req; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; + __u32 isn = TCP_SKB_CB(skb)->when; + bool want_cookie = false, fastopen; + struct flowi fl; + struct tcp_fastopen_cookie foc = { .len = -1 }; + int err; + + + /* TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if ((sysctl_tcp_syncookies == 2 || + inet_csk_reqsk_queue_is_full(sk)) && !isn) { + want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + if (!want_cookie) + goto drop; + } + + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + goto drop; + } + + req = inet_reqsk_alloc(rsk_ops); + if (!req) + goto drop; + + tcp_rsk(req)->af_specific = af_ops; + + tcp_clear_options(&tmp_opt); + tmp_opt.mss_clamp = af_ops->mss_clamp; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + + if (want_cookie && !tmp_opt.saw_tstamp) + tcp_clear_options(&tmp_opt); + + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; + tcp_openreq_init(req, &tmp_opt, skb, sk); + + af_ops->init_req(req, sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + if (!want_cookie || tmp_opt.tstamp_ok) + TCP_ECN_create_request(req, skb, sock_net(sk)); + + if (want_cookie) { + isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } else if (!isn) { + /* VJ's idea. We save last timestamp seen + * from the destination in peer table, when entering + * state TIME-WAIT, and check against it before + * accepting new connection request. + * + * If "isn" is not zero, this request hit alive + * timewait bucket, so that all the necessary checks + * are made in the function processing timewait state. + */ + if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { + bool strict; + + dst = af_ops->route_req(sk, &fl, req, &strict); + if (dst && strict && + !tcp_peer_is_proven(req, dst, true)) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); + goto drop_and_release; + } + } + /* Kill the following clause, if you dislike this way. */ + else if (!sysctl_tcp_syncookies && + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst, false)) { + /* Without syncookies last quarter of + * backlog is filled with destinations, + * proven to be alive. + * It means that we continue to communicate + * to destinations, already remembered + * to the moment of synflood. + */ + pr_drop_req(req, ntohs(tcp_hdr(skb)->source), + rsk_ops->family); + goto drop_and_release; + } + + isn = af_ops->init_seq(skb); + } + if (!dst) { + dst = af_ops->route_req(sk, &fl, req, NULL); + if (!dst) + goto drop_and_free; + } + + tcp_rsk(req)->snt_isn = isn; + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = af_ops->send_synack(sk, dst, &fl, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } + + return 0; + +drop_and_release: + dst_release(dst); +drop_and_free: + reqsk_free(req); +drop: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return 0; +} +EXPORT_SYMBOL(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77cccda1ad0c..1edc739b9da5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -99,7 +99,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -208,6 +208,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; + inet_set_txhash(sk); + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -814,6 +816,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * socket. */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) @@ -837,24 +840,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); - if (!tcp_rsk(req)->snt_synack && !err) - tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) -{ - int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL); - - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} - /* * IPv4 request_sock destructor. */ @@ -1237,161 +1227,68 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) #endif +static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(skb); +} + +static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); + + if (strict) { + if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) + *strict = true; + else + *strict = false; + } + + return dst; +} + struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { + .mss_clamp = TCP_MSS_DEFAULT, +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_reqsk_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, -}; #endif + .init_req = tcp_v4_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v4_init_sequence, +#endif + .route_req = tcp_v4_route_req, + .init_seq = tcp_v4_init_sequence, + .send_synack = tcp_v4_send_synack, + .queue_hash_add = inet_csk_reqsk_queue_hash_add, +}; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = NULL; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - __u32 isn = TCP_SKB_CB(skb)->when; - bool want_cookie = false, fastopen; - struct flowi4 fl4; - struct tcp_fastopen_cookie foc = { .len = -1 }; - int err; - /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); - if (!want_cookie) - goto drop; - } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet_reqsk_alloc(&tcp_request_sock_ops); - if (!req) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = TCP_MSS_DEFAULT; - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_loc_addr = daddr; - ireq->ir_rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(skb); - ireq->ir_mark = inet_request_mark(sk, skb); - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - if (want_cookie) { - isn = cookie_v4_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && - fl4.daddr == saddr) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), - &saddr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } + return tcp_conn_request(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, skb); - isn = tcp_v4_init_sequence(skb); - } - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) - goto drop_and_free; - - tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v4_send_synack(sk, dst, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; @@ -1439,6 +1336,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; + inet_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e68e0d4af6c9..1649988bd1b6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,7 +298,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_flowlabel = np->flow_label >> 12; - tw->tw_ipv6only = np->ipv6only; + tw->tw_ipv6only = sk->sk_ipv6only; } #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d92bce0ea24e..bcee13c4627c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -916,6 +916,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; skb->destructor = tcp_wfree; + skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -3299,3 +3300,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } + +int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + struct flowi fl; + int res; + + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + if (!res) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } + return res; +} +EXPORT_SYMBOL(tcp_rtx_synack); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5667b3003af9..358edd2272ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -186,6 +186,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -222,6 +223,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .accept_ra_from_local = 0, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4321,6 +4323,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; + array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; } static inline size_t inet6_ifla6_size(void) @@ -5168,6 +5171,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec }, { + .procname = "accept_ra_from_local", + .data = &ipv6_devconf.accept_ra_from_local, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7cb4392690dd..2daa3a133e49 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -197,7 +197,7 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = net->ipv6.sysctl.bindv6only; + sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. @@ -294,7 +294,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to v4-mapped address on a v6-only socket * makes no sense */ - if (np->ipv6only) { + if (sk->sk_ipv6only) { err = -EINVAL; goto out; } @@ -371,7 +371,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (addr_type != IPV6_ADDR_MAPPED) - np->ipv6only = 1; + sk->sk_ipv6only = 1; } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; @@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; + net->ipv6.sysctl.auto_flowlabels = 0; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c3bf2d2e519e..2753319524f1 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -199,6 +199,7 @@ ipv4_connected: NULL); sk->sk_state = TCP_ESTABLISHED; + ip6_set_txhash(sk); out: fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3873181ed856..365b2b6f3942 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, * Push down and install the IP header. */ ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = tunnel->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; @@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); __be16 *p = (__be16 *)(ipv6h+1); - ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel); + ip6_flow_hdr(ipv6h, 0, + ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0eb4023..fa83bdd4c3dd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - ip6_flow_hdr(hdr, tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); + ip6_flow_hdr(hdr, np->cork.tclass, + ip6_make_flowlabel(net, skb, fl6->flowlabel, + np->autoflowlabel)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index afa082458360..51a1eb185ea7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_make_flowlabel(net, skb, fl6->flowlabel, false)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index edb58aff4ae7..b50b9e54cf53 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -235,7 +235,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; - np->ipv6only = valbool; + sk->sk_ipv6only = valbool; retv = 0; break; @@ -834,6 +834,10 @@ pref_skip_coa: np->dontfrag = valbool; retv = 0; break; + case IPV6_AUTOFLOWLABEL: + np->autoflowlabel = valbool; + retv = 0; + break; } release_sock(sk); @@ -1058,7 +1062,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_V6ONLY: - val = np->ipv6only; + val = sk->sk_ipv6only; break; case IPV6_RECVPKTINFO: @@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->dontfrag; break; + case IPV6_AUTOFLOWLABEL: + val = np->autoflowlabel; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca8d4ea48a5d..b7ece278dd49 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1070,6 +1070,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); + ND_PRINTK(2, info, + "RA: %s, dev: %s\n", + __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return; @@ -1102,13 +1105,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, did not accept ra for dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT, dev: %s\n", + __func__, skb->dev->name); goto skip_linkparms; + } #endif if (in6_dev->if_flags & IF_RS_SENT) { @@ -1130,11 +1141,24 @@ static void ndisc_router_discovery(struct sk_buff *skb) (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); - if (!in6_dev->cnf.accept_ra_defrtr) + if (!in6_dev->cnf.accept_ra_defrtr) { + ND_PRINTK(2, info, + "RA: %s, defrtr is false for dev: %s\n", + __func__, skb->dev->name); goto skip_defrtr; + } - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + /* Do not accept RA with source-addr found on local machine unless + * accept_ra_from_local is set to true. + */ + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; + } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); @@ -1163,8 +1187,10 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", + rt, lifetime, skb->dev->name); if (rt == NULL && lifetime) { - ND_PRINTK(3, dbg, "RA: adding default router\n"); + ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); if (rt == NULL) { @@ -1260,12 +1286,22 @@ skip_linkparms: NEIGH_UPDATE_F_ISROUTER); } - if (!ipv6_accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) { + ND_PRINTK(2, info, + "RA: %s, accept_ra is false for dev: %s\n", + __func__, skb->dev->name); goto out; + } #ifdef CONFIG_IPV6_ROUTE_INFO - if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0)) + if (!in6_dev->cnf.accept_ra_from_local && + ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, + NULL, 0)) { + ND_PRINTK(2, info, + "RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; + } if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; @@ -1293,8 +1329,12 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ - if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) + if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { + ND_PRINTK(2, info, + "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; + } #endif if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a822b880689b..83cea1d39466 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -187,7 +187,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops); if (!req) goto out; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 058f3eca2e53..5bf7b61f8ae8 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -39,6 +39,13 @@ static struct ctl_table ipv6_table_template[] = { .proc_handler = proc_dointvec }, { + .procname = "auto_flowlabels", + .data = &init_net.ipv6.sysctl.auto_flowlabels, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, .maxlen = sizeof(int), @@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; + ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 229239ad96b1..22055b098428 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -198,6 +198,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; + ip6_set_txhash(sk); + /* * TCP over IPv4 */ @@ -470,13 +472,14 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - struct flowi6 *fl6, + struct flowi *fl, struct request_sock *req, u16 queue_mapping, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; @@ -503,18 +506,6 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) -{ - struct flowi6 fl6; - int res; - - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); - if (!res) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - } - return res; -} static void tcp_v6_reqsk_destructor(struct request_sock *req) { @@ -718,22 +709,66 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif +static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + ireq->ir_iif = sk->sk_bound_dev_if; + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); + + if (!TCP_SKB_CB(skb)->when && + (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || + np->rxopt.bits.rxohlim || np->repflow)) { + atomic_inc(&skb->users); + ireq->pktopts = skb; + } +} + +static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict) +{ + if (strict) + *strict = true; + return inet6_csk_route_req(sk, &fl->u.ip6, req); +} + struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_rtx_synack, + .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; -#ifdef CONFIG_TCP_MD5SIG static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { + .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - + sizeof(struct ipv6hdr), +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_reqsk_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, -}; #endif + .init_req = tcp_v6_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = cookie_v6_init_sequence, +#endif + .route_req = tcp_v6_route_req, + .init_seq = tcp_v6_init_sequence, + .send_synack = tcp_v6_send_synack, + .queue_hash_add = inet6_csk_reqsk_queue_hash_add, +}; static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, @@ -973,153 +1008,17 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) return sk; } -/* FIXME: this is substantially similar to the ipv4 code. - * Can some kind of merge be done? -- erics - */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_options_received tmp_opt; - struct request_sock *req; - struct inet_request_sock *ireq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; - struct tcp_fastopen_cookie foc = { .len = -1 }; - bool want_cookie = false, fastopen; - struct flowi6 fl6; - int err; - if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; - if ((sysctl_tcp_syncookies == 2 || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); - if (!want_cookie) - goto drop; - } - - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); - goto drop; - } - - req = inet6_reqsk_alloc(&tcp6_request_sock_ops); - if (req == NULL) - goto drop; - -#ifdef CONFIG_TCP_MD5SIG - tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops; -#endif - - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); + return tcp_conn_request(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, skb); - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); - - tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - - ireq = inet_rsk(req); - ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; - ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (!want_cookie || tmp_opt.tstamp_ok) - TCP_ECN_create_request(req, skb, sock_net(sk)); - - ireq->ir_iif = sk->sk_bound_dev_if; - ireq->ir_mark = inet_request_mark(sk, skb); - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); - - if (!isn) { - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || - np->repflow) { - atomic_inc(&skb->users); - ireq->pktopts = skb; - } - - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - goto have_isn; - } - - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (tmp_opt.saw_tstamp && - tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { - if (!tcp_peer_is_proven(req, dst, true)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } - /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false)) { - /* Without syncookies last quarter of - * backlog is filled with destinations, - * proven to be alive. - * It means that we continue to communicate - * to destinations, already remembered - * to the moment of synflood. - */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", - &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source)); - goto drop_and_release; - } - - isn = tcp_v6_init_sequence(skb); - } -have_isn: - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_release; - - if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL) - goto drop_and_free; - - tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - } - return 0; - -drop_and_release: - dst_release(dst); -drop_and_free: - reqsk_free(req); drop: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return 0; /* don't send reset */ @@ -1235,6 +1134,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; + ip6_set_txhash(newsk); + /* Now IPv6 options... First: no IPv4 options. diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 95c834799288..c2bd28fd43e4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -79,7 +79,6 @@ static unsigned int udp6_ehashfn(struct net *net, int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -95,7 +94,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; if (sk2_rcv_saddr6 && diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 98ad6ec4bd3c..a5f28d421ea8 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1426,7 +1426,8 @@ __u8 *irlmp_hint_to_service(__u8 *hint) if (hint[1] & HINT_TELEPHONY) { IRDA_DEBUG(1, "Telephony "); service[i++] = S_TELEPHONY; - } if (hint[1] & HINT_FILE_SERVER) + } + if (hint[1] & HINT_FILE_SERVER) IRDA_DEBUG(1, "File Server "); if (hint[1] & HINT_COMM) { diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index 2cf66d885e68..9b54370f5e87 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -143,6 +143,7 @@ static void mac802154_del_iface(struct wpan_phy *phy, struct net_device *dev) { struct mac802154_sub_if_data *sdata; + ASSERT_RTNL(); sdata = netdev_priv(dev); @@ -276,7 +277,8 @@ ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops) } priv = wpan_phy_priv(phy); - priv->hw.phy = priv->phy = phy; + priv->phy = phy; + priv->hw.phy = priv->phy; priv->hw.priv = (char *)priv + ALIGN(sizeof(*priv), NETDEV_ALIGN); priv->ops = ops; @@ -302,29 +304,61 @@ EXPORT_SYMBOL(ieee802154_free_device); int ieee802154_register_device(struct ieee802154_dev *dev) { struct mac802154_priv *priv = mac802154_to_priv(dev); - int rc = -ENOMEM; + int rc = -ENOSYS; + + if (dev->flags & IEEE802154_HW_TXPOWER) { + if (!priv->ops->set_txpower) + goto out; + + priv->phy->set_txpower = mac802154_set_txpower; + } + + if (dev->flags & IEEE802154_HW_LBT) { + if (!priv->ops->set_lbt) + goto out; + + priv->phy->set_lbt = mac802154_set_lbt; + } + + if (dev->flags & IEEE802154_HW_CCA_MODE) { + if (!priv->ops->set_cca_mode) + goto out; + + priv->phy->set_cca_mode = mac802154_set_cca_mode; + } + + if (dev->flags & IEEE802154_HW_CCA_ED_LEVEL) { + if (!priv->ops->set_cca_ed_level) + goto out; + + priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; + } + + if (dev->flags & IEEE802154_HW_CSMA_PARAMS) { + if (!priv->ops->set_csma_params) + goto out; + + priv->phy->set_csma_params = mac802154_set_csma_params; + } + + if (dev->flags & IEEE802154_HW_FRAME_RETRIES) { + if (!priv->ops->set_frame_retries) + goto out; + + priv->phy->set_frame_retries = mac802154_set_frame_retries; + } priv->dev_workqueue = create_singlethread_workqueue(wpan_phy_name(priv->phy)); - if (!priv->dev_workqueue) + if (!priv->dev_workqueue) { + rc = -ENOMEM; goto out; + } wpan_phy_set_dev(priv->phy, priv->hw.parent); priv->phy->add_iface = mac802154_add_iface; priv->phy->del_iface = mac802154_del_iface; - if (priv->ops->set_txpower) - priv->phy->set_txpower = mac802154_set_txpower; - if (priv->ops->set_lbt) - priv->phy->set_lbt = mac802154_set_lbt; - if (priv->ops->set_cca_mode) - priv->phy->set_cca_mode = mac802154_set_cca_mode; - if (priv->ops->set_cca_ed_level) - priv->phy->set_cca_ed_level = mac802154_set_cca_ed_level; - if (priv->ops->set_csma_params) - priv->phy->set_csma_params = mac802154_set_csma_params; - if (priv->ops->set_frame_retries) - priv->phy->set_frame_retries = mac802154_set_frame_retries; rc = wpan_phy_register(priv->phy); if (rc < 0) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 1456f73b02b9..457058142098 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -538,6 +538,7 @@ static int llsec_recover_addr(struct mac802154_llsec *sec, struct ieee802154_addr *addr) { __le16 caddr = sec->params.coord_shortaddr; + addr->pan_id = sec->params.pan_id; if (caddr == cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 15aa2f2b03a7..868a040fd422 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -175,9 +175,9 @@ static void phy_chan_notify(struct work_struct *work) mutex_lock(&priv->hw->phy->pib_lock); res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); - if (res) + if (res) { pr_debug("set_channel failed\n"); - else { + } else { priv->hw->phy->current_channel = priv->chan; priv->hw->phy->current_page = priv->page; } @@ -210,8 +210,9 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan) INIT_WORK(&work->work, phy_chan_notify); work->dev = dev; queue_work(priv->hw->dev_workqueue, &work->work); - } else + } else { mutex_unlock(&priv->hw->phy->pib_lock); + } } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 6d1647399d4f..8124353646ae 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -98,6 +98,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) { u16 crc = crc_ccitt(0, skb->data, skb->len); u8 *data = skb_put(skb, 2); + data[0] = crc & 0xff; data[1] = crc >> 8; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c42e83d2751c..581a6584ed0c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); + ip_vs_stop_estimator(net, &ipvs->tot_stats); } #else @@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); - ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 58579634427d..300ed1eec729 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -597,6 +597,9 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_MARK + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ +#endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) ; @@ -1150,7 +1153,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb) static int ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying) { - struct nf_conn *ct, *last = NULL; + struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -1163,8 +1166,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying if (cb->args[2]) return 0; - if (cb->args[0] == nr_cpu_ids) - return 0; + last = (struct nf_conn *)cb->args[1]; for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { struct ct_pcpu *pcpu; @@ -1174,7 +1176,6 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); spin_lock_bh(&pcpu->lock); - last = (struct nf_conn *)cb->args[1]; list = dying ? &pcpu->dying : &pcpu->unconfirmed; restart: hlist_nulls_for_each_entry(h, n, list, hnnode) { @@ -1193,7 +1194,9 @@ restart: ct); rcu_read_unlock(); if (res < 0) { - nf_conntrack_get(&ct->ct_general); + if (!atomic_inc_not_zero(&ct->ct_general.use)) + continue; + cb->args[0] = cpu; cb->args[1] = (unsigned long)ct; spin_unlock_bh(&pcpu->lock); goto out; @@ -1202,10 +1205,10 @@ restart: if (cb->args[1]) { cb->args[1] = 0; goto restart; - } else - cb->args[2] = 1; + } spin_unlock_bh(&pcpu->lock); } + cb->args[2] = 1; out: if (last) nf_ct_put(last); @@ -2040,6 +2043,9 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) #ifdef CONFIG_NF_CONNTRACK_MARK + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ +#endif + ctnetlink_proto_size(ct) ; } diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 09096a670c45..a49907b1dabc 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -525,6 +525,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) return i->status & IPS_NAT_MASK ? 1 : 0; } +static int nf_nat_proto_clean(struct nf_conn *ct, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + + if (nf_nat_proto_remove(ct, data)) + return 1; + + if (!nat || !nat->ct) + return 0; + + /* This netns is being destroyed, and conntrack has nat null binding. + * Remove it from bysource hash, as the table will be freed soon. + * + * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() + * will delete entry from already-freed table. + */ + if (!del_timer(&ct->timeout)) + return 1; + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + ct->status &= ~IPS_NAT_DONE_MASK; + nat->ct = NULL; + spin_unlock_bh(&nf_nat_lock); + + add_timer(&ct->timeout); + + /* don't delete conntrack. Although that would make things a lot + * simpler, we'd end up flushing all conntracks on nat rmmod. + */ + return 0; +} + static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) { struct nf_nat_proto_clean clean = { @@ -795,7 +828,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0); + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 624e083125b9..ab4566cfcbe4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1730,6 +1730,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (!create || nlh->nlmsg_flags & NLM_F_REPLACE) return -EINVAL; handle = nf_tables_alloc_handle(table); + + if (chain->use == UINT_MAX) + return -EOVERFLOW; } if (nla[NFTA_RULE_POSITION]) { @@ -1789,14 +1792,15 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_REPLACE) { if (nft_rule_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, old_rule); if (trans == NULL) { err = -ENOMEM; goto err2; } nft_rule_disactivate_next(net, old_rule); - list_add_tail(&rule->list, &old_rule->list); + chain->use--; + list_add_tail_rcu(&rule->list, &old_rule->list); } else { err = -ENOENT; goto err2; @@ -1826,6 +1830,7 @@ err3: list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_clear(net, nft_trans_rule(trans)); nft_trans_destroy(trans); + chain->use++; } err2: nf_tables_rule_destroy(&ctx, rule); @@ -2845,7 +2850,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_UNSPEC; + nfmsg->nfgen_family = ctx.afi->family; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8a779be832fb..1840989092ed 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -195,6 +195,15 @@ static void nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_tgdtor_param par; + + par.net = ctx->net; + par.target = target; + par.targinfo = info; + par.family = ctx->afi->family; + if (par.target->destroy != NULL) + par.target->destroy(&par); module_put(target->me); } @@ -382,6 +391,15 @@ static void nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_match *match = expr->ops->data; + void *info = nft_expr_priv(expr); + struct xt_mtdtor_param par; + + par.net = ctx->net; + par.match = match; + par.matchinfo = info; + par.family = ctx->afi->family; + if (par.match->destroy != NULL) + par.match->destroy(&par); module_put(match->me); } diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a0195d28bcfc..79ff58cd36dc 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -175,12 +175,14 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) - goto nla_put_failure; + if (priv->sreg_proto_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, + htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_max))) + goto nla_put_failure; + } return 0; nla_put_failure: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 15c731f03fa6..e8c9f9704216 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1961,25 +1961,25 @@ struct netlink_broadcast_data { void *tx_data; }; -static int do_one_broadcast(struct sock *sk, - struct netlink_broadcast_data *p) +static void do_one_broadcast(struct sock *sk, + struct netlink_broadcast_data *p) { struct netlink_sock *nlk = nlk_sk(sk); int val; if (p->exclude_sk == sk) - goto out; + return; if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) - goto out; + return; if (!net_eq(sock_net(sk), p->net)) - goto out; + return; if (p->failure) { netlink_overrun(sk); - goto out; + return; } sock_hold(sk); @@ -2017,9 +2017,6 @@ static int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } sock_put(sk); - -out: - return 0; } int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0d407bca81e3..fe95b6c224a7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2054,10 +2054,14 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); - err = ovs_flow_init(); + err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; + err = ovs_flow_init(); + if (err) + goto error_unreg_rtnl_link; + err = ovs_vport_init(); if (err) goto error_flow_exit; @@ -2084,6 +2088,8 @@ error_vport_exit: ovs_vport_exit(); error_flow_exit: ovs_flow_exit(); +error_unreg_rtnl_link: + ovs_internal_dev_rtnl_link_unregister(); error: return err; } @@ -2096,6 +2102,7 @@ static void dp_cleanup(void) rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); + ovs_internal_dev_rtnl_link_unregister(); } module_init(dp_init); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 789af9280e77..295471a66c78 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -26,6 +26,7 @@ #include <net/dst.h> #include <net/xfrm.h> +#include <net/rtnetlink.h> #include "datapath.h" #include "vport-internal_dev.h" @@ -121,6 +122,10 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_get_stats64 = internal_dev_get_stats, }; +static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { + .kind = "openvswitch", +}; + static void do_setup(struct net_device *netdev) { ether_setup(netdev); @@ -131,6 +136,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; + netdev->rtnl_link_ops = &internal_dev_link_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | @@ -248,3 +254,13 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev) return internal_dev_priv(netdev)->vport; } + +int ovs_internal_dev_rtnl_link_register(void) +{ + return rtnl_link_register(&internal_dev_link_ops); +} + +void ovs_internal_dev_rtnl_link_unregister(void) +{ + rtnl_link_unregister(&internal_dev_link_ops); +} diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h index 9a7d30ecc6a2..1b179a190cff 100644 --- a/net/openvswitch/vport-internal_dev.h +++ b/net/openvswitch/vport-internal_dev.h @@ -24,5 +24,7 @@ int ovs_is_internal_dev(const struct net_device *); struct vport *ovs_internal_dev_get_vport(struct net_device *); +int ovs_internal_dev_rtnl_link_register(void); +void ovs_internal_dev_rtnl_link_unregister(void); #endif /* vport-internal_dev.h */ diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 0edbd95c60e7..d8b7e247bebf 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -143,8 +143,6 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct rtable *rt; struct flowi4 fl; __be16 src_port; - int port_min; - int port_max; __be16 df; int err; @@ -172,8 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; - inet_get_local_port_range(net, &port_min, &port_max); - src_port = vxlan_src_port(port_min, port_max, skb); + src_port = udp_flow_src_port(net, skb, 0, 0, true); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index bfd34e4c1afc..7c292d474f47 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -125,7 +125,6 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len, { struct can_filter *conf = data; /* Array with rules */ struct canid_match *cm; - struct canid_match *cm_old = (struct canid_match *)m->data; int i; if (!len) @@ -181,12 +180,6 @@ static int em_canid_change(struct tcf_proto *tp, void *data, int len, m->datalen = sizeof(struct canid_match) + len; m->data = (unsigned long)cm; - - if (cm_old != NULL) { - pr_err("canid: Configuring an existing ematch!\n"); - kfree(cm_old); - } - return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e1543b03e39d..fc04fe93c2da 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -108,7 +108,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, /* * Transmit one skb, and handle the return status as required. Holding the - * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this + * __QDISC___STATE_RUNNING bit guarantees that only one CPU can execute this * function. * * Returns to the caller: @@ -156,7 +156,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, /* * NOTE: Called under qdisc_lock(q) with locally disabled BH. * - * __QDISC_STATE_RUNNING guarantees only one CPU can process + * __QDISC___STATE_RUNNING guarantees only one CPU can process * this qdisc at a time. qdisc_lock(q) serializes queue accesses for * this queue. * diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 5c30b7a873df..3b4ffb021cf1 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ transport.o chunk.o sm_make_chunk.o ulpevent.o \ - inqueue.o outqueue.o ulpqueue.o command.o \ + inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o diff --git a/net/sctp/command.c b/net/sctp/command.c deleted file mode 100644 index dd7375851618..000000000000 --- a/net/sctp/command.c +++ /dev/null @@ -1,68 +0,0 @@ -/* SCTP kernel implementation Copyright (C) 1999-2001 - * Cisco, Motorola, and IBM - * Copyright 2001 La Monte H.P. Yarroll - * - * This file is part of the SCTP kernel implementation - * - * These functions manipulate sctp command sequences. - * - * This SCTP implementation is free software; - * you can redistribute it and/or modify it under the terms of - * the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This SCTP implementation is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * ************************ - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, see - * <http://www.gnu.org/licenses/>. - * - * Please send any bug reports or fixes you make to the - * email address(es): - * lksctp developers <linux-sctp@vger.kernel.org> - * - * Written or modified by: - * La Monte H.P. Yarroll <piggy@acm.org> - * Karl Knutson <karl@athena.chicago.il.us> - */ - -#include <linux/types.h> -#include <net/sctp/sctp.h> -#include <net/sctp/sm.h> - -/* Initialize a block of memory as a command sequence. */ -int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) -{ - memset(seq, 0, sizeof(sctp_cmd_seq_t)); - return 1; /* We always succeed. */ -} - -/* Add a command to a sctp_cmd_seq_t. - * Return 0 if the command sequence is full. - */ -void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj) -{ - BUG_ON(seq->next_free_slot >= SCTP_MAX_NUM_COMMANDS); - - seq->cmds[seq->next_free_slot].verb = verb; - seq->cmds[seq->next_free_slot++].obj = obj; -} - -/* Return the next command structure in a sctp_cmd_seq. - * Returns NULL at the end of the sequence. - */ -sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq) -{ - sctp_cmd_t *retval = NULL; - - if (seq->next_cmd < seq->next_free_slot) - retval = &seq->cmds[seq->next_cmd++]; - - return retval; -} - diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dcb19592761e..2e9ada10fd84 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -321,41 +321,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - char tmp[8]; struct ctl_table tbl; - int ret; - int changed = 0; + bool changed = false; char *none = "none"; + char tmp[8]; + int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; - tbl.maxlen = 8; + tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? : none; tbl.maxlen = strlen(tbl.data); } - ret = proc_dostring(&tbl, write, buffer, lenp, ppos); - if (write) { + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; - changed = 1; + changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; - changed = 1; + changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; - changed = 1; + changed = true; } - if (!changed) ret = -EINVAL; } @@ -368,11 +367,10 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - int new_value; - struct ctl_table tbl; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; - int ret; + struct ctl_table tbl; + int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); @@ -381,12 +379,15 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, tbl.data = &new_value; else tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - if (write) { - if (ret || new_value > max || new_value < min) + if (write && ret == 0) { + if (new_value > max || new_value < min) return -EINVAL; + net->sctp.rto_min = new_value; } + return ret; } @@ -395,11 +396,10 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - int new_value; - struct ctl_table tbl; unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; - int ret; + struct ctl_table tbl; + int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); @@ -408,12 +408,15 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, tbl.data = &new_value; else tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - if (write) { - if (ret || new_value > max || new_value < min) + if (write && ret == 0) { + if (new_value > max || new_value < min) return -EINVAL; + net->sctp.rto_max = new_value; } + return ret; } @@ -421,8 +424,9 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - pr_warn_once("Changing rto_alpha or rto_beta may lead to " - "suboptimal rtt/srtt estimations!\n"); + if (write) + pr_warn_once("Changing rto_alpha or rto_beta may lead to " + "suboptimal rtt/srtt estimations!\n"); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); } @@ -444,8 +448,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write, tbl.data = &net->sctp.auth_enable; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); - - if (write) { + if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; net->sctp.auth_enable = new_value; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 7dd672fa651f..b10e047bbd15 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -594,15 +594,16 @@ void sctp_transport_burst_reset(struct sctp_transport *t) } /* What is the next timeout value for this transport? */ -unsigned long sctp_transport_timeout(struct sctp_transport *t) +unsigned long sctp_transport_timeout(struct sctp_transport *trans) { - unsigned long timeout; - timeout = t->rto + sctp_jitter(t->rto); - if ((t->state != SCTP_UNCONFIRMED) && - (t->state != SCTP_PF)) - timeout += t->hbinterval; - timeout += jiffies; - return timeout; + /* RTO + timer slack +/- 50% of RTO */ + unsigned long timeout = (trans->rto >> 1) + prandom_u32_max(trans->rto); + + if (trans->state != SCTP_UNCONFIRMED && + trans->state != SCTP_PF) + timeout += trans->hbinterval; + + return timeout + jiffies; } /* Reset transport variables to their initial values */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 247e973544bf..f77366717420 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -592,6 +592,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } +EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, diff --git a/net/tipc/link.c b/net/tipc/link.c index ad2c57f5868d..a235b245f682 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -82,15 +82,14 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf); static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destnode); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); static int tipc_link_frag_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf); +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf); /* * Simple link routines @@ -335,13 +334,15 @@ void tipc_link_delete_list(unsigned int bearer_id, bool shutting_down) static int link_schedule_port(struct tipc_link *l_ptr, u32 origport, u32 sz) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); if (p_ptr) { if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->congested = 1; + tsk = tipc_port_to_sock(p_ptr); + tsk->link_cong = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -355,6 +356,7 @@ exit: void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) { struct tipc_port *p_ptr; + struct tipc_sock *tsk; struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; @@ -370,10 +372,11 @@ void tipc_link_wakeup_ports(struct tipc_link *l_ptr, int all) wait_list) { if (win <= 0) break; + tsk = tipc_port_to_sock(p_ptr); list_del_init(&p_ptr->wait_list); spin_lock_bh(p_ptr->lock); - p_ptr->congested = 0; - tipc_port_wakeup(p_ptr); + tsk->link_cong = 0; + tipc_sock_wakeup(tsk); win -= p_ptr->waiting_pkts; spin_unlock_bh(p_ptr->lock); } @@ -850,6 +853,144 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector) return res; } +/* tipc_link_cong: determine return value and how to treat the + * sent buffer during link congestion. + * - For plain, errorless user data messages we keep the buffer and + * return -ELINKONG. + * - For all other messages we discard the buffer and return -EHOSTUNREACH + * - For TIPC internal messages we also reset the link + */ +static int tipc_link_cong(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint imp = tipc_msg_tot_importance(msg); + u32 oport = msg_tot_origport(msg); + + if (likely(imp <= TIPC_CRITICAL_IMPORTANCE)) { + if (!msg_errcode(msg) && !msg_reroute_cnt(msg)) { + link_schedule_port(link, oport, psz); + return -ELINKCONG; + } + } else { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + } + kfree_skb_list(buf); + return -EHOSTUNREACH; +} + +/** + * __tipc_link_xmit2(): same as tipc_link_xmit2, but destlink is known & locked + * @link: link to use + * @buf: chain of buffers containing message + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket + * user data messages) or -EHOSTUNREACH (all other messages/senders) + * Only the socket functions tipc_send_stream() and tipc_send_packet() need + * to act on the return value, since they may need to do more send attempts. + */ +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + uint psz = msg_size(msg); + uint qsz = link->out_queue_size; + uint sndlim = link->queue_limit[0]; + uint imp = tipc_msg_tot_importance(msg); + uint mtu = link->max_pkt; + uint ack = mod(link->next_in_no - 1); + uint seqno = link->next_out_no; + uint bc_last_in = link->owner->bclink.last_in; + struct tipc_media_addr *addr = &link->media_addr; + struct sk_buff *next = buf->next; + + /* Match queue limits against msg importance: */ + if (unlikely(qsz >= link->queue_limit[imp])) + return tipc_link_cong(link, buf); + + /* Has valid packet limit been used ? */ + if (unlikely(psz > mtu)) { + kfree_skb_list(buf); + return -EMSGSIZE; + } + + /* Prepare each packet for sending, and add to outqueue: */ + while (buf) { + next = buf->next; + msg = buf_msg(buf); + msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_bcast_ack(msg, bc_last_in); + + if (!link->first_out) { + link->first_out = buf; + } else if (qsz < sndlim) { + link->last_out->next = buf; + } else if (tipc_msg_bundle(link->last_out, buf, mtu)) { + link->stats.sent_bundled++; + buf = next; + next = buf->next; + continue; + } else if (tipc_msg_make_bundle(&buf, mtu, link->addr)) { + link->stats.sent_bundled++; + link->stats.sent_bundles++; + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } else { + link->last_out->next = buf; + if (!link->next_out) + link->next_out = buf; + } + + /* Send packet if possible: */ + if (likely(++qsz <= sndlim)) { + tipc_bearer_send(link->bearer_id, buf, addr); + link->next_out = next; + link->unacked_window = 0; + } + seqno++; + link->last_out = buf; + buf = next; + } + link->next_out_no = seqno; + link->out_queue_size = qsz; + return 0; +} + +/** + * tipc_link_xmit2() is the general link level function for message sending + * @buf: chain of buffers containing message + * @dsz: amount of user data to be sent + * @dnode: address of destination node + * @selector: a number used for deterministic link selection + * Consumes the buffer chain, except when returning -ELINKCONG + * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + */ +int tipc_link_xmit2(struct sk_buff *buf, u32 dnode, u32 selector) +{ + struct tipc_link *link = NULL; + struct tipc_node *node; + int rc = -EHOSTUNREACH; + + node = tipc_node_find(dnode); + if (node) { + tipc_node_lock(node); + link = node->active_links[selector & 1]; + if (link) + rc = __tipc_link_xmit2(link, buf); + tipc_node_unlock(node); + } + + if (link) + return rc; + + if (likely(in_own_node(dnode))) + return tipc_sk_rcv(buf); + + kfree_skb_list(buf); + return rc; +} + /* * tipc_link_sync_xmit - synchronize broadcast link endpoints. * @@ -933,252 +1074,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest) } /* - * tipc_link_xmit_fast: Entry for data messages where the - * destination link is known and the header is complete, - * inclusive total message length. Very time critical. - * Link is locked. Returns user data length. - */ -static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf, - u32 *used_max_pkt) -{ - struct tipc_msg *msg = buf_msg(buf); - int res = msg_data_sz(msg); - - if (likely(!link_congested(l_ptr))) { - if (likely(msg_size(msg) <= l_ptr->max_pkt)) { - link_add_to_outqueue(l_ptr, buf, msg); - tipc_bearer_send(l_ptr->bearer_id, buf, - &l_ptr->media_addr); - l_ptr->unacked_window = 0; - return res; - } - else - *used_max_pkt = l_ptr->max_pkt; - } - return __tipc_link_xmit(l_ptr, buf); /* All other cases */ -} - -/* - * tipc_link_iovec_xmit_fast: Entry for messages where the - * destination processor is known and the header is complete, - * except for total message length. - * Returns user data length or errno. - */ -int tipc_link_iovec_xmit_fast(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_msg *hdr = &sender->phdr; - struct tipc_link *l_ptr; - struct sk_buff *buf; - struct tipc_node *node; - int res; - u32 selector = msg_origport(hdr) & 1; - -again: - /* - * Try building message using port's max_pkt hint. - * (Must not hold any locks while building message.) - */ - res = tipc_msg_build(hdr, msg_sect, len, sender->max_pkt, &buf); - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - return res; - - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[selector]; - if (likely(l_ptr)) { - if (likely(buf)) { - res = tipc_link_xmit_fast(l_ptr, buf, - &sender->max_pkt); -exit: - tipc_node_unlock(node); - return res; - } - - /* Exit if link (or bearer) is congested */ - if (link_congested(l_ptr)) { - res = link_schedule_port(l_ptr, - sender->ref, res); - goto exit; - } - - /* - * Message size exceeds max_pkt hint; update hint, - * then re-try fast path or fragment the message - */ - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - - - if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) - goto again; - - return tipc_link_iovec_long_xmit(sender, msg_sect, - len, destaddr); - } - tipc_node_unlock(node); - } - - /* Couldn't find a link to the destination node */ - kfree_skb(buf); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, TIPC_ERR_NO_NODE); - return -ENETUNREACH; -} - -/* - * tipc_link_iovec_long_xmit(): Entry for long messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Link and bearer congestion status have been checked to be ok, - * and are ignored if they change. - * - * Note that fragments do not use the full link MTU so that they won't have - * to undergo refragmentation if link changeover causes them to be sent - * over another link with an additional tunnel header added as prefix. - * (Refragmentation will still occur if the other link has a smaller MTU.) - * - * Returns user data length or errno. - */ -static int tipc_link_iovec_long_xmit(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len, u32 destaddr) -{ - struct tipc_link *l_ptr; - struct tipc_node *node; - struct tipc_msg *hdr = &sender->phdr; - u32 dsz = len; - u32 max_pkt, fragm_sz, rest; - struct tipc_msg fragm_hdr; - struct sk_buff *buf, *buf_chain, *prev; - u32 fragm_crs, fragm_rest, hsz, sect_rest; - const unchar __user *sect_crs; - int curr_sect; - u32 fragm_no; - int res = 0; - -again: - fragm_no = 1; - max_pkt = sender->max_pkt - INT_H_SIZE; - /* leave room for tunnel header in case of link changeover */ - fragm_sz = max_pkt - INT_H_SIZE; - /* leave room for fragmentation header in each fragment */ - rest = dsz; - fragm_crs = 0; - fragm_rest = 0; - sect_rest = 0; - sect_crs = NULL; - curr_sect = -1; - - /* Prepare reusable fragment header */ - tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - INT_H_SIZE, msg_destnode(hdr)); - msg_set_size(&fragm_hdr, max_pkt); - msg_set_fragm_no(&fragm_hdr, 1); - - /* Prepare header of first fragment */ - buf_chain = buf = tipc_buf_acquire(max_pkt); - if (!buf) - return -ENOMEM; - buf->next = NULL; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - hsz = msg_hdr_sz(hdr); - skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz); - - /* Chop up message */ - fragm_crs = INT_H_SIZE + hsz; - fragm_rest = fragm_sz - hsz; - - do { /* For all sections */ - u32 sz; - - if (!sect_rest) { - sect_rest = msg_sect[++curr_sect].iov_len; - sect_crs = msg_sect[curr_sect].iov_base; - } - - if (sect_rest < fragm_rest) - sz = sect_rest; - else - sz = fragm_rest; - - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { - res = -EFAULT; -error: - kfree_skb_list(buf_chain); - return res; - } - sect_crs += sz; - sect_rest -= sz; - fragm_crs += sz; - fragm_rest -= sz; - rest -= sz; - - if (!fragm_rest && rest) { - - /* Initiate new fragment: */ - if (rest <= fragm_sz) { - fragm_sz = rest; - msg_set_type(&fragm_hdr, LAST_FRAGMENT); - } else { - msg_set_type(&fragm_hdr, FRAGMENT); - } - msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); - msg_set_fragm_no(&fragm_hdr, ++fragm_no); - prev = buf; - buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) { - res = -ENOMEM; - goto error; - } - - buf->next = NULL; - prev->next = buf; - skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE); - fragm_crs = INT_H_SIZE; - fragm_rest = fragm_sz; - } - } while (rest > 0); - - /* - * Now we have a buffer chain. Select a link and check - * that packet size is still OK - */ - node = tipc_node_find(destaddr); - if (likely(node)) { - tipc_node_lock(node); - l_ptr = node->active_links[sender->ref & 1]; - if (!l_ptr) { - tipc_node_unlock(node); - goto reject; - } - if (l_ptr->max_pkt < max_pkt) { - sender->max_pkt = l_ptr->max_pkt; - tipc_node_unlock(node); - kfree_skb_list(buf_chain); - goto again; - } - } else { -reject: - kfree_skb_list(buf_chain); - tipc_port_iovec_reject(sender, hdr, msg_sect, len, - TIPC_ERR_NO_NODE); - return -ENETUNREACH; - } - - /* Append chain of fragments to send queue & send them */ - l_ptr->long_msg_seq_no++; - link_add_chain_to_outqueue(l_ptr, buf_chain, l_ptr->long_msg_seq_no); - l_ptr->stats.sent_fragments += fragm_no; - l_ptr->stats.sent_fragmented++; - tipc_link_push_queue(l_ptr); - tipc_node_unlock(node); - return dsz; -} - -/* * tipc_link_push_packet: Push one unsent packet to the media */ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) @@ -1238,7 +1133,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr) tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr); if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); + msg_set_type(msg, BUNDLE_CLOSED); l_ptr->next_out = buf->next; return 0; } @@ -1527,11 +1422,6 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(!list_empty(&l_ptr->waiting_ports))) tipc_link_wakeup_ports(l_ptr, 0); - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { - l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); - } - /* Process the incoming packet */ if (unlikely(!link_working_working(l_ptr))) { if (msg_user(msg) == LINK_PROTOCOL) { @@ -1565,57 +1455,19 @@ void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) if (unlikely(l_ptr->oldest_deferred_in)) head = link_insert_deferred_queue(l_ptr, head); - /* Deliver packet/message to correct user: */ - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) { - if (!tipc_link_tunnel_rcv(n_ptr, &buf)) { - tipc_node_unlock(n_ptr); - continue; - } - msg = buf_msg(buf); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - l_ptr->stats.recv_fragments++; - if (tipc_buf_append(&l_ptr->reasm_buf, &buf)) { - l_ptr->stats.recv_fragmented++; - msg = buf_msg(buf); - } else { - if (!l_ptr->reasm_buf) - tipc_link_reset(l_ptr); - tipc_node_unlock(n_ptr); - continue; - } + if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + l_ptr->stats.sent_acks++; + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } - switch (msg_user(msg)) { - case TIPC_LOW_IMPORTANCE: - case TIPC_MEDIUM_IMPORTANCE: - case TIPC_HIGH_IMPORTANCE: - case TIPC_CRITICAL_IMPORTANCE: - tipc_node_unlock(n_ptr); - tipc_sk_rcv(buf); - continue; - case MSG_BUNDLER: - l_ptr->stats.recv_bundles++; - l_ptr->stats.recv_bundled += msg_msgcnt(msg); - tipc_node_unlock(n_ptr); - tipc_link_bundle_rcv(buf); - continue; - case NAME_DISTRIBUTOR: - n_ptr->bclink.recv_permitted = true; - tipc_node_unlock(n_ptr); - tipc_named_rcv(buf); - continue; - case CONN_MANAGER: + if (tipc_link_prepare_input(l_ptr, &buf)) { tipc_node_unlock(n_ptr); - tipc_port_proto_rcv(buf); continue; - case BCAST_PROTOCOL: - tipc_link_sync_rcv(n_ptr, buf); - break; - default: - kfree_skb(buf); - break; } tipc_node_unlock(n_ptr); + msg = buf_msg(buf); + if (tipc_link_input(l_ptr, buf) != 0) + goto discard; continue; unlock_discard: tipc_node_unlock(n_ptr); @@ -1625,6 +1477,80 @@ discard: } /** + * tipc_link_prepare_input - process TIPC link messages + * + * returns nonzero if the message was consumed + * + * Node lock must be held + */ +static int tipc_link_prepare_input(struct tipc_link *l, struct sk_buff **buf) +{ + struct tipc_node *n; + struct tipc_msg *msg; + int res = -EINVAL; + + n = l->owner; + msg = buf_msg(*buf); + switch (msg_user(msg)) { + case CHANGEOVER_PROTOCOL: + if (tipc_link_tunnel_rcv(n, buf)) + res = 0; + break; + case MSG_FRAGMENTER: + l->stats.recv_fragments++; + if (tipc_buf_append(&l->reasm_buf, buf)) { + l->stats.recv_fragmented++; + res = 0; + } else if (!l->reasm_buf) { + tipc_link_reset(l); + } + break; + case MSG_BUNDLER: + l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(msg); + res = 0; + break; + case NAME_DISTRIBUTOR: + n->bclink.recv_permitted = true; + res = 0; + break; + case BCAST_PROTOCOL: + tipc_link_sync_rcv(n, *buf); + break; + default: + res = 0; + } + return res; +} +/** + * tipc_link_input - Deliver message too higher layers + */ +static int tipc_link_input(struct tipc_link *l, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + int res = 0; + + switch (msg_user(msg)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + case CONN_MANAGER: + tipc_sk_rcv(buf); + break; + case NAME_DISTRIBUTOR: + tipc_named_rcv(buf); + break; + case MSG_BUNDLER: + tipc_link_bundle_rcv(buf); + break; + default: + res = -EINVAL; + } + return res; +} + +/** * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue * * Returns increase in queue length (i.e. 0 or 1) @@ -2217,6 +2143,7 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) u32 msgcount = msg_msgcnt(buf_msg(buf)); u32 pos = INT_H_SIZE; struct sk_buff *obuf; + struct tipc_msg *omsg; while (msgcount--) { obuf = buf_extract(buf, pos); @@ -2224,8 +2151,16 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) pr_warn("Link unable to unbundle message(s)\n"); break; } - pos += align(msg_size(buf_msg(obuf))); - tipc_net_route_msg(obuf); + omsg = buf_msg(obuf); + pos += align(msg_size(omsg)); + if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { + tipc_named_rcv(obuf); + } else { + pr_warn("Illegal bundled msg: %u\n", msg_user(omsg)); + kfree_skb(obuf); + } } kfree_skb(buf); } diff --git a/net/tipc/link.h b/net/tipc/link.h index 200d518b218e..227ff8120897 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -227,8 +227,10 @@ void tipc_link_reset_all(struct tipc_node *node); void tipc_link_reset(struct tipc_link *l_ptr); void tipc_link_reset_list(unsigned int bearer_id); int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector); +int tipc_link_xmit2(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_names_xmit(struct list_head *message_list, u32 dest); int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf); +int __tipc_link_xmit2(struct tipc_link *link, struct sk_buff *buf); int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); int tipc_link_iovec_xmit_fast(struct tipc_port *sender, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8be6e94a1ca9..6ec958401f78 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -36,21 +36,16 @@ #include "core.h" #include "msg.h" +#include "addr.h" +#include "name_table.h" -u32 tipc_msg_tot_importance(struct tipc_msg *m) +#define MAX_FORWARD_SIZE 1024 + +static unsigned int align(unsigned int i) { - if (likely(msg_isdata(m))) { - if (likely(msg_orignode(m) == tipc_own_addr)) - return msg_importance(m); - return msg_importance(m) + 4; - } - if ((msg_user(m) == MSG_FRAGMENTER) && - (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); + return (i + 3) & ~3u; } - void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode) { @@ -152,3 +147,268 @@ out_free: kfree_skb(*buf); return 0; } + + +/** + * tipc_msg_build2 - create buffer chain containing specified header and data + * @mhdr: Message header, to be prepended to data + * @iov: User data + * @offset: Posision in iov to start copying from + * @dsz: Total length of user data + * @pktmax: Max packet size that can be used + * @chain: Buffer or chain of buffers to be returned to caller + * Returns message data size or errno: -ENOMEM, -EFAULT + */ +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int pktmax , struct sk_buff **chain) +{ + int mhsz = msg_hdr_sz(mhdr); + int msz = mhsz + dsz; + int pktno = 1; + int pktsz; + int pktrem = pktmax; + int drem = dsz; + struct tipc_msg pkthdr; + struct sk_buff *buf, *prev; + char *pktpos; + int rc; + + msg_set_size(mhdr, msz); + + /* No fragmentation needed? */ + if (likely(msz <= pktmax)) { + buf = tipc_buf_acquire(msz); + *chain = buf; + if (unlikely(!buf)) + return -ENOMEM; + skb_copy_to_linear_data(buf, mhdr, mhsz); + pktpos = buf->data + mhsz; + if (!dsz || !memcpy_fromiovecend(pktpos, iov, offset, dsz)) + return dsz; + rc = -EFAULT; + goto error; + } + + /* Prepare reusable fragment header */ + tipc_msg_init(&pkthdr, MSG_FRAGMENTER, FIRST_FRAGMENT, + INT_H_SIZE, msg_destnode(mhdr)); + msg_set_size(&pkthdr, pktmax); + msg_set_fragm_no(&pkthdr, pktno); + + /* Prepare first fragment */ + *chain = buf = tipc_buf_acquire(pktmax); + if (!buf) + return -ENOMEM; + pktpos = buf->data; + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos += INT_H_SIZE; + pktrem -= INT_H_SIZE; + skb_copy_to_linear_data_offset(buf, INT_H_SIZE, mhdr, mhsz); + pktpos += mhsz; + pktrem -= mhsz; + + do { + if (drem < pktrem) + pktrem = drem; + + if (memcpy_fromiovecend(pktpos, iov, offset, pktrem)) { + rc = -EFAULT; + goto error; + } + drem -= pktrem; + offset += pktrem; + + if (!drem) + break; + + /* Prepare new fragment: */ + if (drem < (pktmax - INT_H_SIZE)) + pktsz = drem + INT_H_SIZE; + else + pktsz = pktmax; + prev = buf; + buf = tipc_buf_acquire(pktsz); + if (!buf) { + rc = -ENOMEM; + goto error; + } + prev->next = buf; + msg_set_type(&pkthdr, FRAGMENT); + msg_set_size(&pkthdr, pktsz); + msg_set_fragm_no(&pkthdr, ++pktno); + skb_copy_to_linear_data(buf, &pkthdr, INT_H_SIZE); + pktpos = buf->data + INT_H_SIZE; + pktrem = pktsz - INT_H_SIZE; + + } while (1); + + msg_set_type(buf_msg(buf), LAST_FRAGMENT); + return dsz; +error: + kfree_skb_list(*chain); + *chain = NULL; + return rc; +} + +/** + * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one + * @bbuf: the existing buffer ("bundle") + * @buf: buffer to be appended + * @mtu: max allowable size for the bundle buffer + * Consumes buffer if successful + * Returns true if bundling could be performed, otherwise false + */ +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu) +{ + struct tipc_msg *bmsg = buf_msg(bbuf); + struct tipc_msg *msg = buf_msg(buf); + unsigned int bsz = msg_size(bmsg); + unsigned int msz = msg_size(msg); + u32 start = align(bsz); + u32 max = mtu - INT_H_SIZE; + u32 pad = start - bsz; + + if (likely(msg_user(msg) == MSG_FRAGMENTER)) + return false; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + return false; + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) + return false; + if (likely(msg_user(bmsg) != MSG_BUNDLER)) + return false; + if (likely(msg_type(bmsg) != BUNDLE_OPEN)) + return false; + if (unlikely(skb_tailroom(bbuf) < (pad + msz))) + return false; + if (unlikely(max < (start + msz))) + return false; + + skb_put(bbuf, pad + msz); + skb_copy_to_linear_data_offset(bbuf, start, buf->data, msz); + msg_set_size(bmsg, start + msz); + msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1); + bbuf->next = buf->next; + kfree_skb(buf); + return true; +} + +/** + * tipc_msg_make_bundle(): Create bundle buf and append message to its tail + * @buf: buffer to be appended and replaced + * @mtu: max allowable size for the bundle buffer, inclusive header + * @dnode: destination node for message. (Not always present in header) + * Replaces buffer if successful + * Returns true if sucess, otherwise false + */ +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode) +{ + struct sk_buff *bbuf; + struct tipc_msg *bmsg; + struct tipc_msg *msg = buf_msg(*buf); + u32 msz = msg_size(msg); + u32 max = mtu - INT_H_SIZE; + + if (msg_user(msg) == MSG_FRAGMENTER) + return false; + if (msg_user(msg) == CHANGEOVER_PROTOCOL) + return false; + if (msg_user(msg) == BCAST_PROTOCOL) + return false; + if (msz > (max / 2)) + return false; + + bbuf = tipc_buf_acquire(max); + if (!bbuf) + return false; + + skb_trim(bbuf, INT_H_SIZE); + bmsg = buf_msg(bbuf); + tipc_msg_init(bmsg, MSG_BUNDLER, BUNDLE_OPEN, INT_H_SIZE, dnode); + msg_set_seqno(bmsg, msg_seqno(msg)); + msg_set_ack(bmsg, msg_ack(msg)); + msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); + bbuf->next = (*buf)->next; + tipc_msg_bundle(bbuf, *buf, mtu); + *buf = bbuf; + return true; +} + +/** + * tipc_msg_reverse(): swap source and destination addresses and add error code + * @buf: buffer containing message to be reversed + * @dnode: return value: node where to send message after reversal + * @err: error code to be set in message + * Consumes buffer if failure + * Returns true if success, otherwise false + */ +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err) +{ + struct tipc_msg *msg = buf_msg(buf); + uint imp = msg_importance(msg); + struct tipc_msg ohdr; + uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); + + if (skb_linearize(buf)) + goto exit; + if (msg_dest_droppable(msg)) + goto exit; + if (msg_errcode(msg)) + goto exit; + + memcpy(&ohdr, msg, msg_hdr_sz(msg)); + imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); + if (msg_isdata(msg)) + msg_set_importance(msg, imp); + msg_set_errcode(msg, err); + msg_set_origport(msg, msg_destport(&ohdr)); + msg_set_destport(msg, msg_origport(&ohdr)); + msg_set_prevnode(msg, tipc_own_addr); + if (!msg_short(msg)) { + msg_set_orignode(msg, msg_destnode(&ohdr)); + msg_set_destnode(msg, msg_orignode(&ohdr)); + } + msg_set_size(msg, msg_hdr_sz(msg) + rdsz); + skb_trim(buf, msg_size(msg)); + skb_orphan(buf); + *dnode = msg_orignode(&ohdr); + return true; +exit: + kfree_skb(buf); + return false; +} + +/** + * tipc_msg_eval: determine fate of message that found no destination + * @buf: the buffer containing the message. + * @dnode: return value: next-hop node, if message to be forwarded + * @err: error code to use, if message to be rejected + * + * Does not consume buffer + * Returns 0 (TIPC_OK) if message ok and we can try again, -TIPC error + * code if message to be rejected + */ +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode) +{ + struct tipc_msg *msg = buf_msg(buf); + u32 dport; + + if (msg_type(msg) != TIPC_NAMED_MSG) + return -TIPC_ERR_NO_PORT; + if (skb_linearize(buf)) + return -TIPC_ERR_NO_NAME; + if (msg_data_sz(msg) > MAX_FORWARD_SIZE) + return -TIPC_ERR_NO_NAME; + if (msg_reroute_cnt(msg) > 0) + return -TIPC_ERR_NO_NAME; + + *dnode = addr_domain(msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(msg_nametype(msg), + msg_nameinst(msg), + dnode); + if (!dport) + return -TIPC_ERR_NO_NAME; + msg_incr_reroute_cnt(msg); + msg_set_destnode(msg, *dnode); + msg_set_destport(msg, dport); + return TIPC_OK; +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 503511903d1d..7d574346e75e 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -463,6 +463,11 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define FRAGMENT 1 #define LAST_FRAGMENT 2 +/* Bundling protocol message types + */ +#define BUNDLE_OPEN 0 +#define BUNDLE_CLOSED 1 + /* * Link management protocol message types */ @@ -706,12 +711,37 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -u32 tipc_msg_tot_importance(struct tipc_msg *m); +static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_importance(msg_get_wrapped(m)); + return msg_importance(m); +} + +static inline u32 msg_tot_origport(struct tipc_msg *m) +{ + if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) + return msg_origport(msg_get_wrapped(m)); + return msg_origport(m); +} + +bool tipc_msg_reverse(struct sk_buff *buf, u32 *dnode, int err); + +int tipc_msg_eval(struct sk_buff *buf, u32 *dnode); + void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); + int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, unsigned int len, int max_size, struct sk_buff **buf); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode); + +int tipc_msg_build2(struct tipc_msg *mhdr, struct iovec const *iov, + int offset, int dsz, int mtu , struct sk_buff **chain); + #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index f64375e7f99f..7fcc94998fea 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -1,7 +1,7 @@ /* * net/tipc/net.c: TIPC network routing code * - * Copyright (c) 1995-2006, Ericsson AB + * Copyright (c) 1995-2006, 2014, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -104,67 +104,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -static void net_route_named_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - u32 dnode; - u32 dport; - - if (!msg_named(msg)) { - kfree_skb(buf); - return; - } - - dnode = addr_domain(msg_lookup_scope(msg)); - dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode); - if (dport) { - msg_set_destnode(msg, dnode); - msg_set_destport(msg, dport); - tipc_net_route_msg(buf); - return; - } - tipc_reject_msg(buf, TIPC_ERR_NO_NAME); -} - -void tipc_net_route_msg(struct sk_buff *buf) -{ - struct tipc_msg *msg; - u32 dnode; - - if (!buf) - return; - msg = buf_msg(buf); - - /* Handle message for this node */ - dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); - if (tipc_in_scope(dnode, tipc_own_addr)) { - if (msg_isdata(msg)) { - if (msg_mcast(msg)) - tipc_port_mcast_rcv(buf, NULL); - else if (msg_destport(msg)) - tipc_sk_rcv(buf); - else - net_route_named_msg(buf); - return; - } - switch (msg_user(msg)) { - case NAME_DISTRIBUTOR: - tipc_named_rcv(buf); - break; - case CONN_MANAGER: - tipc_port_proto_rcv(buf); - break; - default: - kfree_skb(buf); - } - return; - } - - /* Handle message for another node */ - skb_trim(buf, msg_size(msg)); - tipc_link_xmit(buf, dnode, msg_link_selector(msg)); -} - int tipc_net_start(u32 addr) { char addr_string[16]; diff --git a/net/tipc/net.h b/net/tipc/net.h index c6c2b46f7c28..59ef3388be2c 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -void tipc_net_route_msg(struct sk_buff *buf); - int tipc_net_start(u32 addr); void tipc_net_stop(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index 5b44c3041be4..d959343043fd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012 Ericsson AB + * Copyright (c) 2000-2006, 2012-2014, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -155,21 +155,25 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (!active[0]) { active[0] = active[1] = l_ptr; node_established_contact(n_ptr); - return; + goto exit; } if (l_ptr->priority < active[0]->priority) { pr_info("New link <%s> becomes standby\n", l_ptr->name); - return; + goto exit; } tipc_link_dup_queue_xmit(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; - return; + goto exit; } pr_info("Old link <%s> becomes standby\n", active[0]->name); if (active[1] != active[0]) pr_info("Old link <%s> becomes standby\n", active[1]->name); active[0] = active[1] = l_ptr; +exit: + /* Leave room for changeover header when returning 'mtu' to users: */ + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; } /** @@ -229,6 +233,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); + + /* Leave room for changeover header when returning 'mtu' to users: */ + if (active[0]) { + n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + return; + } + + /* Loopback link went down? No fragmentation needed from now on. */ + if (n_ptr->addr == tipc_own_addr) { + n_ptr->act_mtus[0] = MAX_MSG_SIZE; + n_ptr->act_mtus[1] = MAX_MSG_SIZE; + } } int tipc_node_active_links(struct tipc_node *n_ptr) diff --git a/net/tipc/node.h b/net/tipc/node.h index 9087063793f2..b61716a8218e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -41,6 +41,7 @@ #include "addr.h" #include "net.h" #include "bearer.h" +#include "msg.h" /* * Out-of-range value for node signature @@ -105,6 +106,7 @@ struct tipc_node { spinlock_t lock; struct hlist_node hash; struct tipc_link *active_links[2]; + u32 act_mtus[2]; struct tipc_link *links[MAX_BEARERS]; unsigned int action_flags; struct tipc_node_bclink bclink; @@ -143,4 +145,19 @@ static inline bool tipc_node_blocked(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN)); } +static inline uint tipc_node_get_mtu(u32 addr, u32 selector) +{ + struct tipc_node *node; + u32 mtu; + + node = tipc_node_find(addr); + + if (likely(node)) + mtu = node->act_mtus[selector & 1]; + else + mtu = MAX_MSG_SIZE; + + return mtu; +} + #endif diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 7c59ab1d6ecb..2d13eea8574a 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -84,11 +84,13 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) void tipc_nodesub_notify(struct list_head *nsub_list) { struct tipc_node_subscr *ns, *safe; + net_ev_handler handle_node_down; list_for_each_entry_safe(ns, safe, nsub_list, nodesub_list) { - if (ns->handle_node_down) { - ns->handle_node_down(ns->usr_handle); + handle_node_down = ns->handle_node_down; + if (handle_node_down) { ns->handle_node_down = NULL; + handle_node_down(ns->usr_handle); } } } diff --git a/net/tipc/port.c b/net/tipc/port.c index 5fd7acce01ea..0d09dcb6da18 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -42,8 +42,6 @@ /* Connection management: */ #define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ -#define CONFIRMED 0 -#define PROBING 1 #define MAX_REJECT_SIZE 1024 @@ -188,12 +186,6 @@ exit: tipc_port_list_free(dp); } - -void tipc_port_wakeup(struct tipc_port *port) -{ - tipc_sock_wakeup(tipc_port_to_sock(port)); -} - /* tipc_port_init - intiate TIPC port and lock it * * Returns obtained reference if initialization is successful, zero otherwise @@ -235,6 +227,8 @@ u32 tipc_port_init(struct tipc_port *p_ptr, void tipc_port_destroy(struct tipc_port *p_ptr) { struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; + u32 peer; tipc_withdraw(p_ptr, 0, NULL); @@ -246,14 +240,15 @@ void tipc_port_destroy(struct tipc_port *p_ptr) if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); + msg = buf_msg(buf); + peer = msg_destnode(msg); + tipc_link_xmit2(buf, peer, msg_link_selector(msg)); } - spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); list_del(&p_ptr->wait_list); spin_unlock_bh(&tipc_port_list_lock); k_term_timer(&p_ptr->timer); - tipc_net_route_msg(buf); } /* @@ -275,100 +270,16 @@ static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, msg_set_destport(msg, tipc_port_peerport(p_ptr)); msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); + buf->next = NULL; } return buf; } -int tipc_reject_msg(struct sk_buff *buf, u32 err) -{ - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *rbuf; - struct tipc_msg *rmsg; - int hdr_sz; - u32 imp; - u32 data_sz = msg_data_sz(msg); - u32 src_node; - u32 rmsg_sz; - - /* discard rejected message if it shouldn't be returned to sender */ - if (WARN(!msg_isdata(msg), - "attempt to reject message with user=%u", msg_user(msg))) { - dump_stack(); - goto exit; - } - if (msg_errcode(msg) || msg_dest_droppable(msg)) - goto exit; - - /* - * construct returned message by copying rejected message header and - * data (or subset), then updating header fields that need adjusting - */ - hdr_sz = msg_hdr_sz(msg); - rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); - - rbuf = tipc_buf_acquire(rmsg_sz); - if (rbuf == NULL) - goto exit; - - rmsg = buf_msg(rbuf); - skb_copy_to_linear_data(rbuf, msg, rmsg_sz); - - if (msg_connected(rmsg)) { - imp = msg_importance(rmsg); - if (imp < TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(rmsg, ++imp); - } - msg_set_non_seq(rmsg, 0); - msg_set_size(rmsg, rmsg_sz); - msg_set_errcode(rmsg, err); - msg_set_prevnode(rmsg, tipc_own_addr); - msg_swap_words(rmsg, 4, 5); - if (!msg_short(rmsg)) - msg_swap_words(rmsg, 6, 7); - - /* send self-abort message when rejecting on a connected port */ - if (msg_connected(msg)) { - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - - if (p_ptr) { - struct sk_buff *abuf = NULL; - - if (p_ptr->connected) - abuf = port_build_self_abort_msg(p_ptr, err); - tipc_port_unlock(p_ptr); - tipc_net_route_msg(abuf); - } - } - - /* send returned message & dispose of rejected message */ - src_node = msg_prevnode(msg); - if (in_own_node(src_node)) - tipc_sk_rcv(rbuf); - else - tipc_link_xmit(rbuf, src_node, msg_link_selector(rmsg)); -exit: - kfree_skb(buf); - return data_sz; -} - -int tipc_port_iovec_reject(struct tipc_port *p_ptr, struct tipc_msg *hdr, - struct iovec const *msg_sect, unsigned int len, - int err) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(hdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (!buf) - return res; - - return tipc_reject_msg(buf, err); -} - static void port_timeout(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; @@ -379,15 +290,16 @@ static void port_timeout(unsigned long ref) } /* Last probe answered ? */ - if (p_ptr->probing_state == PROBING) { + if (p_ptr->probing_state == TIPC_CONN_PROBING) { buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); } else { buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); - p_ptr->probing_state = PROBING; + p_ptr->probing_state = TIPC_CONN_PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -395,12 +307,14 @@ static void port_handle_node_down(unsigned long ref) { struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; + struct tipc_msg *msg = NULL; if (!p_ptr) return; buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_NODE); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } @@ -412,6 +326,7 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 er struct tipc_msg *msg = buf_msg(buf); msg_swap_words(msg, 4, 5); msg_swap_words(msg, 6, 7); + buf->next = NULL; } return buf; } @@ -436,60 +351,11 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er if (imp < TIPC_CRITICAL_IMPORTANCE) msg_set_importance(msg, ++imp); msg_set_errcode(msg, err); + buf->next = NULL; } return buf; } -void tipc_port_proto_rcv(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr; - struct sk_buff *r_buf = NULL; - u32 destport = msg_destport(msg); - int wakeable; - - /* Validate connection */ - p_ptr = tipc_port_lock(destport); - if (!p_ptr || !p_ptr->connected || !tipc_port_peer_msg(p_ptr, msg)) { - r_buf = tipc_buf_acquire(BASIC_H_SIZE); - if (r_buf) { - msg = buf_msg(r_buf); - tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, - BASIC_H_SIZE, msg_orignode(msg)); - msg_set_errcode(msg, TIPC_ERR_NO_PORT); - msg_set_origport(msg, destport); - msg_set_destport(msg, msg_origport(msg)); - } - if (p_ptr) - tipc_port_unlock(p_ptr); - goto exit; - } - - /* Process protocol message sent by peer */ - switch (msg_type(msg)) { - case CONN_ACK: - wakeable = tipc_port_congested(p_ptr) && p_ptr->congested; - p_ptr->acked += msg_msgcnt(msg); - if (!tipc_port_congested(p_ptr)) { - p_ptr->congested = 0; - if (wakeable) - tipc_port_wakeup(p_ptr); - } - break; - case CONN_PROBE: - r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); - break; - default: - /* CONN_PROBE_REPLY or unrecognized - no action required */ - break; - } - p_ptr->probing_state = CONFIRMED; - tipc_port_unlock(p_ptr); -exit: - tipc_net_route_msg(r_buf); - kfree_skb(buf); -} - static int port_print(struct tipc_port *p_ptr, char *buf, int len, int full_id) { struct publication *publ; @@ -581,16 +447,19 @@ void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; struct sk_buff *buf = NULL; + struct tipc_msg *msg; p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->connected) { - p_ptr->conn_unacked -= ack; + if (p_ptr->connected) buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); - } + tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + if (!buf) + return; + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); } int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, @@ -689,7 +558,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; - p_ptr->probing_state = CONFIRMED; + p_ptr->probing_state = TIPC_CONN_OK; p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); @@ -698,7 +567,7 @@ int __tipc_port_connect(u32 ref, struct tipc_port *p_ptr, (net_ev_handler)port_handle_node_down); res = 0; exit: - p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_node_get_mtu(peer->node, ref); return res; } @@ -741,6 +610,7 @@ int tipc_port_disconnect(u32 ref) */ int tipc_port_shutdown(u32 ref) { + struct tipc_msg *msg; struct tipc_port *p_ptr; struct sk_buff *buf = NULL; @@ -750,149 +620,7 @@ int tipc_port_shutdown(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); - tipc_net_route_msg(buf); + msg = buf_msg(buf); + tipc_link_xmit2(buf, msg_destnode(msg), msg_link_selector(msg)); return tipc_port_disconnect(ref); } - -/* - * tipc_port_iovec_rcv: Concatenate and deliver sectioned - * message for this node. - */ -static int tipc_port_iovec_rcv(struct tipc_port *sender, - struct iovec const *msg_sect, - unsigned int len) -{ - struct sk_buff *buf; - int res; - - res = tipc_msg_build(&sender->phdr, msg_sect, len, MAX_MSG_SIZE, &buf); - if (likely(buf)) - tipc_sk_rcv(buf); - return res; -} - -/** - * tipc_send - send message sections on connection - */ -int tipc_send(struct tipc_port *p_ptr, - struct iovec const *msg_sect, - unsigned int len) -{ - u32 destnode; - int res; - - if (!p_ptr->connected) - return -EINVAL; - - p_ptr->congested = 1; - if (!tipc_port_congested(p_ptr)) { - destnode = tipc_port_peernode(p_ptr); - if (likely(!in_own_node(destnode))) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - - if (likely(res != -ELINKCONG)) { - p_ptr->congested = 0; - if (res > 0) - p_ptr->sent++; - return res; - } - } - if (tipc_port_unreliable(p_ptr)) { - p_ptr->congested = 0; - return len; - } - return -ELINKCONG; -} - -/** - * tipc_send2name - send message sections to port name - */ -int tipc_send2name(struct tipc_port *p_ptr, - struct tipc_name const *name, - unsigned int domain, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_hdr_sz(msg, NAMED_H_SIZE); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - - if (likely(destport || destnode)) { - if (likely(in_own_node(destnode))) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - destnode); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, - len, TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; - } - return tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NAME); -} - -/** - * tipc_send2port - send message sections to port identity - */ -int tipc_send2port(struct tipc_port *p_ptr, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len) -{ - struct tipc_msg *msg; - int res; - - if (p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_lookup_scope(msg, 0); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_iovec_rcv(p_ptr, msg_sect, len); - else if (tipc_own_addr) - res = tipc_link_iovec_xmit_fast(p_ptr, msg_sect, len, - dest->node); - else - res = tipc_port_iovec_reject(p_ptr, msg, msg_sect, len, - TIPC_ERR_NO_NODE); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (tipc_port_unreliable(p_ptr)) - return len; - - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index cf4ca5b1d9a4..0e47052daa29 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -53,17 +53,13 @@ * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages * @port_list: adjacent ports in TIPC's global list of ports * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: # of non-empty messages sent by port - * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -76,17 +72,13 @@ struct tipc_port { int connected; u32 conn_type; u32 conn_instance; - u32 conn_unacked; int published; - u32 congested; u32 max_pkt; u32 ref; struct tipc_msg phdr; struct list_head port_list; struct list_head wait_list; u32 waiting_pkts; - u32 sent; - u32 acked; struct list_head publications; u32 pub_count; u32 probing_state; @@ -104,8 +96,6 @@ struct tipc_port_list; u32 tipc_port_init(struct tipc_port *p_ptr, const unsigned int importance); -int tipc_reject_msg(struct sk_buff *buf, u32 err); - void tipc_acknowledge(u32 port_ref, u32 ack); void tipc_port_destroy(struct tipc_port *p_ptr); @@ -122,8 +112,6 @@ int tipc_port_disconnect(u32 portref); int tipc_port_shutdown(u32 ref); -void tipc_port_wakeup(struct tipc_port *port); - /* * The following routines require that the port be locked on entry */ @@ -136,34 +124,12 @@ int tipc_port_peer_msg(struct tipc_port *p_ptr, struct tipc_msg *msg); * TIPC messaging routines */ -int tipc_send(struct tipc_port *port, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2name(struct tipc_port *port, - struct tipc_name const *name, - u32 domain, - struct iovec const *msg_sect, - unsigned int len); - -int tipc_send2port(struct tipc_port *port, - struct tipc_portid const *dest, - struct iovec const *msg_sect, - unsigned int len); - int tipc_port_mcast_xmit(struct tipc_port *port, struct tipc_name_seq const *seq, struct iovec const *msg, unsigned int len); -int tipc_port_iovec_reject(struct tipc_port *p_ptr, - struct tipc_msg *hdr, - struct iovec const *msg_sect, - unsigned int len, - int err); - struct sk_buff *tipc_port_get_ports(void); -void tipc_port_proto_rcv(struct sk_buff *buf); void tipc_port_mcast_rcv(struct sk_buff *buf, struct tipc_port_list *dp); void tipc_port_reinit(void); @@ -185,12 +151,6 @@ static inline void tipc_port_unlock(struct tipc_port *p_ptr) spin_unlock_bh(p_ptr->lock); } -static inline int tipc_port_congested(struct tipc_port *p_ptr) -{ - return ((p_ptr->sent - p_ptr->acked) >= TIPC_FLOWCTRL_WIN); -} - - static inline u32 tipc_port_peernode(struct tipc_port *p_ptr) { return msg_destnode(&p_ptr->phdr); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ef0475568f9e..8c5600cfcc3e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,14 +36,17 @@ #include "core.h" #include "port.h" +#include "name_table.h" #include "node.h" - +#include "link.h" #include <linux/export.h> +#include "link.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define TIPC_FWD_MSG 1 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); @@ -123,9 +126,12 @@ static void advance_rx_queue(struct sock *sk) static void reject_rx_queue(struct sock *sk) { struct sk_buff *buf; + u32 dnode; - while ((buf = __skb_dequeue(&sk->sk_receive_queue))) - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + while ((buf = __skb_dequeue(&sk->sk_receive_queue))) { + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); + } } /** @@ -201,6 +207,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); tipc_port_unlock(port); @@ -303,6 +310,7 @@ static int tipc_release(struct socket *sock) struct tipc_sock *tsk; struct tipc_port *port; struct sk_buff *buf; + u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -331,7 +339,8 @@ static int tipc_release(struct socket *sock) sock->state = SS_DISCONNECTING; tipc_port_disconnect(port->ref); } - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); + if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT)) + tipc_link_xmit2(buf, dnode, 0); } } @@ -504,12 +513,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong) mask |= POLLOUT; break; case SS_READY: case SS_CONNECTED: - if (!tsk->port.congested) + if (!tsk->link_cong && !tipc_sk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: @@ -526,6 +535,43 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, } /** + * tipc_sk_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @dnode: node to send response message to, if any + * @buf: buffer containing protocol message + * Returns 0 (TIPC_OK) if message was consumed, 1 (TIPC_FWD_MSG) if + * (CONN_PROBE_REPLY) message should be forwarded. + */ +int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode, struct sk_buff *buf) +{ + struct tipc_msg *msg = buf_msg(buf); + struct tipc_port *port = &tsk->port; + int conn_cong; + + /* Ignore if connection cannot be validated: */ + if (!port->connected || !tipc_port_peer_msg(port, msg)) + goto exit; + + port->probing_state = TIPC_CONN_OK; + + if (msg_type(msg) == CONN_ACK) { + conn_cong = tipc_sk_conn_cong(tsk); + tsk->sent_unacked -= msg_msgcnt(msg); + if (conn_cong) + tipc_sock_wakeup(tsk); + } else if (msg_type(msg) == CONN_PROBE) { + if (!tipc_msg_reverse(buf, dnode, TIPC_OK)) + return TIPC_OK; + msg_set_type(msg, CONN_PROBE_REPLY); + return TIPC_FWD_MSG; + } + /* Do nothing if msg_type() == CONN_PROBE_REPLY */ +exit: + kfree_skb(buf); + return TIPC_OK; +} + +/** * dest_name_check - verify user is permitted to send to specified port name * @dest: destination address * @m: descriptor for message to be sent @@ -539,6 +585,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) { struct tipc_cfg_msg_hdr hdr; + if (unlikely(dest->addrtype == TIPC_ADDR_ID)) + return 0; if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES)) return 0; if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) @@ -575,19 +623,55 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->port.congested); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @iov: message data to send + * @dsz: total length of message data + * @timeo: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct iovec *iov, size_t dsz, long timeo) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + int rc; + + do { + if (sock->state != SS_READY) { + rc = -EOPNOTSUPP; + break; + } + rc = tipc_port_mcast_xmit(&tsk->port, seq, iov, dsz); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) + sock->state = SS_CONNECTING; + break; + } + if (rc != -ELINKCONG) + break; + rc = tipc_wait_for_sndmsg(sock, &timeo); + } while (!rc); + + return rc; +} /** * tipc_sendmsg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send - * @total_len: length of message + * @dsz: amount of user data to be sent * * Message must have an destination specified explicitly. * Used for SOCK_RDM and SOCK_DGRAM messages, @@ -597,100 +681,123 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success, or errno otherwise */ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) + struct msghdr *m, size_t dsz) { + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; - DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int needs_conn; + struct tipc_msg *mhdr = &port->phdr; + struct iovec *iov = m->msg_iov; + u32 dnode, dport; + struct sk_buff *buf; + struct tipc_name_seq *seq = &dest->addr.nameseq; + u32 mtu; long timeo; - int res = -EINVAL; + int rc = -EINVAL; if (unlikely(!dest)) return -EDESTADDRREQ; + if (unlikely((m->msg_namelen < sizeof(*dest)) || (dest->family != AF_TIPC))) return -EINVAL; - if (total_len > TIPC_MAX_USER_MSG_SIZE) + + if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (iocb) lock_sock(sk); - needs_conn = (sock->state != SS_READY); - if (unlikely(needs_conn)) { + if (unlikely(sock->state != SS_READY)) { if (sock->state == SS_LISTENING) { - res = -EPIPE; + rc = -EPIPE; goto exit; } if (sock->state != SS_UNCONNECTED) { - res = -EISCONN; + rc = -EISCONN; goto exit; } if (tsk->port.published) { - res = -EOPNOTSUPP; + rc = -EOPNOTSUPP; goto exit; } if (dest->addrtype == TIPC_ADDR_NAME) { tsk->port.conn_type = dest->addr.name.name.type; tsk->port.conn_instance = dest->addr.name.name.instance; } - - /* Abort any pending connection attempts (very unlikely) */ - reject_rx_queue(sk); } + rc = dest_name_check(dest, m); + if (rc) + goto exit; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - do { - if (dest->addrtype == TIPC_ADDR_NAME) { - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_send2name(port, - &dest->addr.name.name, - dest->addr.name.domain, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_ID) { - res = tipc_send2port(port, - &dest->addr.id, - m->msg_iov, - total_len); - } else if (dest->addrtype == TIPC_ADDR_MCAST) { - if (needs_conn) { - res = -EOPNOTSUPP; - break; - } - res = dest_name_check(dest, m); - if (res) - break; - res = tipc_port_mcast_xmit(port, - &dest->addr.nameseq, - m->msg_iov, - total_len); + + if (dest->addrtype == TIPC_ADDR_MCAST) { + rc = tipc_sendmcast(sock, seq, iov, dsz, timeo); + goto exit; + } else if (dest->addrtype == TIPC_ADDR_NAME) { + u32 type = dest->addr.name.name.type; + u32 inst = dest->addr.name.name.instance; + u32 domain = dest->addr.name.domain; + + dnode = domain; + msg_set_type(mhdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(mhdr, NAMED_H_SIZE); + msg_set_nametype(mhdr, type); + msg_set_nameinst(mhdr, inst); + msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); + dport = tipc_nametbl_translate(type, inst, &dnode); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dport); + if (unlikely(!dport && !dnode)) { + rc = -EHOSTUNREACH; + goto exit; } - if (likely(res != -ELINKCONG)) { - if (needs_conn && (res >= 0)) + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(mhdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(mhdr, 0); + msg_set_destnode(mhdr, dnode); + msg_set_destport(mhdr, dest->addr.id.ref); + msg_set_hdr_sz(mhdr, BASIC_H_SIZE); + } + +new_mtu: + mtu = tipc_node_get_mtu(dnode, tsk->port.ref); + rc = tipc_msg_build2(mhdr, iov, 0, dsz, mtu, &buf); + if (rc < 0) + goto exit; + + do { + rc = tipc_link_xmit2(buf, dnode, tsk->port.ref); + if (likely(rc >= 0)) { + if (sock->state != SS_READY) sock->state = SS_CONNECTING; + rc = dsz; break; } - res = tipc_wait_for_sndmsg(sock, &timeo); - if (res) + if (rc == -EMSGSIZE) + goto new_mtu; + + if (rc != -ELINKCONG) break; - } while (1); + rc = tipc_wait_for_sndmsg(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + + return rc; } static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_port *port = &tsk->port; DEFINE_WAIT(wait); int done; @@ -709,37 +816,49 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, - (!port->congested || !port->connected)); + (!tsk->link_cong && + !tipc_sk_conn_cong(tsk)) || + !tsk->port.connected); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; } /** - * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held + * tipc_send_stream - send stream-oriented data + * @iocb: (unused) * @sock: socket structure - * @m: message to send - * @total_len: length of message + * @m: data to send + * @dsz: total length of data to be transmitted * - * Used for SOCK_SEQPACKET messages and SOCK_STREAM data. + * Used for SOCK_STREAM data. * - * Returns the number of bytes sent on success, or errno otherwise + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_port *port = &tsk->port; + struct tipc_msg *mhdr = &port->phdr; + struct sk_buff *buf; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - int res = -EINVAL; + u32 ref = port->ref; + int rc = -EINVAL; long timeo; + u32 dnode; + uint mtu, send, sent = 0; /* Handle implied connection establishment */ - if (unlikely(dest)) - return tipc_sendmsg(iocb, sock, m, total_len); - - if (total_len > TIPC_MAX_USER_MSG_SIZE) + if (unlikely(dest)) { + rc = tipc_sendmsg(iocb, sock, m, dsz); + if (dsz && (dsz == rc)) + tsk->sent_unacked = 1; + return rc; + } + if (dsz > (uint)INT_MAX) return -EMSGSIZE; if (iocb) @@ -747,123 +866,66 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - res = -EPIPE; + rc = -EPIPE; else - res = -ENOTCONN; + rc = -ENOTCONN; goto exit; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + dnode = tipc_port_peernode(port); + +next: + mtu = port->max_pkt; + send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); + rc = tipc_msg_build2(mhdr, m->msg_iov, sent, send, mtu, &buf); + if (unlikely(rc < 0)) + goto exit; do { - res = tipc_send(&tsk->port, m->msg_iov, total_len); - if (likely(res != -ELINKCONG)) - break; - res = tipc_wait_for_sndpkt(sock, &timeo); - if (res) - break; - } while (1); + if (likely(!tipc_sk_conn_cong(tsk))) { + rc = tipc_link_xmit2(buf, dnode, ref); + if (likely(!rc)) { + tsk->sent_unacked++; + sent += send; + if (sent == dsz) + break; + goto next; + } + if (rc == -EMSGSIZE) { + port->max_pkt = tipc_node_get_mtu(dnode, ref); + goto next; + } + if (rc != -ELINKCONG) + break; + } + rc = tipc_wait_for_sndpkt(sock, &timeo); + if (rc) + kfree_skb_list(buf); + } while (!rc); exit: if (iocb) release_sock(sk); - return res; + return sent ? sent : rc; } /** - * tipc_send_stream - send stream-oriented data - * @iocb: (unused) + * tipc_send_packet - send a connection-oriented message + * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure - * @m: data to send - * @total_len: total length of data to be sent + * @m: message to send + * @dsz: length of data to be transmitted * - * Used for SOCK_STREAM data. + * Used for SOCK_SEQPACKET messages. * - * Returns the number of bytes sent on success (or partial success), - * or errno if no data sent + * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t dsz) { - struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct msghdr my_msg; - struct iovec my_iov; - struct iovec *curr_iov; - int curr_iovlen; - char __user *curr_start; - u32 hdr_size; - int curr_left; - int bytes_to_send; - int bytes_sent; - int res; - - lock_sock(sk); - - /* Handle special cases where there is no connection */ - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_UNCONNECTED) - res = tipc_send_packet(NULL, sock, m, total_len); - else - res = sock->state == SS_DISCONNECTING ? -EPIPE : -ENOTCONN; - goto exit; - } - - if (unlikely(m->msg_name)) { - res = -EISCONN; - goto exit; - } - - if (total_len > (unsigned int)INT_MAX) { - res = -EMSGSIZE; - goto exit; - } - - /* - * Send each iovec entry using one or more messages - * - * Note: This algorithm is good for the most likely case - * (i.e. one large iovec entry), but could be improved to pass sets - * of small iovec entries into send_packet(). - */ - curr_iov = m->msg_iov; - curr_iovlen = m->msg_iovlen; - my_msg.msg_iov = &my_iov; - my_msg.msg_iovlen = 1; - my_msg.msg_flags = m->msg_flags; - my_msg.msg_name = NULL; - bytes_sent = 0; - - hdr_size = msg_hdr_sz(&tsk->port.phdr); - - while (curr_iovlen--) { - curr_start = curr_iov->iov_base; - curr_left = curr_iov->iov_len; - - while (curr_left) { - bytes_to_send = tsk->port.max_pkt - hdr_size; - if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) - bytes_to_send = TIPC_MAX_USER_MSG_SIZE; - if (curr_left < bytes_to_send) - bytes_to_send = curr_left; - my_iov.iov_base = curr_start; - my_iov.iov_len = bytes_to_send; - res = tipc_send_packet(NULL, sock, &my_msg, - bytes_to_send); - if (res < 0) { - if (bytes_sent) - res = bytes_sent; - goto exit; - } - curr_left -= bytes_to_send; - curr_start += bytes_to_send; - bytes_sent += bytes_to_send; - } + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; - curr_iov++; - } - res = bytes_sent; -exit: - release_sock(sk); - return res; + return tipc_send_stream(iocb, sock, m, dsz); } /** @@ -1104,8 +1166,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { if ((sock->state != SS_READY) && - (++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } exit: @@ -1213,8 +1277,10 @@ restart: /* Consume received message (optional) */ if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++port->conn_unacked >= TIPC_CONNACK_INTV)) - tipc_acknowledge(port->ref, port->conn_unacked); + if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { + tipc_acknowledge(port->ref, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + } advance_rx_queue(sk); } @@ -1269,17 +1335,16 @@ static void tipc_data_ready(struct sock *sk) * @tsk: TIPC socket * @msg: message * - * Returns TIPC error status code and socket error status code - * once it encounters some errors + * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise */ -static u32 filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) +static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf) { struct sock *sk = &tsk->sk; struct tipc_port *port = &tsk->port; struct socket *sock = sk->sk_socket; struct tipc_msg *msg = buf_msg(*buf); - u32 retval = TIPC_ERR_NO_PORT; + int retval = -TIPC_ERR_NO_PORT; int res; if (msg_mcast(msg)) @@ -1382,32 +1447,37 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * * Called with socket lock already taken; port lock may also be taken. * - * Returns TIPC error status code (TIPC_OK if message is not to be rejected) + * Returns 0 (TIPC_OK) if message was consumed, -TIPC error code if message + * to be rejected, 1 (TIPC_FWD_MSG) if (CONN_MANAGER) message to be forwarded */ -static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) +static int filter_rcv(struct sock *sk, struct sk_buff *buf) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *msg = buf_msg(buf); unsigned int limit = rcvbuf_limit(sk, buf); - u32 res = TIPC_OK; + u32 onode; + int rc = TIPC_OK; + + if (unlikely(msg_user(msg) == CONN_MANAGER)) + return tipc_sk_proto_rcv(tsk, &onode, buf); /* Reject message if it is wrong sort of message for socket */ if (msg_type(msg) > TIPC_DIRECT_MSG) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; if (sock->state == SS_READY) { if (msg_connected(msg)) - return TIPC_ERR_NO_PORT; + return -TIPC_ERR_NO_PORT; } else { - res = filter_connect(tsk, &buf); - if (res != TIPC_OK || buf == NULL) - return res; + rc = filter_connect(tsk, &buf); + if (rc != TIPC_OK || buf == NULL) + return rc; } /* Reject message if there isn't room to queue it */ if (sk_rmem_alloc_get(sk) + buf->truesize >= limit) - return TIPC_ERR_OVERLOAD; + return -TIPC_ERR_OVERLOAD; /* Enqueue message */ TIPC_SKB_CB(buf)->handle = NULL; @@ -1429,16 +1499,23 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) */ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf) { - u32 res; + int rc; + u32 onode; struct tipc_sock *tsk = tipc_sk(sk); uint truesize = buf->truesize; - res = filter_rcv(sk, buf); - if (unlikely(res)) - tipc_reject_msg(buf, res); + rc = filter_rcv(sk, buf); + + if (likely(!rc)) { + if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) + atomic_add(truesize, &tsk->dupl_rcvcnt); + return 0; + } - if (atomic_read(&tsk->dupl_rcvcnt) < TIPC_CONN_OVERLOAD_LIMIT) - atomic_add(truesize, &tsk->dupl_rcvcnt); + if ((rc < 0) && !tipc_msg_reverse(buf, &onode, -rc)) + return 0; + + tipc_link_xmit2(buf, onode, 0); return 0; } @@ -1455,19 +1532,14 @@ int tipc_sk_rcv(struct sk_buff *buf) struct tipc_port *port; struct sock *sk; u32 dport = msg_destport(buf_msg(buf)); - int err = TIPC_OK; + int rc = TIPC_OK; uint limit; + u32 dnode; - /* Forward unresolved named message */ - if (unlikely(!dport)) { - tipc_net_route_msg(buf); - return 0; - } - - /* Validate destination */ + /* Validate destination and message */ port = tipc_port_lock(dport); if (unlikely(!port)) { - err = TIPC_ERR_NO_PORT; + rc = tipc_msg_eval(buf, &dnode); goto exit; } @@ -1478,23 +1550,25 @@ int tipc_sk_rcv(struct sk_buff *buf) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - err = filter_rcv(sk, buf); + rc = filter_rcv(sk, buf); } else { if (sk->sk_backlog.len == 0) atomic_set(&tsk->dupl_rcvcnt, 0); limit = rcvbuf_limit(sk, buf) + atomic_read(&tsk->dupl_rcvcnt); if (sk_add_backlog(sk, buf, limit)) - err = TIPC_ERR_OVERLOAD; + rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); tipc_port_unlock(port); - if (likely(!err)) + if (likely(!rc)) return 0; exit: - tipc_reject_msg(buf, err); - return -EHOSTUNREACH; + if ((rc < 0) && !tipc_msg_reverse(buf, &dnode, -rc)) + return -EHOSTUNREACH; + + tipc_link_xmit2(buf, dnode, 0); + return (rc < 0) ? -EHOSTUNREACH : 0; } static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) @@ -1758,6 +1832,7 @@ static int tipc_shutdown(struct socket *sock, int how) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_port *port = &tsk->port; struct sk_buff *buf; + u32 peer; int res; if (how != SHUT_RDWR) @@ -1778,7 +1853,8 @@ restart: goto restart; } tipc_port_disconnect(port->ref); - tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); + if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN)) + tipc_link_xmit2(buf, peer, 0); } else { tipc_port_shutdown(port->ref); } diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 3afcd2a70b31..2cdede9eda1b 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -38,6 +38,9 @@ #include "port.h" #include <net/sock.h> +#define TIPC_CONN_OK 0 +#define TIPC_CONN_PROBING 1 + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -45,6 +48,9 @@ * @peer_name: the peer of the connection, if any * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @link_cong: non-zero if owner must sleep because of link congestion + * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @rcv_unacked: # messages read by user, but not yet acked back to peer */ struct tipc_sock { @@ -52,6 +58,9 @@ struct tipc_sock { struct tipc_port port; unsigned int conn_timeout; atomic_t dupl_rcvcnt; + int link_cong; + uint sent_unacked; + uint rcv_unacked; }; static inline struct tipc_sock *tipc_sk(const struct sock *sk) @@ -69,6 +78,11 @@ static inline void tipc_sock_wakeup(struct tipc_sock *tsk) tsk->sk.sk_write_space(&tsk->sk); } +static inline int tipc_sk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; +} + int tipc_sk_rcv(struct sk_buff *buf); #endif |