summaryrefslogtreecommitdiff
path: root/net/rds
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 16:49:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 16:49:49 -0700
commite0456717e483bb8a9431b80a5bdc99a928b9b003 (patch)
tree5eb5add2bafd1f20326d70f5cb3b711d00a40b10 /net/rds
parent98ec21a01896751b673b6c731ca8881daa8b2c6d (diff)
parent1ea2d020ba477cb7011a7174e8501a9e04a325d4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add TX fast path in mac80211, from Johannes Berg. 2) Add TSO/GRO support to ibmveth, from Thomas Falcon 3) Move away from cached routes in ipv6, just like ipv4, from Martin KaFai Lau. 4) Lots of new rhashtable tests, from Thomas Graf. 5) Run ingress qdisc lockless, from Alexei Starovoitov. 6) Allow servers to fetch TCP packet headers for SYN packets of new connections, for fingerprinting. From Eric Dumazet. 7) Add mode parameter to pktgen, for testing receive. From Alexei Starovoitov. 8) Cache access optimizations via simplifications of build_skb(), from Alexander Duyck. 9) Move page frag allocator under mm/, also from Alexander. 10) Add xmit_more support to hv_netvsc, from KY Srinivasan. 11) Add a counter guard in case we try to perform endless reclassify loops in the packet scheduler. 12) Extern flow dissector to be programmable and use it in new "Flower" classifier. From Jiri Pirko. 13) AF_PACKET fanout rollover fixes, performance improvements, and new statistics. From Willem de Bruijn. 14) Add netdev driver for GENEVE tunnels, from John W Linville. 15) Add ingress netfilter hooks and filtering, from Pablo Neira Ayuso. 16) Fix handling of epoll edge triggers in TCP, from Eric Dumazet. 17) Add an ECN retry fallback for the initial TCP handshake, from Daniel Borkmann. 18) Add tail call support to BPF, from Alexei Starovoitov. 19) Add several pktgen helper scripts, from Jesper Dangaard Brouer. 20) Add zerocopy support to AF_UNIX, from Hannes Frederic Sowa. 21) Favor even port numbers for allocation to connect() requests, and odd port numbers for bind(0), in an effort to help avoid ip_local_port_range exhaustion. From Eric Dumazet. 22) Add Cavium ThunderX driver, from Sunil Goutham. 23) Allow bpf programs to access skb_iif and dev->ifindex SKB metadata, from Alexei Starovoitov. 24) Add support for T6 chips in cxgb4vf driver, from Hariprasad Shenai. 25) Double TCP Small Queues default to 256K to accomodate situations like the XEN driver and wireless aggregation. From Wei Liu. 26) Add more entropy inputs to flow dissector, from Tom Herbert. 27) Add CDG congestion control algorithm to TCP, from Kenneth Klette Jonassen. 28) Convert ipset over to RCU locking, from Jozsef Kadlecsik. 29) Track and act upon link status of ipv4 route nexthops, from Andy Gospodarek. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1670 commits) bridge: vlan: flush the dynamically learned entries on port vlan delete bridge: multicast: add a comment to br_port_state_selection about blocking state net: inet_diag: export IPV6_V6ONLY sockopt stmmac: troubleshoot unexpected bits in des0 & des1 net: ipv4 sysctl option to ignore routes when nexthop link is down net: track link-status of ipv4 nexthops net: switchdev: ignore unsupported bridge flags net: Cavium: Fix MAC address setting in shutdown state drivers: net: xgene: fix for ACPI support without ACPI ip: report the original address of ICMP messages net/mlx5e: Prefetch skb data on RX net/mlx5e: Pop cq outside mlx5e_get_cqe net/mlx5e: Remove mlx5e_cq.sqrq back-pointer net/mlx5e: Remove extra spaces net/mlx5e: Avoid TX CQE generation if more xmit packets expected net/mlx5e: Avoid redundant dev_kfree_skb() upon NOP completion net/mlx5e: Remove re-assignment of wq type in mlx5e_enable_rq() net/mlx5e: Use skb_shinfo(skb)->gso_segs rather than counting them net/mlx5e: Static mapping of netdev priv resources to/from netdev TX queues net/mlx4_en: Use HW counters for rx/tx bytes/packets in PF device ...
Diffstat (limited to 'net/rds')
-rw-r--r--net/rds/af_rds.c43
-rw-r--r--net/rds/bind.c4
-rw-r--r--net/rds/ib.h22
-rw-r--r--net/rds/rds.h6
-rw-r--r--net/rds/transport.c21
5 files changed, 82 insertions, 14 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 11b623c2840c..896834cd3b9a 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -261,6 +261,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
+static int rds_set_transport(struct rds_sock *rs, char __user *optval,
+ int optlen)
+{
+ int t_type;
+
+ if (rs->rs_transport)
+ return -EOPNOTSUPP; /* previously attached to transport */
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ return -EFAULT;
+
+ if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
+ return -EINVAL;
+
+ rs->rs_transport = rds_trans_get(t_type);
+
+ return rs->rs_transport ? 0 : -ENOPROTOOPT;
+}
+
static int rds_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -291,6 +313,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
case RDS_CONG_MONITOR:
ret = rds_cong_monitor(rs, optval, optlen);
break;
+ case SO_RDS_TRANSPORT:
+ lock_sock(sock->sk);
+ ret = rds_set_transport(rs, optval, optlen);
+ release_sock(sock->sk);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -303,6 +330,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ int trans;
if (level != SOL_RDS)
goto out;
@@ -328,6 +356,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
else
ret = 0;
break;
+ case SO_RDS_TRANSPORT:
+ if (len < sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ trans = (rs->rs_transport ? rs->rs_transport->t_type :
+ RDS_TRANS_NONE); /* unbound */
+ if (put_user(trans, (int __user *)optval) ||
+ put_user(sizeof(int), optlen))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
default:
break;
}
@@ -431,7 +472,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto);
+ sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index a2e6562da751..4ebd29c128b6 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ret)
goto out;
+ if (rs->rs_transport) { /* previously bound */
+ ret = 0;
+ goto out;
+ }
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 333611d9e07d..86d88ec5d556 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -235,28 +235,34 @@ extern struct workqueue_struct *rds_ib_wq;
* doesn't define it.
*/
static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
- struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+ struct scatterlist *sglist,
+ unsigned int sg_dma_len,
+ int direction)
{
+ struct scatterlist *sg;
unsigned int i;
- for (i = 0; i < sg_dma_len; ++i) {
+ for_each_sg(sglist, sg, sg_dma_len, i) {
ib_dma_sync_single_for_cpu(dev,
- ib_sg_dma_address(dev, &sg[i]),
- ib_sg_dma_len(dev, &sg[i]),
+ ib_sg_dma_address(dev, sg),
+ ib_sg_dma_len(dev, sg),
direction);
}
}
#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
- struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+ struct scatterlist *sglist,
+ unsigned int sg_dma_len,
+ int direction)
{
+ struct scatterlist *sg;
unsigned int i;
- for (i = 0; i < sg_dma_len; ++i) {
+ for_each_sg(sglist, sg, sg_dma_len, i) {
ib_dma_sync_single_for_device(dev,
- ib_sg_dma_address(dev, &sg[i]),
- ib_sg_dma_len(dev, &sg[i]),
+ ib_sg_dma_address(dev, sg),
+ ib_sg_dma_len(dev, sg),
direction);
}
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index cc549858ed0e..2260c1e434b1 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -410,11 +410,6 @@ struct rds_notifier {
* should try hard not to block.
*/
-#define RDS_TRANS_IB 0
-#define RDS_TRANS_IWARP 1
-#define RDS_TRANS_TCP 2
-#define RDS_TRANS_COUNT 3
-
struct rds_transport {
char t_name[TRANSNAMSIZ];
struct list_head t_item;
@@ -804,6 +799,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
+struct rds_transport *rds_trans_get(int t_type);
int rds_trans_init(void);
void rds_trans_exit(void);
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4fec367..8b4a6cd2c3a7 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
return ret;
}
+struct rds_transport *rds_trans_get(int t_type)
+{
+ struct rds_transport *ret = NULL;
+ struct rds_transport *trans;
+ unsigned int i;
+
+ down_read(&rds_trans_sem);
+ for (i = 0; i < RDS_TRANS_COUNT; i++) {
+ trans = transports[i];
+
+ if (trans && trans->t_type == t_type &&
+ (!trans->t_owner || try_module_get(trans->t_owner))) {
+ ret = trans;
+ break;
+ }
+ }
+ up_read(&rds_trans_sem);
+
+ return ret;
+}
+
/*
* This returns the number of stats entries in the snapshot and only
* copies them using the iter if there is enough space for them. The