Diffstat (limited to 'drivers/net/ivshmem-net.c')
-rw-r--r--  drivers/net/ivshmem-net.c  | 1077
1 file changed, 1077 insertions, 0 deletions
diff --git a/drivers/net/ivshmem-net.c b/drivers/net/ivshmem-net.c
new file mode 100644
index 000000000000..c4821b80aec8
--- /dev/null
+++ b/drivers/net/ivshmem-net.c
@@ -0,0 +1,1077 @@
+/*
+ * Copyright 2016 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) Siemens AG, 2016-2020
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/ivshmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/virtio_ring.h>
+
+#define DRV_NAME "ivshmem-net"
+
+#define IVSHM_NET_STATE_RESET 0
+#define IVSHM_NET_STATE_INIT 1
+#define IVSHM_NET_STATE_READY 2
+#define IVSHM_NET_STATE_RUN 3
+
+#define IVSHM_NET_FLAG_RUN 0
+
+#define IVSHM_NET_MTU_DEF 16384
+
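+/*
+ * Space one frame occupies in the data area: the payload plus 18 bytes
+ * of Ethernet overhead (presumably the 14-byte header plus 4-byte FCS),
+ * aligned up so consecutive frames start on their own cache line.
+ */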
+#define IVSHM_NET_FRAME_SIZE(s) ALIGN(18 + (s), SMP_CACHE_BYTES)
+
+#define IVSHM_NET_VQ_ALIGN 64
+
+#define IVSHM_NET_SECTION_TX 0
+#define IVSHM_NET_SECTION_RX 1
+
+#define IVSHM_NET_MSIX_STATE 0
+#define IVSHM_NET_MSIX_TX_RX 1
+
+#define IVSHM_NET_NUM_VECTORS 2
+
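+/*
+ * One direction of the link.  Each direction is a virtio-style vring
+ * whose descriptors point into a flat data area that follows the ring
+ * in the same shared-memory section.  head/tail track that byte ring
+ * buffer, free_head/num_free the descriptor free list, and
+ * last_avail_idx/last_used_idx our progress through the vring.
+ */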
+struct ivshm_net_queue {
+ struct vring vr;
+ u32 free_head;
+ u32 num_free;
+ u32 num_added;
+ u16 last_avail_idx;
+ u16 last_used_idx;
+
+ void *data;
+ void *end;
+ u32 size;
+ u32 head;
+ u32 tail;
+};
+
+struct ivshm_net_stats {
+ u32 tx_rx_interrupts;
+ u32 tx_packets;
+ u32 tx_notify;
+ u32 tx_pause;
+ u32 rx_packets;
+ u32 rx_notify;
+ u32 napi_poll;
+ u32 napi_complete;
+ u32 napi_poll_n[10];
+};
+
+struct ivshm_net {
+ struct ivshm_net_queue rx;
+ struct ivshm_net_queue tx;
+
+ u32 vrsize;
+ u32 qlen;
+ u32 qsize;
+
+ struct napi_struct napi;
+
+ u32 state;
+ u32 last_peer_state;
+ u32 *state_table;
+
+ unsigned long flags;
+
+ struct workqueue_struct *state_wq;
+ struct work_struct state_work;
+
+ struct ivshm_net_stats stats;
+
+ struct ivshm_regs __iomem *ivshm_regs;
+ void *shm[2];
+ resource_size_t shmlen;
+ u32 peer_id;
+
+ u32 tx_rx_vector;
+
+ struct pci_dev *pdev;
+};
+
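+/*
+ * Translate a descriptor into a pointer into the shared-memory region,
+ * treating the peer as untrusted: reject descriptors with any flags set
+ * (no chaining or write flags are used here), offsets outside the
+ * section, data outside the queue's data area, and lengths that would
+ * run past its end.
+ */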
+static void *ivshm_net_desc_data(struct ivshm_net *in,
+ struct ivshm_net_queue *q,
+ unsigned int region,
+ struct vring_desc *desc,
+ u32 *len)
+{
+ u64 offs = READ_ONCE(desc->addr);
+ u32 dlen = READ_ONCE(desc->len);
+ u16 flags = READ_ONCE(desc->flags);
+ void *data;
+
+ if (flags)
+ return NULL;
+
+ if (offs >= in->shmlen)
+ return NULL;
+
+ data = in->shm[region] + offs;
+
+ if (data < q->data || data >= q->end)
+ return NULL;
+
+ if (dlen > q->end - data)
+ return NULL;
+
+ *len = dlen;
+
+ return data;
+}
+
+static void ivshm_net_init_queue(struct ivshm_net *in,
+ struct ivshm_net_queue *q,
+ void *mem, unsigned int len)
+{
+ memset(q, 0, sizeof(*q));
+
+ vring_init(&q->vr, len, mem, IVSHM_NET_VQ_ALIGN);
+ q->data = mem + in->vrsize;
+ q->end = q->data + in->qsize;
+ q->size = in->qsize;
+}
+
+static void ivshm_net_init_queues(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ void *tx;
+ void *rx;
+ int i;
+
+ tx = in->shm[IVSHM_NET_SECTION_TX];
+ rx = in->shm[IVSHM_NET_SECTION_RX];
+
+ memset(tx, 0, in->shmlen);
+
+ ivshm_net_init_queue(in, &in->tx, tx, in->qlen);
+ ivshm_net_init_queue(in, &in->rx, rx, in->qlen);
+
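+ /*
+ * A used ring is written by the consumer, but each peer can only
+ * write to its own output section.  Swapping the used-ring
+ * pointers therefore places the used ring of our TX vring in the
+ * peer-writable RX section, and vice versa.
+ */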
+ swap(in->rx.vr.used, in->tx.vr.used);
+
+ in->tx.num_free = in->tx.vr.num;
+
+ for (i = 0; i < in->tx.vr.num - 1; i++)
+ in->tx.vr.desc[i].next = i + 1;
+}
+
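+/*
+ * Size the rings for the available section: start at 4096 descriptors
+ * and halve until the vring metadata takes less than 1/8 of the
+ * section.  The remainder becomes the frame data area and must still
+ * hold a few minimum-sized frames.
+ */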
+static int ivshm_net_calc_qsize(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ unsigned int vrsize;
+ unsigned int qsize;
+ unsigned int qlen;
+
+ for (qlen = 4096; qlen > 32; qlen >>= 1) {
+ vrsize = vring_size(qlen, IVSHM_NET_VQ_ALIGN);
+ vrsize = ALIGN(vrsize, IVSHM_NET_VQ_ALIGN);
+ if (vrsize < in->shmlen / 8)
+ break;
+ }
+
+ if (vrsize > in->shmlen)
+ return -EINVAL;
+
+ qsize = in->shmlen - vrsize;
+
+ if (qsize < 4 * ETH_MIN_MTU)
+ return -EINVAL;
+
+ in->vrsize = vrsize;
+ in->qlen = qlen;
+ in->qsize = qsize;
+
+ return 0;
+}
+
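+/*
+ * Interrupt mitigation via virtio-style event indices: each side
+ * publishes the ring index at which it wants to be notified, and a
+ * doorbell (a write to the register that raises the peer's TX/RX
+ * vector) is only rung when progress crosses that index.
+ */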
+static void ivshm_net_notify_tx(struct ivshm_net *in, unsigned int num)
+{
+ u16 evt, old, new;
+
+ virt_mb();
+
+ evt = READ_ONCE(vring_avail_event(&in->tx.vr));
+ old = in->tx.last_avail_idx - num;
+ new = in->tx.last_avail_idx;
+
+ if (vring_need_event(evt, new, old)) {
+ writel(in->tx_rx_vector | (in->peer_id << 16),
+ &in->ivshm_regs->doorbell);
+ in->stats.tx_notify++;
+ }
+}
+
+static void ivshm_net_enable_rx_irq(struct ivshm_net *in)
+{
+ vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
+ virt_wmb();
+}
+
+static void ivshm_net_notify_rx(struct ivshm_net *in, unsigned int num)
+{
+ u16 evt, old, new;
+
+ virt_mb();
+
+ evt = READ_ONCE(vring_used_event(&in->rx.vr));
+ old = in->rx.last_used_idx - num;
+ new = in->rx.last_used_idx;
+
+ if (vring_need_event(evt, new, old)) {
+ writel(in->tx_rx_vector | (in->peer_id << 16),
+ &in->ivshm_regs->doorbell);
+ in->stats.rx_notify++;
+ }
+}
+
+static void ivshm_net_enable_tx_irq(struct ivshm_net *in)
+{
+ vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
+ virt_wmb();
+}
+
+static bool ivshm_net_rx_avail(struct ivshm_net *in)
+{
+ virt_mb();
+ return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
+}
+
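+/*
+ * Largest contiguous chunk free in the TX byte ring.  Frames never
+ * wrap, so when head is at or past tail the usable space is the larger
+ * of the run up to the end of the ring and the run at its start.
+ */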
+static size_t ivshm_net_tx_space(struct ivshm_net *in)
+{
+ struct ivshm_net_queue *tx = &in->tx;
+ u32 tail = tx->tail;
+ u32 head = tx->head;
+ u32 space;
+
+ if (head < tail)
+ space = tail - head;
+ else
+ space = max(tx->size - head, tail);
+
+ return space;
+}
+
+static bool ivshm_net_tx_ok(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+
+ return in->tx.num_free >= 2 &&
+ ivshm_net_tx_space(in) >= 2 * IVSHM_NET_FRAME_SIZE(ndev->mtu);
+}
+
+static u32 ivshm_net_tx_advance(struct ivshm_net_queue *q, u32 *pos, u32 len)
+{
+ u32 p = *pos;
+
+ len = IVSHM_NET_FRAME_SIZE(len);
+
+ if (q->size - p < len)
+ p = 0;
+ *pos = p + len;
+
+ return p;
+}
+
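+/*
+ * Reclaim TX buffers the peer has consumed: walk the used ring,
+ * validate each entry, advance the byte-ring tail and chain the
+ * reclaimed descriptors back onto the free list in a single splice.
+ * Returns true if there is enough room to transmit again.
+ */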
+static bool ivshm_net_tx_clean(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ struct ivshm_net_queue *tx = &in->tx;
+ struct vring_used_elem *used;
+ struct vring *vr = &tx->vr;
+ struct vring_desc *desc;
+ struct vring_desc *fdesc;
+ u16 last = tx->last_used_idx;
+ unsigned int num;
+ bool tx_ok;
+ u32 fhead;
+
+ fdesc = NULL;
+ fhead = 0;
+ num = 0;
+
+ while (last != virt_load_acquire(&vr->used->idx)) {
+ void *data;
+ u32 len;
+ u32 tail;
+
+ used = vr->used->ring + (last % vr->num);
+ if (used->id >= vr->num || used->len != 1) {
+ netdev_err(ndev, "invalid tx used->id %d ->len %d\n",
+ used->id, used->len);
+ break;
+ }
+
+ desc = &vr->desc[used->id];
+
+ data = ivshm_net_desc_data(in, &in->tx, IVSHM_NET_SECTION_TX,
+ desc, &len);
+ if (!data) {
+ netdev_err(ndev, "bad tx descriptor, data == NULL\n");
+ break;
+ }
+
+ tail = ivshm_net_tx_advance(tx, &tx->tail, len);
+ if (data != tx->data + tail) {
+ netdev_err(ndev, "bad tx descriptor\n");
+ break;
+ }
+
+ if (!num)
+ fdesc = desc;
+ else
+ desc->next = fhead;
+
+ fhead = used->id;
+
+ tx->last_used_idx = ++last;
+ num++;
+ tx->num_free++;
+ BUG_ON(tx->num_free > vr->num);
+
+ tx_ok = ivshm_net_tx_ok(ndev);
+ if (!tx_ok)
+ ivshm_net_enable_tx_irq(in);
+ }
+
+ if (num) {
+ fdesc->next = tx->free_head;
+ tx->free_head = fhead;
+ } else {
+ tx_ok = ivshm_net_tx_ok(ndev);
+ }
+
+ return tx_ok;
+}
+
+static void ivshm_net_tx_poll(struct net_device *ndev)
+{
+ struct netdev_queue *txq = netdev_get_tx_queue(ndev, 0);
+
+ if (!__netif_tx_trylock(txq))
+ return;
+
+ if (ivshm_net_tx_clean(ndev) && netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+
+ __netif_tx_unlock(txq);
+}
+
+static struct vring_desc *ivshm_net_rx_desc(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ struct ivshm_net_queue *rx = &in->rx;
+ struct vring *vr = &rx->vr;
+ unsigned int avail;
+ u16 avail_idx;
+
+ avail_idx = virt_load_acquire(&vr->avail->idx);
+
+ if (avail_idx == rx->last_avail_idx)
+ return NULL;
+
+ avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
+ if (avail >= vr->num) {
+ netdev_err(ndev, "invalid rx avail %d\n", avail);
+ return NULL;
+ }
+
+ return &vr->desc[avail];
+}
+
+static void ivshm_net_rx_finish(struct ivshm_net *in, struct vring_desc *desc)
+{
+ struct ivshm_net_queue *rx = &in->rx;
+ struct vring *vr = &rx->vr;
+ unsigned int desc_id = desc - vr->desc;
+ unsigned int used;
+
+ used = rx->last_used_idx++ & (vr->num - 1);
+ vr->used->ring[used].id = desc_id;
+ vr->used->ring[used].len = 1;
+
+ virt_store_release(&vr->used->idx, rx->last_used_idx);
+}
+
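+/*
+ * NAPI poll: first reclaim TX completions, then copy up to 'budget'
+ * frames from the RX ring into skbs.  If the budget is not exhausted,
+ * re-enable the RX interrupt and re-check the ring so a frame that
+ * slipped in meanwhile is not left behind.
+ */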
+static int ivshm_net_poll(struct napi_struct *napi, int budget)
+{
+ struct net_device *ndev = napi->dev;
+ struct ivshm_net *in = container_of(napi, struct ivshm_net, napi);
+ int received = 0;
+
+ in->stats.napi_poll++;
+
+ ivshm_net_tx_poll(ndev);
+
+ while (received < budget) {
+ struct vring_desc *desc;
+ struct sk_buff *skb;
+ void *data;
+ u32 len;
+
+ desc = ivshm_net_rx_desc(ndev);
+ if (!desc)
+ break;
+
+ data = ivshm_net_desc_data(in, &in->rx, IVSHM_NET_SECTION_RX,
+ desc, &len);
+ if (!data) {
+ netdev_err(ndev, "bad rx descriptor\n");
+ break;
+ }
+
+ skb = napi_alloc_skb(napi, len);
+ if (skb) {
+ memcpy(skb_put(skb, len), data, len);
+ skb->protocol = eth_type_trans(skb, ndev);
+ napi_gro_receive(napi, skb);
+ } else {
+ ndev->stats.rx_dropped++;
+ }
+
+ ndev->stats.rx_packets++;
+ ndev->stats.rx_bytes += len;
+
+ ivshm_net_rx_finish(in, desc);
+ received++;
+ }
+
+ if (received < budget) {
+ in->stats.napi_complete++;
+ napi_complete_done(napi, received);
+ ivshm_net_enable_rx_irq(in);
+ if (ivshm_net_rx_avail(in))
+ napi_schedule(napi);
+ }
+
+ if (received)
+ ivshm_net_notify_rx(in, received);
+
+ in->stats.rx_packets += received;
+ in->stats.napi_poll_n[received ? 1 + min(ilog2(received), 8) : 0]++;
+
+ return received;
+}
+
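+/*
+ * Transmit: reserve space in the byte ring, copy (and checksum) the
+ * skb into shared memory, publish a descriptor on the avail ring, and
+ * ring the peer's doorbell unless further packets are pending
+ * (xmit_more) and the queue is still healthy.
+ */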
+static netdev_tx_t ivshm_net_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ struct ivshm_net_queue *tx = &in->tx;
+ bool xmit_more = netdev_xmit_more();
+ struct vring *vr = &tx->vr;
+ struct vring_desc *desc;
+ unsigned int desc_idx;
+ unsigned int avail;
+ u32 head;
+ void *buf;
+
+ if (!ivshm_net_tx_clean(ndev)) {
+ netif_stop_queue(ndev);
+
+ netdev_err(ndev, "BUG: tx ring full when queue awake!\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ desc_idx = tx->free_head;
+ desc = &vr->desc[desc_idx];
+ tx->free_head = desc->next;
+ tx->num_free--;
+
+ head = ivshm_net_tx_advance(tx, &tx->head, skb->len);
+
+ if (!ivshm_net_tx_ok(ndev)) {
+ ivshm_net_enable_tx_irq(in);
+ netif_stop_queue(ndev);
+ xmit_more = false;
+ in->stats.tx_pause++;
+ }
+
+ buf = tx->data + head;
+ skb_copy_and_csum_dev(skb, buf);
+
+ desc->addr = buf - in->shm[IVSHM_NET_SECTION_TX];
+ desc->len = skb->len;
+ desc->flags = 0;
+
+ avail = tx->last_avail_idx++ & (vr->num - 1);
+ vr->avail->ring[avail] = desc_idx;
+ tx->num_added++;
+
+ virt_store_release(&vr->avail->idx, tx->last_avail_idx);
+
+ if (!xmit_more) {
+ ivshm_net_notify_tx(in, tx->num_added);
+ tx->num_added = 0;
+ }
+
+ in->stats.tx_packets++;
+ ndev->stats.tx_packets++;
+ ndev->stats.tx_bytes += skb->len;
+
+ dev_consume_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+
+static void ivshm_net_set_state(struct ivshm_net *in, u32 state)
+{
+ virt_wmb();
+ WRITE_ONCE(in->state, state);
+ writel(state, &in->ivshm_regs->state);
+}
+
+static void ivshm_net_run(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+
+ if (in->state < IVSHM_NET_STATE_READY)
+ return;
+
+ if (!netif_running(ndev))
+ return;
+
+ if (test_and_set_bit(IVSHM_NET_FLAG_RUN, &in->flags))
+ return;
+
+ netif_start_queue(ndev);
+ napi_enable(&in->napi);
+ napi_schedule(&in->napi);
+ ivshm_net_set_state(in, IVSHM_NET_STATE_RUN);
+}
+
+static void ivshm_net_do_stop(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+
+ ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+
+ if (!test_and_clear_bit(IVSHM_NET_FLAG_RUN, &in->flags))
+ return;
+
+ netif_stop_queue(ndev);
+ napi_disable(&in->napi);
+}
+
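+/*
+ * Handshake state machine, driven by the peer's state as published in
+ * the state table: RESET -> INIT once the peer has left READY/RUN,
+ * INIT -> READY once the peer has left RESET (queues are initialized
+ * here), READY -> RUN with carrier on once the peer reaches READY, and
+ * back to RESET with carrier off whenever the peer resets.
+ */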
+static void ivshm_net_state_change(struct work_struct *work)
+{
+ struct ivshm_net *in = container_of(work, struct ivshm_net, state_work);
+ struct net_device *ndev = in->napi.dev;
+ u32 peer_state = READ_ONCE(in->state_table[in->peer_id]);
+
+ switch (in->state) {
+ case IVSHM_NET_STATE_RESET:
+ /*
+ * Wait for the remote to leave READY/RUN before transitioning
+ * to INIT.
+ */
+ if (peer_state < IVSHM_NET_STATE_READY)
+ ivshm_net_set_state(in, IVSHM_NET_STATE_INIT);
+ break;
+
+ case IVSHM_NET_STATE_INIT:
+ /*
+ * Wait for the remote to leave RESET before performing the
+ * initialization and moving to READY.
+ */
+ if (peer_state > IVSHM_NET_STATE_RESET) {
+ ivshm_net_init_queues(ndev);
+ ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
+ rtnl_unlock();
+ }
+ break;
+
+ case IVSHM_NET_STATE_READY:
+ /*
+ * Link is up and we are running once the remote is in READY or
+ * RUN.
+ */
+ if (peer_state >= IVSHM_NET_STATE_READY) {
+ netif_carrier_on(ndev);
+ ivshm_net_run(ndev);
+ break;
+ }
+ /* fall through */
+ case IVSHM_NET_STATE_RUN:
+ /*
+ * If the remote goes to RESET, we need to follow immediately.
+ */
+ if (peer_state == IVSHM_NET_STATE_RESET) {
+ netif_carrier_off(ndev);
+ ivshm_net_do_stop(ndev);
+ }
+ break;
+ }
+
+ virt_wmb();
+ WRITE_ONCE(in->last_peer_state, peer_state);
+}
+
+static void ivshm_net_check_state(struct ivshm_net *in)
+{
+ if (in->state_table[in->peer_id] != in->last_peer_state ||
+ !test_bit(IVSHM_NET_FLAG_RUN, &in->flags))
+ queue_work(in->state_wq, &in->state_work);
+}
+
+static irqreturn_t ivshm_net_int_state(int irq, void *data)
+{
+ struct ivshm_net *in = data;
+
+ ivshm_net_check_state(in);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ivshm_net_int_tx_rx(int irq, void *data)
+{
+ struct ivshm_net *in = data;
+
+ in->stats.tx_rx_interrupts++;
+
+ napi_schedule_irqoff(&in->napi);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ivshm_net_intx(int irq, void *data)
+{
+ ivshm_net_int_state(irq, data);
+ ivshm_net_int_tx_rx(irq, data);
+
+ return IRQ_HANDLED;
+}
+
+static int ivshm_net_open(struct net_device *ndev)
+{
+ netdev_reset_queue(ndev);
+ ndev->operstate = IF_OPER_UP;
+ ivshm_net_run(ndev);
+
+ return 0;
+}
+
+static int ivshm_net_stop(struct net_device *ndev)
+{
+ ndev->operstate = IF_OPER_DOWN;
+ ivshm_net_do_stop(ndev);
+
+ return 0;
+}
+
+static int ivshm_net_change_mtu(struct net_device *ndev, int mtu)
+{
+ if (netif_running(ndev)) {
+ netdev_err(ndev, "interface must be stopped to change its MTU\n");
+ return -EBUSY;
+ }
+
+ ndev->mtu = mtu;
+
+ return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void ivshm_net_poll_controller(struct net_device *ndev)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+
+ napi_schedule(&in->napi);
+}
+#endif
+
+static const struct net_device_ops ivshm_net_ops = {
+ .ndo_open = ivshm_net_open,
+ .ndo_stop = ivshm_net_stop,
+ .ndo_start_xmit = ivshm_net_xmit,
+ .ndo_change_mtu = ivshm_net_change_mtu,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = ivshm_net_poll_controller,
+#endif
+};
+
+static const char ivshm_net_stats[][ETH_GSTRING_LEN] = {
+ "tx_rx_interrupts",
+ "tx_packets",
+ "tx_notify",
+ "tx_pause",
+ "rx_packets",
+ "rx_notify",
+ "napi_poll",
+ "napi_complete",
+ "napi_poll_0",
+ "napi_poll_1",
+ "napi_poll_2",
+ "napi_poll_4",
+ "napi_poll_8",
+ "napi_poll_16",
+ "napi_poll_32",
+ "napi_poll_64",
+ "napi_poll_128",
+ "napi_poll_256",
+};
+
+#define NUM_STATS ARRAY_SIZE(ivshm_net_stats)
+
+static int ivshm_net_get_sset_count(struct net_device *ndev, int sset)
+{
+ if (sset == ETH_SS_STATS)
+ return NUM_STATS;
+
+ return -EOPNOTSUPP;
+}
+
+static void ivshm_net_get_strings(struct net_device *ndev, u32 sset, u8 *buf)
+{
+ if (sset == ETH_SS_STATS)
+ memcpy(buf, &ivshm_net_stats, sizeof(ivshm_net_stats));
+}
+
+static void ivshm_net_get_ethtool_stats(struct net_device *ndev,
+ struct ethtool_stats *estats, u64 *st)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ unsigned int n = 0;
+ unsigned int i;
+
+ st[n++] = in->stats.tx_rx_interrupts;
+ st[n++] = in->stats.tx_packets;
+ st[n++] = in->stats.tx_notify;
+ st[n++] = in->stats.tx_pause;
+ st[n++] = in->stats.rx_packets;
+ st[n++] = in->stats.rx_notify;
+ st[n++] = in->stats.napi_poll;
+ st[n++] = in->stats.napi_complete;
+
+ for (i = 0; i < ARRAY_SIZE(in->stats.napi_poll_n); i++)
+ st[n++] = in->stats.napi_poll_n[i];
+
+ memset(&in->stats, 0, sizeof(in->stats));
+}
+
+#define IVSHM_NET_REGS_LEN (3 * sizeof(u32) + 6 * sizeof(u16))
+
+static int ivshm_net_get_regs_len(struct net_device *ndev)
+{
+ return IVSHM_NET_REGS_LEN;
+}
+
+static void ivshm_net_get_regs(struct net_device *ndev,
+ struct ethtool_regs *regs, void *p)
+{
+ struct ivshm_net *in = netdev_priv(ndev);
+ u32 *reg32 = p;
+ u16 *reg16;
+
+ *reg32++ = in->state;
+ *reg32++ = in->last_peer_state;
+ *reg32++ = in->qlen;
+
+ reg16 = (u16 *)reg32;
+
+ *reg16++ = in->tx.vr.avail ? in->tx.vr.avail->idx : 0;
+ *reg16++ = in->tx.vr.used ? in->tx.vr.used->idx : 0;
+ *reg16++ = in->tx.vr.avail ? vring_avail_event(&in->tx.vr) : 0;
+
+ *reg16++ = in->rx.vr.avail ? in->rx.vr.avail->idx : 0;
+ *reg16++ = in->rx.vr.used ? in->rx.vr.used->idx : 0;
+ *reg16++ = in->rx.vr.avail ? vring_avail_event(&in->rx.vr) : 0;
+}
+
+static const struct ethtool_ops ivshm_net_ethtool_ops = {
+ .get_sset_count = ivshm_net_get_sset_count,
+ .get_strings = ivshm_net_get_strings,
+ .get_ethtool_stats = ivshm_net_get_ethtool_stats,
+ .get_regs_len = ivshm_net_get_regs_len,
+ .get_regs = ivshm_net_get_regs,
+};
+
+static u64 get_config_qword(struct pci_dev *pdev, unsigned int pos)
+{
+ u32 lo, hi;
+
+ pci_read_config_dword(pdev, pos, &lo);
+ pci_read_config_dword(pdev, pos + 4, &hi);
+ return lo | ((u64)hi << 32);
+}
+
+static int ivshm_net_probe(struct pci_dev *pdev,
+ const struct pci_device_id *pci_id)
+{
+ phys_addr_t output_sections_addr, section_addr;
+ resource_size_t section_sz, output_section_sz;
+ void *state_table, *output_sections;
+ struct ivshm_regs __iomem *regs;
+ struct net_device *ndev;
+ struct ivshm_net *in;
+ unsigned int cap_pos;
+ char *device_name;
+ int vendor_cap;
+ u32 id, dword;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "pci_enable_device: %d\n", ret);
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(0), DRV_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "pcim_iomap_regions: %d\n", ret);
+ return ret;
+ }
+
+ regs = pcim_iomap_table(pdev)[0];
+
+ id = readl(&regs->id);
+ if (id > 1) {
+ dev_err(&pdev->dev, "invalid ID %d\n", id);
+ return -EINVAL;
+ }
+ if (readl(&regs->max_peers) > 2) {
+ dev_err(&pdev->dev, "only 2 peers supported\n");
+ return -EINVAL;
+ }
+
+ vendor_cap = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
+ /* pci_find_capability() returns 0, not a negative value, if absent */
+ if (!vendor_cap) {
+ dev_err(&pdev->dev, "missing vendor capability\n");
+ return -EINVAL;
+ }
+
+ if (pci_resource_len(pdev, 2) > 0) {
+ section_addr = pci_resource_start(pdev, 2);
+ } else {
+ cap_pos = vendor_cap + IVSHM_CFG_ADDRESS;
+ section_addr = get_config_qword(pdev, cap_pos);
+ }
+
+ cap_pos = vendor_cap + IVSHM_CFG_STATE_TAB_SZ;
+ pci_read_config_dword(pdev, cap_pos, &dword);
+ section_sz = dword;
+
+ if (!devm_request_mem_region(&pdev->dev, section_addr, section_sz,
+ DRV_NAME))
+ return -EBUSY;
+
+ state_table = devm_memremap(&pdev->dev, section_addr, section_sz,
+ MEMREMAP_WB);
+ if (!state_table)
+ return -ENOMEM;
+
+ output_sections_addr = section_addr + section_sz;
+
+ cap_pos = vendor_cap + IVSHM_CFG_RW_SECTION_SZ;
+ section_sz = get_config_qword(pdev, cap_pos);
+ if (section_sz > 0) {
+ dev_info(&pdev->dev, "R/W section detected - unused by this driver version\n");
+ output_sections_addr += section_sz;
+ }
+
+ cap_pos = vendor_cap + IVSHM_CFG_OUTPUT_SECTION_SZ;
+ output_section_sz = get_config_qword(pdev, cap_pos);
+ if (output_section_sz == 0) {
+ dev_err(&pdev->dev, "Missing input/output sections\n");
+ return -EINVAL;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev, output_sections_addr,
+ output_section_sz * 2, DRV_NAME))
+ return -EBUSY;
+
+ output_sections = devm_memremap(&pdev->dev, output_sections_addr,
+ output_section_sz * 2, MEMREMAP_WB);
+ if (!output_sections)
+ return -ENOMEM;
+
+ section_addr = output_sections_addr + output_section_sz * id;
+ dev_info(&pdev->dev, "TX memory at %pa, size %pa\n",
+ &section_addr, &output_section_sz);
+ section_addr = output_sections_addr + output_section_sz * !id;
+ dev_info(&pdev->dev, "RX memory at %pa, size %pa\n",
+ &section_addr, &output_section_sz);
+
+ device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s[%s]", DRV_NAME,
+ dev_name(&pdev->dev));
+ if (!device_name)
+ return -ENOMEM;
+
+ ndev = alloc_etherdev(sizeof(*in));
+ if (!ndev)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, ndev);
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ in = netdev_priv(ndev);
+ in->ivshm_regs = regs;
+ in->state_table = state_table;
+
+ in->shm[IVSHM_NET_SECTION_TX] =
+ output_sections + output_section_sz * id;
+ in->shm[IVSHM_NET_SECTION_RX] =
+ output_sections + output_section_sz * !id;
+
+ in->shmlen = output_section_sz;
+
+ in->peer_id = !id;
+ in->pdev = pdev;
+
+ ret = ivshm_net_calc_qsize(ndev);
+ if (ret)
+ goto err_free;
+
+ in->state_wq = alloc_ordered_workqueue(device_name, 0);
+ if (!in->state_wq) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ INIT_WORK(&in->state_work, ivshm_net_state_change);
+
+ eth_random_addr(ndev->dev_addr);
+ ndev->netdev_ops = &ivshm_net_ops;
+ ndev->ethtool_ops = &ivshm_net_ethtool_ops;
+ ndev->mtu = min_t(u32, IVSHM_NET_MTU_DEF, in->qsize / 16);
+ ndev->min_mtu = ETH_MIN_MTU;
+ ndev->max_mtu = min_t(u32, ETH_MAX_MTU, in->qsize / 4);
+ ndev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
+ ndev->features = ndev->hw_features;
+
+ netif_carrier_off(ndev);
+ netif_napi_add(ndev, &in->napi, ivshm_net_poll, NAPI_POLL_WEIGHT);
+
+ ret = register_netdev(ndev);
+ if (ret)
+ goto err_wq;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 2, PCI_IRQ_LEGACY | PCI_IRQ_MSIX);
+ if (ret < 0)
+ goto err_alloc_irq;
+
+ if (pdev->msix_enabled) {
+ if (ret != 2) {
+ ret = -EBUSY;
+ goto err_request_irq;
+ }
+
+ device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "%s-state[%s]", DRV_NAME,
+ dev_name(&pdev->dev));
+ if (!device_name) {
+ ret = -ENOMEM;
+ goto err_request_irq;
+ }
+
+ ret = request_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE),
+ ivshm_net_int_state, 0, device_name, in);
+ if (ret)
+ goto err_request_irq;
+
+ device_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+ "%s-tx-rx[%s]", DRV_NAME,
+ dev_name(&pdev->dev));
+ if (!device_name) {
+ ret = -ENOMEM;
+ goto err_request_irq2;
+ }
+
+ ret = request_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_TX_RX),
+ ivshm_net_int_tx_rx, 0, device_name, in);
+ if (ret)
+ goto err_request_irq2;
+
+ in->tx_rx_vector = IVSHM_NET_MSIX_TX_RX;
+ } else {
+ ret = request_irq(pci_irq_vector(pdev, 0), ivshm_net_intx, 0,
+ device_name, in);
+ if (ret)
+ goto err_request_irq;
+
+ in->tx_rx_vector = 0;
+ }
+
+ pci_set_master(pdev);
+
+ pci_write_config_byte(pdev, vendor_cap + IVSHM_CFG_PRIV_CNTL, 0);
+ writel(IVSHM_INT_ENABLE, &in->ivshm_regs->int_control);
+
+ writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->state);
+ ivshm_net_check_state(in);
+
+ return 0;
+
+err_request_irq2:
+ free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE), in);
+err_request_irq:
+ pci_free_irq_vectors(pdev);
+err_alloc_irq:
+ unregister_netdev(ndev);
+err_wq:
+ destroy_workqueue(in->state_wq);
+err_free:
+ free_netdev(ndev);
+
+ return ret;
+}
+
+static void ivshm_net_remove(struct pci_dev *pdev)
+{
+ struct net_device *ndev = pci_get_drvdata(pdev);
+ struct ivshm_net *in = netdev_priv(ndev);
+
+ writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->state);
+ writel(0, &in->ivshm_regs->int_control);
+
+ if (pdev->msix_enabled) {
+ free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_STATE), in);
+ free_irq(pci_irq_vector(pdev, IVSHM_NET_MSIX_TX_RX), in);
+ } else {
+ free_irq(pci_irq_vector(pdev, 0), in);
+ }
+ pci_free_irq_vectors(pdev);
+
+ unregister_netdev(ndev);
+ cancel_work_sync(&in->state_work);
+ destroy_workqueue(in->state_wq);
+ free_netdev(ndev);
+}
+
+static const struct pci_device_id ivshm_net_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_SIEMENS, PCI_DEVICE_ID_IVSHMEM),
+ (PCI_CLASS_OTHERS << 16) | IVSHM_PROTO_NET, 0xffffff },
+ { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ivshm_net_id_table);
+
+static struct pci_driver ivshm_net_driver = {
+ .name = DRV_NAME,
+ .id_table = ivshm_net_id_table,
+ .probe = ivshm_net_probe,
+ .remove = ivshm_net_remove,
+};
+module_pci_driver(ivshm_net_driver);
+
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");