summaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-05-24 22:20:51 -0400
committerDavid S. Miller <davem@davemloft.net>2018-05-24 22:20:51 -0400
commit90fed9c94625718a3a10db7d1e8e4efe093bbf5f (patch)
tree09b3bc9ea679316372b139338179a230105306dc /samples/bpf
parent49a473f5b5f54f33e0bd8618158d33f83153c921 (diff)
parent10f678683e4026e43524b0492068a371d00fdeed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2018-05-24 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc). 2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers. 3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit. 4) Jiong Wang adds support for indirect and arithmetic shifts to NFP 5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible. 6) Mathieu Xhonneux adds an End.BPF action to seg6local with BPF helpers allowing to edit/grow/shrink a SRH and apply on a packet generic SRv6 actions. 7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT. 8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events. 9) other misc fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/task_fd_query_kern.c19
-rw-r--r--samples/bpf/task_fd_query_user.c382
-rw-r--r--samples/bpf/xdp_monitor_kern.c49
-rw-r--r--samples/bpf/xdp_monitor_user.c69
-rw-r--r--samples/bpf/xdpsock_user.c135
6 files changed, 599 insertions, 59 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 62a99ab680e3..1303af10e54d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
+hostprogs-y += task_fd_query
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
+always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
new file mode 100644
index 000000000000..f4b0a9ea674d
--- /dev/null
+++ b/samples/bpf/task_fd_query_kern.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/blk_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kretprobe/blk_account_io_completion")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
new file mode 100644
index 000000000000..8381d792f138
--- /dev/null
+++ b/samples/bpf/task_fd_query_user.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define CHECK_PERROR_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("FAIL: %s:\n", __func__); \
+ perror(" "); \
+ return -1; \
+ } \
+})
+
+#define CHECK_AND_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) \
+ return -1; \
+})
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ errno = 0;
+ ret = (int)strtol(buf, NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+ CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
+
+ errno = 0;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
+ __u32 expected_fd_type)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int err;
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
+ __func__, prog_fd_idx, fn_name);
+ perror(" :");
+ return -1;
+ }
+ if (strcmp(buf, fn_name) != 0 ||
+ fd_type != expected_fd_type ||
+ probe_offset != 0x0 || probe_addr != 0x0) {
+ printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
+ prog_fd_idx);
+ printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
+ " probe_addr: 0x%llx\n",
+ buf, fd_type, probe_offset, probe_addr);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_nondebug_fs_kuprobe_common(const char *event_type,
+ const char *name, __u64 offset, __u64 addr, bool is_return,
+ char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr)
+{
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+ int type = bpf_find_probe_type(event_type);
+ struct perf_event_attr attr = {};
+ int fd;
+
+ if (type < 0 || is_return_bit < 0) {
+ printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
+ __func__, type, is_return_bit);
+ return -1;
+ }
+
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+
+ if (name) {
+ attr.config1 = ptr_to_u64((void *)name);
+ attr.config2 = offset;
+ } else {
+ attr.config1 = 0;
+ attr.config2 = addr;
+ }
+ attr.size = sizeof(attr);
+ attr.type = type;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ CHECK_PERROR_RET(fd < 0);
+
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
+ prog_id, fd_type, probe_offset, probe_addr) < 0);
+
+ return 0;
+}
+
+static int test_nondebug_fs_probe(const char *event_type, const char *name,
+ __u64 offset, __u64 addr, bool is_return,
+ __u32 expected_fd_type,
+ __u32 expected_ret_fd_type,
+ char *buf, __u32 buf_len)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 prog_id, fd_type;
+ int err;
+
+ err = test_nondebug_fs_kuprobe_common(event_type, name,
+ offset, addr, is_return,
+ buf, &buf_len, &prog_id,
+ &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, "
+ "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
+ __func__, name ? name : "", offset, addr, is_return);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != expected_ret_fd_type) ||
+ (!is_return && fd_type != expected_fd_type)) {
+ printf("FAIL: %s, incorrect fd_type %u\n",
+ __func__, fd_type);
+ return -1;
+ }
+ if (name) {
+ if (strcmp(name, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
+ __func__, probe_offset);
+ return -1;
+ }
+ } else {
+ if (buf_len != 0) {
+ printf("FAIL: %s, incorrect buf %p\n",
+ __func__, buf);
+ return -1;
+ }
+
+ if (probe_addr != addr) {
+ printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
+ __func__, probe_addr);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
+{
+ const char *event_type = "uprobe";
+ struct perf_event_attr attr = {};
+ char buf[256], event_alias[256];
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, res, kfd, efd;
+ ssize_t bytes;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+ event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ CHECK_PERROR_RET(kfd < 0);
+
+ res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
+
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
+ is_return ? 'r' : 'p', event_type, event_alias,
+ binary_path, offset);
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
+ CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
+
+ close(kfd);
+ kfd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+ event_type, event_alias);
+ efd = open(buf, O_RDONLY, 0);
+ CHECK_PERROR_RET(efd < 0);
+
+ bytes = read(efd, buf, sizeof(buf));
+ CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
+ close(efd);
+ buf[bytes] = '\0';
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ CHECK_PERROR_RET(kfd < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
+ (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
+ printf("FAIL: %s, incorrect fd_type %u\n", __func__,
+ fd_type);
+ return -1;
+ }
+ if (strcmp(binary_path, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
+ probe_offset);
+ return -1;
+ }
+
+ close(kfd);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
+ extern char __executable_start;
+ char filename[256], buf[256];
+ __u64 uprobe_file_offset;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ /* test two functions in the corresponding *_kern.c file */
+ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
+ BPF_FD_TYPE_KPROBE));
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ BPF_FD_TYPE_KRETPROBE));
+
+ /* test nondebug fs kprobe */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#ifdef __x86_64__
+ /* set a kprobe on "bpf_check + 0x5", which is x64 specific */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#endif
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ true, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ NULL, 0));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ 0, 0));
+
+ /* test nondebug fs uprobe */
+ /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
+ * and the default linker script, which defines __executable_start as
+ * the start of the .text section. The calculation could be different
+ * on different systems with different compilers. The right way is
+ * to parse the ELF file. We took a shortcut here.
+ */
+ uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, false,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, true,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+
+ /* test debug fs uprobe */
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ false));
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ true));
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 211db8ded0de..ad10fe700d7d 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -125,6 +125,7 @@ struct datarec {
u64 processed;
u64 dropped;
u64 info;
+ u64 err;
};
#define MAX_CPUS 64
@@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0;
}
+
+struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct devmap_xmit_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ u32 map_index; // offset:16; size:4; signed:0;
+ int drops; // offset:20; size:4; signed:1;
+ int sent; // offset:24; size:4; signed:1;
+ int from_ifindex; // offset:28; size:4; signed:1;
+ int to_ifindex; // offset:32; size:4; signed:1;
+ int err; // offset:36; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_devmap_xmit")
+int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->sent;
+ rec->dropped += ctx->drops;
+
+ /* Record bulk events, then userspace can calc average bulk size */
+ rec->info += 1;
+
+ /* Record error cases, where no frame were sent */
+ if (ctx->err)
+ rec->err++;
+
+ /* Catch API error of drv ndo_xdp_xmit sent more than count */
+ if (ctx->drops < 0)
+ rec->err++;
+
+ return 1;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index bf09b5188acd..dd558cbb2309 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -117,6 +117,7 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 info;
+ __u64 err;
};
#define MAX_CPUS 64
@@ -141,6 +142,7 @@ struct stats_record {
struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS];
+ struct record xdp_devmap_xmit;
};
static bool map_collect_record(int fd, __u32 key, struct record *rec)
@@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_info = 0;
+ __u64 sum_err = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
@@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info;
sum_info += values[i].info;
+ rec->cpu[i].err = values[i].err;
+ sum_err += values[i].err;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.info = sum_info;
+ rec->total.err = sum_err;
return true;
}
@@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
return pps;
}
+static double calc_err(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->err - p->err;
+ pps = packets / period;
+ }
+ return pps;
+}
+
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
bool err_only)
@@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec,
info = calc_info(r, p, t);
if (info > 0)
i_str = "sched";
- if (pps > 0)
+ if (pps > 0 || drop > 0)
printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str);
}
@@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec,
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
+ /* devmap ndo_xdp_xmit stats */
+ {
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
+ struct record *rec, *prev;
+ double drop, info, err;
+ char *i_str = "";
+ char *err_str = "";
+
+ rec = &stats_rec->xdp_devmap_xmit;
+ prev = &stats_prev->xdp_devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ err = calc_err(r, p, t);
+ if (info > 0) {
+ i_str = "bulk-average";
+ info = (pps+drop) / info; /* calc avg bulk */
+ }
+ if (err > 0)
+ err_str = "drv-err";
+ if (pps > 0 || drop > 0)
+ printf(fmt1, "devmap-xmit",
+ i, pps, drop, info, i_str, err_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ err = calc_err(&rec->total, &prev->total, t);
+ if (info > 0) {
+ i_str = "bulk-average";
+ info = (pps+drop) / info; /* calc avg bulk */
+ }
+ if (err > 0)
+ err_str = "drv-err";
+ printf(fmt2, "devmap-xmit", "total", pps, drop,
+ info, i_str, err_str);
+ }
+
printf("\n");
}
@@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec)
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+ fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+ map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
+
return true;
}
@@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void)
rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+ rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
@@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r)
free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu);
+ free(r->xdp_devmap_xmit.cpu);
for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu);
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 7fe60f6f7d53..e379eac034ac 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1,15 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
+/* Copyright(c) 2017 - 2018 Intel Corporation. */
#include <assert.h>
#include <errno.h>
@@ -89,7 +79,10 @@ struct xdp_umem_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_umem_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ u32 *ring;
+ void *map;
};
struct xdp_umem {
@@ -104,7 +97,10 @@ struct xdp_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_rxtx_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ struct xdp_desc *ring;
+ void *map;
};
struct xdpsock {
@@ -165,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer;
+ q->cached_cons = *q->consumer;
return q->size - (q->cached_prod - q->cached_cons);
}
@@ -178,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer + q->size;
+ q->cached_cons = *q->consumer + q->size;
return q->cached_cons - q->cached_prod;
}
@@ -187,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -199,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -218,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i].idx;
+ fq->ring[idx] = d[i].idx;
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -239,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i];
+ fq->ring[idx] = d[i];
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -258,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
- d[i] = cq->ring->desc[idx];
+ d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
- cq->ring->ptrs.consumer = cq->cached_cons;
+ *cq->consumer = cq->cached_cons;
}
return entries;
@@ -280,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
const struct xdp_desc *descs,
unsigned int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -289,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- r->desc[idx].idx = descs[i].idx;
- r->desc[idx].len = descs[i].len;
- r->desc[idx].offset = descs[i].offset;
+ r[idx].idx = descs[i].idx;
+ r[idx].len = descs[i].len;
+ r[idx].offset = descs[i].offset;
}
u_smp_wmb();
- r->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
__u32 idx, unsigned int ndescs)
{
- struct xdp_rxtx_ring *q = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -312,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- q->desc[idx].idx = idx + i;
- q->desc[idx].len = sizeof(pkt_data) - 1;
- q->desc[idx].offset = 0;
+ r[idx].idx = idx + i;
+ r[idx].len = sizeof(pkt_data) - 1;
+ r[idx].offset = 0;
}
u_smp_wmb();
- q->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
@@ -327,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
struct xdp_desc *descs,
int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
@@ -337,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
- descs[i] = r->desc[idx];
+ descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
- r->ptrs.consumer = uq->cached_cons;
+ *uq->consumer = uq->cached_cons;
}
return entries;
@@ -402,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+ struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
+ socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem));
@@ -423,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
- umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
- FQ_NUM_DESCS * sizeof(u32),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_UMEM_PGOFF_FILL_RING);
- lassert(umem->fq.ring != MAP_FAILED);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
+
+ umem->fq.map = mmap(0, off.fr.desc +
+ FQ_NUM_DESCS * sizeof(u32),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
+ umem->fq.producer = umem->fq.map + off.fr.producer;
+ umem->fq.consumer = umem->fq.map + off.fr.consumer;
+ umem->fq.ring = umem->fq.map + off.fr.desc;
- umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+ umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
- lassert(umem->cq.ring != MAP_FAILED);
+ lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
+ umem->cq.producer = umem->cq.map + off.cr.producer;
+ umem->cq.consumer = umem->cq.map + off.cr.consumer;
+ umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = (char (*)[FRAME_SIZE])bufs;
umem->fd = sfd;
@@ -459,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
int sfd, ndescs = NUM_DESCS;
struct xdpsock *xsk;
bool shared = true;
+ socklen_t optlen;
u32 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
@@ -484,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
&ndescs, sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
&ndescs, sizeof(int)) == 0);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
/* Rx */
- xsk->rx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_RX_RING);
- lassert(xsk->rx.ring != MAP_FAILED);
+ xsk->rx.map = mmap(NULL,
+ off.rx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_RX_RING);
+ lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
for (i = 0; i < NUM_DESCS / 2; i++)
@@ -501,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
}
/* Tx */
- xsk->tx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_TX_RING);
- lassert(xsk->tx.ring != MAP_FAILED);
+ xsk->tx.map = mmap(NULL,
+ off.tx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_TX_RING);
+ lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
+ xsk->rx.producer = xsk->rx.map + off.rx.producer;
+ xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
+ xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
+ xsk->tx.producer = xsk->tx.map + off.tx.producer;
+ xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
+ xsk->tx.ring = xsk->tx.map + off.tx.desc;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;