summaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
Diffstat (limited to 'samples')
-rw-r--r--samples/bpf/Makefile14
-rw-r--r--samples/bpf/bpf_helpers.h10
-rw-r--r--samples/bpf/bpf_load.c57
-rw-r--r--samples/bpf/lathist_kern.c99
-rw-r--r--samples/bpf/lathist_user.c103
-rw-r--r--samples/bpf/sockex3_kern.c290
-rw-r--r--samples/bpf/sockex3_user.c66
-rw-r--r--samples/bpf/tcbpf1_kern.c8
-rw-r--r--samples/bpf/test_verifier.c84
-rw-r--r--samples/bpf/tracex2_kern.c24
-rw-r--r--samples/bpf/tracex2_user.c67
-rw-r--r--samples/bpf/tracex5_kern.c75
-rw-r--r--samples/bpf/tracex5_user.c46
-rw-r--r--samples/pktgen/README.rst43
-rw-r--r--samples/pktgen/functions.sh121
-rw-r--r--samples/pktgen/parameters.sh97
-rwxr-xr-xsamples/pktgen/pktgen.conf-1-159
-rwxr-xr-xsamples/pktgen/pktgen.conf-2-166
-rwxr-xr-xsamples/pktgen/pktgen.conf-2-273
-rwxr-xr-xsamples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh86
-rwxr-xr-xsamples/pktgen/pktgen_sample01_simple.sh71
-rwxr-xr-xsamples/pktgen/pktgen_sample02_multiqueue.sh75
-rwxr-xr-xsamples/pktgen/pktgen_sample03_burst_single_flow.sh82
23 files changed, 1485 insertions, 231 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 76e3458a5419..4450fed91ab4 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -6,45 +6,57 @@ hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
hostprogs-y += sockex1
hostprogs-y += sockex2
+hostprogs-y += sockex3
hostprogs-y += tracex1
hostprogs-y += tracex2
hostprogs-y += tracex3
hostprogs-y += tracex4
+hostprogs-y += tracex5
+hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
+lathist-objs := bpf_load.o libbpf.o lathist_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockex1_kern.o
always += sockex2_kern.o
+always += sockex3_kern.o
always += tracex1_kern.o
always += tracex2_kern.o
always += tracex3_kern.o
always += tracex4_kern.o
+always += tracex5_kern.o
always += tcbpf1_kern.o
+always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_sockex3 += -lelf
HOSTLOADLIBES_tracex1 += -lelf
HOSTLOADLIBES_tracex2 += -lelf
HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
+HOSTLOADLIBES_tracex5 += -lelf
+HOSTLOADLIBES_lathist += -lelf
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
-%.o: %.c
+$(obj)/%.o: $(src)/%.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f960b5fb3ed8..bdf1c1607b80 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -21,6 +21,16 @@ static unsigned long long (*bpf_ktime_get_ns)(void) =
(void *) BPF_FUNC_ktime_get_ns;
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 38dac5a53b51..da86a8e0a95a 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -16,6 +16,7 @@
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <poll.h>
+#include <ctype.h>
#include "libbpf.h"
#include "bpf_helpers.h"
#include "bpf_load.h"
@@ -29,6 +30,19 @@ int map_fd[MAX_MAPS];
int prog_fd[MAX_PROGS];
int event_fd[MAX_PROGS];
int prog_cnt;
+int prog_array_fd = -1;
+
+static int populate_prog_array(const char *event, int prog_fd)
+{
+ int ind = atoi(event), err;
+
+ err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
+ if (err < 0) {
+ printf("failed to store prog_fd in prog_array\n");
+ return -1;
+ }
+ return 0;
+}
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
@@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return -1;
}
+ fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
+ if (fd < 0) {
+ printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ prog_fd[prog_cnt++] = fd;
+
+ if (is_socket) {
+ event += 6;
+ if (*event != '/')
+ return 0;
+ event++;
+ if (!isdigit(*event)) {
+ printf("invalid prog number\n");
+ return -1;
+ }
+ return populate_prog_array(event, fd);
+ }
+
if (is_kprobe || is_kretprobe) {
if (is_kprobe)
event += 7;
else
event += 10;
+ if (*event == 0) {
+ printf("event name cannot be empty\n");
+ return -1;
+ }
+
+ if (isdigit(*event))
+ return populate_prog_array(event, fd);
+
snprintf(buf, sizeof(buf),
"echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
is_kprobe ? 'p' : 'r', event, event);
@@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
}
}
- fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
-
- if (fd < 0) {
- printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
- return -1;
- }
-
- prog_fd[prog_cnt++] = fd;
-
- if (is_socket)
- return 0;
-
strcpy(buf, DEBUGFS);
strcat(buf, "events/kprobes/");
strcat(buf, event);
@@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len)
maps[i].max_entries);
if (map_fd[i] < 0)
return 1;
+
+ if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ prog_array_fd = map_fd[i];
}
return 0;
}
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
new file mode 100644
index 000000000000..18fa088473cd
--- /dev/null
+++ b/samples/bpf/lathist_kern.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+
+/* We need to stick to static allocated memory (an array instead of
+ * hash table) because managing dynamic memory from the
+ * trace_preempt_[on|off] tracepoints hooks is not supported.
+ */
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU,
+};
+
+SEC("kprobe/trace_preempt_off")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ int cpu = bpf_get_smp_processor_id();
+ u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);
+
+ if (ts)
+ *ts = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_lat = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(long),
+ .max_entries = MAX_CPU * MAX_ENTRIES,
+};
+
+SEC("kprobe/trace_preempt_on")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ u64 *ts, cur_ts, delta;
+ int key, cpu;
+ long *val;
+
+ cpu = bpf_get_smp_processor_id();
+ ts = bpf_map_lookup_elem(&my_map, &cpu);
+ if (!ts)
+ return 0;
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = log2l(cur_ts - *ts);
+
+ if (delta > MAX_ENTRIES - 1)
+ delta = MAX_ENTRIES - 1;
+
+ key = cpu * MAX_ENTRIES + delta;
+ val = bpf_map_lookup_elem(&my_lat, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, 1);
+
+ return 0;
+
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
new file mode 100644
index 000000000000..65da8c1576de
--- /dev/null
+++ b/samples/bpf/lathist_user.c
@@ -0,0 +1,103 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+#define MAX_STARS 40
+
+struct cpu_hist {
+ long data[MAX_ENTRIES];
+ long max;
+};
+
+static struct cpu_hist cpu_hist[MAX_CPU];
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+static void print_hist(void)
+{
+ char starstr[MAX_STARS];
+ struct cpu_hist *hist;
+ int i, j;
+
+ /* clear screen */
+ printf("\033[2J");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ hist = &cpu_hist[j];
+
+ /* ignore CPUs without data (maybe offline?) */
+ if (hist->max == 0)
+ continue;
+
+ printf("CPU %d\n", j);
+ printf(" latency : count distribution\n");
+ for (i = 1; i <= MAX_ENTRIES; i++) {
+ stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1,
+ hist->data[i - 1], MAX_STARS, starstr);
+ }
+ }
+}
+
+static void get_data(int fd)
+{
+ long key, value;
+ int c, i;
+
+ for (i = 0; i < MAX_CPU; i++)
+ cpu_hist[i].max = 0;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_ENTRIES; i++) {
+ key = c * MAX_ENTRIES + i;
+ bpf_lookup_elem(fd, &key, &value);
+
+ cpu_hist[c].data[i] = value;
+ if (value > cpu_hist[c].max)
+ cpu_hist[c].max = value;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ while (1) {
+ get_data(map_fd[1]);
+ print_hist();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
new file mode 100644
index 000000000000..41ae2fd21b13
--- /dev/null
+++ b/samples/bpf/sockex3_kern.c
@@ -0,0 +1,290 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#include <uapi/linux/mpls.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 8,
+};
+
+#define PARSE_VLAN 1
+#define PARSE_MPLS 2
+#define PARSE_IP 3
+#define PARSE_IPV6 4
+
+/* protocol dispatch routine.
+ * It tail-calls next BPF program depending on eth proto
+ * Note, we could have used:
+ * bpf_tail_call(skb, &jmp_table, proto);
+ * but it would need large prog_array
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &jmp_table, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
+ break;
+ }
+}
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+struct globals {
+ struct flow_keys flow;
+};
+
+struct bpf_map_def SEC("maps") percpu_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct globals),
+ .max_entries = 32,
+};
+
+/* user poor man's per_cpu until native support is ready */
+static struct globals *this_cpu_globals(void)
+{
+ u32 key = bpf_get_smp_processor_id();
+
+ return bpf_map_lookup_elem(&percpu_map, &key);
+}
+
+/* some simple stats for user space consumption */
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct flow_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+static void update_stats(struct __sk_buff *skb, struct globals *g)
+{
+ struct flow_keys key = g->flow;
+ struct pair *value;
+
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+}
+
+static __always_inline void parse_ip_proto(struct __sk_buff *skb,
+ struct globals *g, __u32 ip_proto)
+{
+ __u32 nhoff = skb->cb[0];
+ int poff;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u32 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u32 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, gre_proto);
+ break;
+ }
+ case IPPROTO_IPIP:
+ parse_eth_proto(skb, ETH_P_IP);
+ break;
+ case IPPROTO_IPV6:
+ parse_eth_proto(skb, ETH_P_IPV6);
+ break;
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ g->flow.ports = load_word(skb, nhoff);
+ case IPPROTO_ICMP:
+ g->flow.ip_proto = ip_proto;
+ update_stats(skb, g);
+ break;
+ default:
+ break;
+ }
+}
+
+PROG(PARSE_IP)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, verlen, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ return 0;
+
+ ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (ip_proto != IPPROTO_GRE) {
+ g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ nhoff += (verlen & 0xF) << 2;
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_IPV6)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ g->flow.src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ g->flow.dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_VLAN)(struct __sk_buff *skb)
+{
+ __u32 nhoff, proto;
+
+ nhoff = skb->cb[0];
+
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ skb->cb[0] = nhoff;
+
+ parse_eth_proto(skb, proto);
+
+ return 0;
+}
+
+PROG(PARSE_MPLS)(struct __sk_buff *skb)
+{
+ __u32 nhoff, label;
+
+ nhoff = skb->cb[0];
+
+ label = load_word(skb, nhoff);
+ nhoff += sizeof(struct mpls_label);
+ skb->cb[0] = nhoff;
+
+ if (label & MPLS_LS_S_MASK) {
+ __u8 verlen = load_byte(skb, nhoff);
+ if ((verlen & 0xF0) == 4)
+ parse_eth_proto(skb, ETH_P_IP);
+ else
+ parse_eth_proto(skb, ETH_P_IPV6);
+ } else {
+ parse_eth_proto(skb, ETH_P_MPLS_UC);
+ }
+
+ return 0;
+}
+
+SEC("socket/0")
+int main_prog(struct __sk_buff *skb)
+{
+ __u32 nhoff = ETH_HLEN;
+ __u32 proto = load_half(skb, 12);
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, proto);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
new file mode 100644
index 000000000000..2617772d060d
--- /dev/null
+++ b/samples/bpf/sockex3_user.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ sizeof(__u32)) == 0);
+
+ if (argc > 1)
+ f = popen("ping -c5 localhost", "r");
+ else
+ f = popen("netperf -l 4 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ struct flow_keys key = {}, next_key;
+ struct pair value;
+
+ sleep(1);
+ printf("IP src.port -> dst.port bytes packets\n");
+ while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[2], &next_key, &value);
+ printf("%s.%05d -> %s.%05d %12lld %12lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key.src)}),
+ next_key.port16[0],
+ inet_ntoa((struct in_addr){htonl(next_key.dst)}),
+ next_key.port16[1],
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ }
+ return 0;
+}
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 7c27710f8296..9bfb2eb34563 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
- __u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
+ __u8 old_tos = load_byte(skb, TOS_OFF);
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
- __u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
+ __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
- __u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
+ __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
SEC("classifier")
int bpf_prog1(struct __sk_buff *skb)
{
- __u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
+ __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
if (proto == IPPROTO_TCP) {
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 12f3780af73f..693605997abc 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -29,6 +29,7 @@ struct bpf_test {
ACCEPT,
REJECT
} result;
+ enum bpf_prog_type prog_type;
};
static struct bpf_test tests[] = {
@@ -743,6 +744,84 @@ static struct bpf_test tests[] = {
.errstr = "different pointers",
.result = REJECT,
},
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check non-u32 access to cb",
+ .insns = {
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[60])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
};
static int probe_filter_length(struct bpf_insn *fp)
@@ -775,6 +854,7 @@ static int test(void)
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_insn *prog = tests[i].insns;
+ int prog_type = tests[i].prog_type;
int prog_len = probe_filter_length(prog);
int *fixup = tests[i].fixup;
int map_fd = -1;
@@ -789,8 +869,8 @@ static int test(void)
}
printf("#%d %s ", i, tests[i].descr);
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
- prog_len * sizeof(struct bpf_insn),
+ prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len * sizeof(struct bpf_insn),
"GPL", 0);
if (tests[i].result == ACCEPT) {
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 19ec1cfc45db..dc50f4f2943f 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v)
return log2(v);
}
+struct hist_key {
+ char comm[16];
+ u64 pid_tgid;
+ u64 uid_gid;
+ u32 index;
+};
+
struct bpf_map_def SEC("maps") my_hist_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(u32),
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct hist_key),
.value_size = sizeof(long),
- .max_entries = 64,
+ .max_entries = 1024,
};
SEC("kprobe/sys_write")
@@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx)
long write_size = ctx->dx; /* arg3 */
long init_val = 1;
long *value;
- u32 index = log2l(write_size);
+ struct hist_key key = {};
+
+ key.index = log2l(write_size);
+ key.pid_tgid = bpf_get_current_pid_tgid();
+ key.uid_gid = bpf_get_current_uid_gid();
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
- value = bpf_map_lookup_elem(&my_hist_map, &index);
+ value = bpf_map_lookup_elem(&my_hist_map, &key);
if (value)
__sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
return 0;
}
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 91b8d0896fbb..cd0241c1447a 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
+#include <string.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width)
str[i] = '\0';
}
-static void print_hist(int fd)
+struct task {
+ char comm[16];
+ __u64 pid_tgid;
+ __u64 uid_gid;
+};
+
+struct hist_key {
+ struct task t;
+ __u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
{
- int key;
+ struct hist_key key = {}, next_key;
+ char starstr[MAX_STARS];
long value;
long data[MAX_INDEX] = {};
- char starstr[MAX_STARS];
- int i;
int max_ind = -1;
long max_value = 0;
+ int i, ind;
- for (key = 0; key < MAX_INDEX; key++) {
- bpf_lookup_elem(fd, &key, &value);
- data[key] = value;
- if (value && key > max_ind)
- max_ind = key;
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ if (memcmp(&next_key, task, SIZE)) {
+ key = next_key;
+ continue;
+ }
+ bpf_lookup_elem(fd, &next_key, &value);
+ ind = next_key.index;
+ data[ind] = value;
+ if (value && ind > max_ind)
+ max_ind = ind;
if (value > max_value)
max_value = value;
+ key = next_key;
}
printf(" syscall write() stats\n");
@@ -48,6 +68,35 @@ static void print_hist(int fd)
MAX_STARS, starstr);
}
}
+
+static void print_hist(int fd)
+{
+ struct hist_key key = {}, next_key;
+ static struct task tasks[1024];
+ int task_cnt = 0;
+ int i;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ int found = 0;
+
+ for (i = 0; i < task_cnt; i++)
+ if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+ found = 1;
+ if (!found)
+ memcpy(&tasks[task_cnt++], &next_key, SIZE);
+ key = next_key;
+ }
+
+ for (i = 0; i < task_cnt; i++) {
+ printf("\npid %d cmd %s uid %d\n",
+ (__u32) tasks[i].pid_tgid,
+ tasks[i].comm,
+ (__u32) tasks[i].uid_gid);
+ print_hist_for_pid(fd, &tasks[i]);
+ }
+
+}
+
static void int_exit(int sig)
{
print_hist(map_fd[1]);
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
new file mode 100644
index 000000000000..b71fe07a7a7a
--- /dev/null
+++ b/samples/bpf/tracex5_kern.c
@@ -0,0 +1,75 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/seccomp.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") progs = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1024,
+};
+
+SEC("kprobe/seccomp_phase1")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+
+ /* dispatch into next BPF program depending on syscall number */
+ bpf_tail_call(ctx, &progs, sd.nr);
+
+ /* fall through -> unknown syscall */
+ if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+ char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+ bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+ }
+ return 0;
+}
+
+/* we jump here when syscall number == __NR_write */
+PROG(__NR_write)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+ if (sd.args[2] == 512) {
+ char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_read)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+ if (sd.args[2] > 128 && sd.args[2] <= 1024) {
+ char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_mmap)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap\n";
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
new file mode 100644
index 000000000000..a04dd3cd4358
--- /dev/null
+++ b/samples/bpf/tracex5_user.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+static void install_accept_all_seccomp(void)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+ if (prctl(PR_SET_SECCOMP, 2, &prog))
+ perror("prctl");
+}
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ install_accept_all_seccomp();
+
+ f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
new file mode 100644
index 000000000000..8365c4e5c513
--- /dev/null
+++ b/samples/pktgen/README.rst
@@ -0,0 +1,43 @@
+Sample and benchmark scripts for pktgen (packet generator)
+==========================================================
+This directory contains some pktgen sample and benchmark scripts, that
+can easily be copied and adjusted for your own use-case.
+
+General doc is located in kernel: Documentation/networking/pktgen.txt
+
+Helper include files
+====================
+This directory contains two helper shell files, that can be "included"
+by shell source'ing. Namely "functions.sh" and "parameters.sh".
+
+Common parameters
+-----------------
+The parameters.sh file support easy and consistant parameter parsing
+across the sample scripts. Usage example is printed on errors::
+
+ Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
+ -i : ($DEV) output interface/device (required)
+ -s : ($PKT_SIZE) packet size
+ -d : ($DEST_IP) destination IP
+ -m : ($DST_MAC) destination MAC-addr
+ -t : ($THREADS) threads to start
+ -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+ -b : ($BURST) HW level bursting of SKBs
+ -v : ($VERBOSE) verbose
+ -x : ($DEBUG) debug
+
+The global variable being set is also listed. E.g. the required
+interface/device parameter "-i" sets variable $DEV.
+
+Common functions
+----------------
+The functions.sh file provides; Three different shell functions for
+configuring the different components of pktgen: pg_ctrl(), pg_thread()
+and pg_set().
+
+These functions correspond to pktgens different components.
+ * pg_ctrl() control "pgctrl" (/proc/net/pktgen/pgctrl)
+ * pg_thread() control the kernel threads and binding to devices
+ * pg_set() control setup of individual devices
+
+See sample scripts for usage examples.
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
new file mode 100644
index 000000000000..205e4cde4601
--- /dev/null
+++ b/samples/pktgen/functions.sh
@@ -0,0 +1,121 @@
+#
+# Common functions used by pktgen scripts
+# - Depending on bash 3 (or higher) syntax
+#
+# Author: Jesper Dangaaard Brouer
+# License: GPL
+
+## -- General shell logging cmds --
+function err() {
+ local exitcode=$1
+ shift
+ echo "ERROR: $@" >&2
+ exit $exitcode
+}
+
+function warn() {
+ echo "WARN : $@" >&2
+}
+
+function info() {
+ if [[ -n "$VERBOSE" ]]; then
+ echo "INFO : $@" >&2
+ fi
+}
+
+## -- Pktgen proc config commands -- ##
+export PROC_DIR=/proc/net/pktgen
+#
+# Three different shell functions for configuring the different
+# components of pktgen:
+# pg_ctrl(), pg_thread() and pg_set().
+#
+# These functions correspond to pktgens different components.
+# * pg_ctrl() control "pgctrl" (/proc/net/pktgen/pgctrl)
+# * pg_thread() control the kernel threads and binding to devices
+# * pg_set() control setup of individual devices
+function pg_ctrl() {
+ local proc_file="pgctrl"
+ proc_cmd ${proc_file} "$@"
+}
+
+function pg_thread() {
+ local thread=$1
+ local proc_file="kpktgend_${thread}"
+ shift
+ proc_cmd ${proc_file} "$@"
+}
+
+function pg_set() {
+ local dev=$1
+ local proc_file="$dev"
+ shift
+ proc_cmd ${proc_file} "$@"
+}
+
+# More generic replacement for pgset(), that does not depend on global
+# variable for proc file.
+function proc_cmd() {
+ local result
+ local proc_file=$1
+ # after shift, the remaining args are contained in $@
+ shift
+ local proc_ctrl=${PROC_DIR}/$proc_file
+ if [[ ! -e "$proc_ctrl" ]]; then
+ err 3 "proc file:$proc_ctrl does not exists (dev added to thread?)"
+ else
+ if [[ ! -w "$proc_ctrl" ]]; then
+ err 4 "proc file:$proc_ctrl not writable, not root?!"
+ fi
+ fi
+
+ if [[ "$DEBUG" == "yes" ]]; then
+ echo "cmd: $@ > $proc_ctrl"
+ fi
+ # Quoting of "$@" is important for space expansion
+ echo "$@" > "$proc_ctrl"
+ local status=$?
+
+ result=$(grep "Result: OK:" $proc_ctrl)
+ # Due to pgctrl, cannot use exit code $? from grep
+ if [[ "$result" == "" ]]; then
+ grep "Result:" $proc_ctrl >&2
+ fi
+ if (( $status != 0 )); then
+ err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\""
+ fi
+}
+
+# Old obsolete "pgset" function, with slightly improved err handling
+function pgset() {
+ local result
+
+ if [[ "$DEBUG" == "yes" ]]; then
+ echo "cmd: $1 > $PGDEV"
+ fi
+ echo $1 > $PGDEV
+ local status=$?
+
+ result=`cat $PGDEV | fgrep "Result: OK:"`
+ if [[ "$result" == "" ]]; then
+ cat $PGDEV | fgrep Result:
+ fi
+ if (( $status != 0 )); then
+ err 5 "Write error($status) occurred cmd: \"$1 > $PGDEV\""
+ fi
+}
+
+## -- General shell tricks --
+
+function root_check_run_with_sudo() {
+ # Trick so, program can be run as normal user, will just use "sudo"
+ # call as root_check_run_as_sudo "$@"
+ if [ "$EUID" -ne 0 ]; then
+ if [ -x $0 ]; then # Directly executable use sudo
+ info "Not root, running with sudo"
+ sudo "$0" "$@"
+ exit $?
+ fi
+ err 4 "cannot perform sudo run of $0"
+ fi
+}
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
new file mode 100644
index 000000000000..33b70fdd5a4a
--- /dev/null
+++ b/samples/pktgen/parameters.sh
@@ -0,0 +1,97 @@
+#
+# Common parameter parsing for pktgen scripts
+#
+
+function usage() {
+ echo ""
+ echo "Usage: $0 [-vx] -i ethX"
+ echo " -i : (\$DEV) output interface/device (required)"
+ echo " -s : (\$PKT_SIZE) packet size"
+ echo " -d : (\$DEST_IP) destination IP"
+ echo " -m : (\$DST_MAC) destination MAC-addr"
+ echo " -t : (\$THREADS) threads to start"
+ echo " -c : (\$SKB_CLONE) SKB clones send before alloc new SKB"
+ echo " -b : (\$BURST) HW level bursting of SKBs"
+ echo " -v : (\$VERBOSE) verbose"
+ echo " -x : (\$DEBUG) debug"
+ echo ""
+}
+
+## --- Parse command line arguments / parameters ---
+## echo "Commandline options:"
+while getopts "s:i:d:m:t:c:b:vxh" option; do
+ case $option in
+ i) # interface
+ export DEV=$OPTARG
+ info "Output device set to: DEV=$DEV"
+ ;;
+ s)
+ export PKT_SIZE=$OPTARG
+ info "Packet size set to: PKT_SIZE=$PKT_SIZE bytes"
+ ;;
+ d) # destination IP
+ export DEST_IP=$OPTARG
+ info "Destination IP set to: DEST_IP=$DEST_IP"
+ ;;
+ m) # MAC
+ export DST_MAC=$OPTARG
+ info "Destination MAC set to: DST_MAC=$DST_MAC"
+ ;;
+ t)
+ export THREADS=$OPTARG
+ export CPU_THREADS=$OPTARG
+ let "CPU_THREADS -= 1"
+ info "Number of threads to start: $THREADS (0 to $CPU_THREADS)"
+ ;;
+ c)
+ export CLONE_SKB=$OPTARG
+ info "CLONE_SKB=$CLONE_SKB"
+ ;;
+ b)
+ export BURST=$OPTARG
+ info "SKB bursting: BURST=$BURST"
+ ;;
+ v)
+ export VERBOSE=yes
+ info "Verbose mode: VERBOSE=$VERBOSE"
+ ;;
+ x)
+ export DEBUG=yes
+ info "Debug mode: DEBUG=$DEBUG"
+ ;;
+ h|?|*)
+ usage;
+ err 2 "[ERROR] Unknown parameters!!!"
+ esac
+done
+shift $(( $OPTIND - 1 ))
+
+if [ -z "$PKT_SIZE" ]; then
+ # NIC adds 4 bytes CRC
+ export PKT_SIZE=60
+ info "Default packet size set to: set to: $PKT_SIZE bytes"
+fi
+
+if [ -z "$THREADS" ]; then
+ # Zero CPU threads means one thread, because CPU numbers are zero indexed
+ export CPU_THREADS=0
+ export THREADS=1
+fi
+
+if [ -z "$DEV" ]; then
+ usage
+ err 2 "Please specify output device"
+fi
+
+if [ -z "$DST_MAC" ]; then
+ warn "Missing destination MAC address"
+fi
+
+if [ -z "$DEST_IP" ]; then
+ warn "Missing destination IP address"
+fi
+
+if [ ! -d /proc/net/pktgen ]; then
+ info "Loading kernel module: pktgen"
+ modprobe pktgen
+fi
diff --git a/samples/pktgen/pktgen.conf-1-1 b/samples/pktgen/pktgen.conf-1-1
deleted file mode 100755
index f91daad9e916..000000000000
--- a/samples/pktgen/pktgen.conf-1-1
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. One CPU example. We add eth1.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-2-1 b/samples/pktgen/pktgen.conf-2-1
deleted file mode 100755
index e108e97d6d89..000000000000
--- a/samples/pktgen/pktgen.conf-2-1
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. Two CPU example. We add eth1 to the first
-# and leave the second idle.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-# We need to remove old config since we dont use this thread. We can only
-# one NIC on one CPU due to affinity reasons.
-
-PGDEV=/proc/net/pktgen/kpktgend_1
- echo "Removing all devices"
- pgset "rem_device_all"
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1
diff --git a/samples/pktgen/pktgen.conf-2-2 b/samples/pktgen/pktgen.conf-2-2
deleted file mode 100755
index acea15503e71..000000000000
--- a/samples/pktgen/pktgen.conf-2-2
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-
-#modprobe pktgen
-
-
-function pgset() {
- local result
-
- echo $1 > $PGDEV
-
- result=`cat $PGDEV | fgrep "Result: OK:"`
- if [ "$result" = "" ]; then
- cat $PGDEV | fgrep Result:
- fi
-}
-
-# Config Start Here -----------------------------------------------------------
-
-
-# thread config
-# Each CPU has its own thread. Two CPU example. We add eth1, eth2 respectively.
-
-PGDEV=/proc/net/pktgen/kpktgend_0
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth1"
- pgset "add_device eth1"
-
-PGDEV=/proc/net/pktgen/kpktgend_1
- echo "Removing all devices"
- pgset "rem_device_all"
- echo "Adding eth2"
- pgset "add_device eth2"
-
-
-# device config
-# delay 0 means maximum speed.
-
-CLONE_SKB="clone_skb 1000000"
-# NIC adds 4 bytes CRC
-PKT_SIZE="pkt_size 60"
-
-# COUNT 0 means forever
-#COUNT="count 0"
-COUNT="count 10000000"
-DELAY="delay 0"
-
-PGDEV=/proc/net/pktgen/eth1
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 10.10.11.2"
- pgset "dst_mac 00:04:23:08:91:dc"
-
-PGDEV=/proc/net/pktgen/eth2
- echo "Configuring $PGDEV"
- pgset "$COUNT"
- pgset "$CLONE_SKB"
- pgset "$PKT_SIZE"
- pgset "$DELAY"
- pgset "dst 192.168.2.2"
- pgset "dst_mac 00:04:23:08:91:de"
-
-# Time to run
-PGDEV=/proc/net/pktgen/pgctrl
-
- echo "Running... ctrl^C to stop"
- trap true INT
- pgset "start"
- echo "Done"
- cat /proc/net/pktgen/eth1 /proc/net/pktgen/eth2
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
new file mode 100755
index 000000000000..cb1590331b47
--- /dev/null
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Benchmark script:
+# - developed for benchmarking ingress qdisc path
+#
+# Script for injecting packets into RX path of the stack with pktgen
+# "xmit_mode netif_receive". With an invalid dst_mac this will only
+# measure the ingress code path as packets gets dropped in ip_rcv().
+#
+# This script don't really need any hardware. It benchmarks software
+# RX path just after NIC driver level. With bursting is also
+# "removes" the SKB alloc/free overhead.
+#
+# Setup scenarios for measuring ingress qdisc (with invalid dst_mac):
+# ------------------------------------------------------------------
+# (1) no ingress (uses static_key_false(&ingress_needed))
+#
+# (2) ingress on other dev (change ingress_needed and calls
+# handle_ing() but exit early)
+#
+# config: tc qdisc add dev $SOMEDEV handle ffff: ingress
+#
+# (3) ingress on this dev, handle_ing() -> tc_classify()
+#
+# config: tc qdisc add dev $DEV handle ffff: ingress
+#
+# (4) ingress on this dev + drop at u32 classifier/action.
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Using invalid DST_MAC will cause the packets to get dropped in
+# ip_rcv() which is part of the test
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$BURST" ] && BURST=1024
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="10000000" # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ # The device name is extended with @name, using thread number to
+ # make then unique, but any name will do.
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Base config of dev
+ pg_set $dev "flag QUEUE_MAP_CPU"
+ pg_set $dev "count $COUNT"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Inject packet into RX path of stack
+ pg_set $dev "xmit_mode netif_receive"
+
+ # Burst allow us to avoid measuring SKB alloc/free overhead
+ pg_set $dev "burst $BURST"
+done
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+done
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
new file mode 100755
index 000000000000..8c9d318c221b
--- /dev/null
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Simple example:
+# * pktgen sending with single thread and single interface
+# * flow variation via random UDP source port
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+# - go look in parameters.sh to see which setting are avail
+# - required param is the interface "-i" stored in $DEV
+source ${basedir}/parameters.sh
+#
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+# Example enforce param "-m" for dst_mac
+[ -z "$DST_MAC" ] && usage && err 2 "Must specify -m dst_mac"
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="100000" # Zero means indefinitely
+
+# Flow variation random source port between min and max
+UDP_MIN=9
+UDP_MAX=109
+
+# General cleanup everything since last run
+# (especially important if other threads were configured by other scripts)
+pg_ctrl "reset"
+
+# Add remove all other devices and add_device $DEV to thread 0
+thread=0
+pg_thread $thread "rem_device_all"
+pg_thread $thread "add_device" $DEV
+
+# How many packets to send (zero means indefinitely)
+pg_set $DEV "count $COUNT"
+
+# Reduce alloc cost by sending same SKB many times
+# - this obviously affects the randomness within the packet
+pg_set $DEV "clone_skb $CLONE_SKB"
+
+# Set packet size
+pg_set $DEV "pkt_size $PKT_SIZE"
+
+# Delay between packets (zero means max speed)
+pg_set $DEV "delay $DELAY"
+
+# Flag example disabling timestamping
+pg_set $DEV "flag NO_TIMESTAMP"
+
+# Destination
+pg_set $DEV "dst_mac $DST_MAC"
+pg_set $DEV "dst $DEST_IP"
+
+# Setup random UDP port src range
+pg_set $DEV "flag UDPSRC_RND"
+pg_set $DEV "udp_src_min $UDP_MIN"
+pg_set $DEV "udp_src_max $UDP_MAX"
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+echo "Result device: $DEV"
+cat /proc/net/pktgen/$DEV
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
new file mode 100755
index 000000000000..32467aea8e47
--- /dev/null
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Multiqueue: Using pktgen threads for sending on multiple CPUs
+# * adding devices to kernel threads
+# * notice the naming scheme for keeping device names unique
+# * nameing scheme: dev@thread_number
+# * flow variation via random UDP source port
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+#
+# Required param: -i dev in $DEV
+source ${basedir}/parameters.sh
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="100000" # Zero means indefinitely
+[ -z "$CLONE_SKB" ] && CLONE_SKB="0"
+
+# Flow variation random source port between min and max
+UDP_MIN=9
+UDP_MAX=109
+
+# (example of setting default params in your script)
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ # The device name is extended with @name, using thread number to
+ # make then unique, but any name will do.
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Notice config queue to map to cpu (mirrors smp_processor_id())
+ # It is beneficial to map IRQ /proc/irq/*/smp_affinity 1:1 to CPU number
+ pg_set $dev "flag QUEUE_MAP_CPU"
+
+ # Base config of dev
+ pg_set $dev "count $COUNT"
+ pg_set $dev "clone_skb $CLONE_SKB"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+
+ # Flag example disabling timestamping
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Setup random UDP port src range
+ pg_set $dev "flag UDPSRC_RND"
+ pg_set $dev "udp_src_min $UDP_MIN"
+ pg_set $dev "udp_src_max $UDP_MAX"
+done
+
+# start_run
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"
+echo "Done" >&2
+
+# Print results
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+done
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
new file mode 100755
index 000000000000..775f5d0a1e53
--- /dev/null
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+#
+# Script for max single flow performance
+# - If correctly tuned[1], single CPU 10G wirespeed small pkts is possible[2]
+#
+# Using pktgen "burst" option (use -b $N)
+# - To boost max performance
+# - Avail since: kernel v3.18
+# * commit 38b2cf2982dc73 ("net: pktgen: packet bursting via skb->xmit_more")
+# - This avoids writing the HW tailptr on every driver xmit
+# - The performance boost is impressive, see commit and blog [2]
+#
+# Notice: On purpose generates a single (UDP) flow towards target,
+# reason behind this is to only overload/activate a single CPU on
+# target host. And no randomness for pktgen also makes it faster.
+#
+# Tuning see:
+# [1] http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html
+# [2] http://netoptimizer.blogspot.dk/2014/10/unlocked-10gbps-tx-wirespeed-smallest.html
+#
+basedir=`dirname $0`
+source ${basedir}/functions.sh
+root_check_run_with_sudo "$@"
+
+# Parameter parsing via include
+source ${basedir}/parameters.sh
+# Set some default params, if they didn't get set
+[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42"
+[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff"
+[ -z "$BURST" ] && BURST=32
+[ -z "$CLONE_SKB" ] && CLONE_SKB="100000"
+
+# Base Config
+DELAY="0" # Zero means max speed
+COUNT="0" # Zero means indefinitely
+
+# General cleanup everything since last run
+pg_ctrl "reset"
+
+# Threads are specified with parameter -t value in $THREADS
+for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+
+ # Add remove all other devices and add_device $dev to thread
+ pg_thread $thread "rem_device_all"
+ pg_thread $thread "add_device" $dev
+
+ # Base config
+ pg_set $dev "flag QUEUE_MAP_CPU"
+ pg_set $dev "count $COUNT"
+ pg_set $dev "clone_skb $CLONE_SKB"
+ pg_set $dev "pkt_size $PKT_SIZE"
+ pg_set $dev "delay $DELAY"
+ pg_set $dev "flag NO_TIMESTAMP"
+
+ # Destination
+ pg_set $dev "dst_mac $DST_MAC"
+ pg_set $dev "dst $DEST_IP"
+
+ # Setup burst, for easy testing -b 0 disable bursting
+ # (internally in pktgen default and minimum burst=1)
+ if [[ ${BURST} -ne 0 ]]; then
+ pg_set $dev "burst $BURST"
+ else
+ info "$dev: Not using burst"
+ fi
+done
+
+# Run if user hits control-c
+function control_c() {
+ # Print results
+ for ((thread = 0; thread < $THREADS; thread++)); do
+ dev=${DEV}@${thread}
+ echo "Device: $dev"
+ cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+ done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap control_c SIGINT
+
+echo "Running... ctrl^C to stop" >&2
+pg_ctrl "start"