diff options
| author | David S. Miller <davem@davemloft.net> | 2018-03-21 12:08:01 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2018-03-21 12:08:01 -0400 |
| commit | 454bfe97837a3e3a5a15b768f8293f228e0f2f06 (patch) | |
| tree | 7ec9cbb8532f58e54be0d74e425edc76f74aaf51 /samples | |
| parent | 0466080c751ec2de9efae3ac6305225cc4326047 (diff) | |
| parent | 78262f4575c29f185947fe58952cd1beabc74f82 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-03-21
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Add a BPF hook for sendmsg and sendfile by reusing the ULP infrastructure
and sockmap. Three helpers are added along with this, bpf_msg_apply_bytes(),
bpf_msg_cork_bytes(), and bpf_msg_pull_data(). The first specifies
how many bytes the verdict should be applied to, the second specifies
that x bytes need to be queued first to retrigger the BPF program for a
verdict, and the third helper is mainly for the sendfile case, to pull in
data and make it private for reading and/or writing, from John.
2) Improve address to symbol resolution of user stack traces in BPF stackmap.
Currently, the latter stores the address for each entry in the call trace;
however, to map these addresses to user space files, it is necessary to
maintain the mapping from these virtual addresses to symbols in the binary,
which is not practical for system-wide profiling. Instead, this option for
the stackmap stores the ELF build id and offset for the call trace
entries, from Song.
3) Add support that allows BPF programs attached to perf events to read the
address values recorded with the perf events. They are requested through
PERF_SAMPLE_ADDR via perf_event_open(). Main motivation behind it is to
support building memory or lock access profiling and tracing tools with
the help of BPF, from Teng.
4) Several improvements to the tools/bpf/ Makefiles. 'make bpf' in the
tools directory does not provide the standard quiet output except for
bpftool, and it also does not respect a specified build output directory.
The 'make bpf_install' command respects neither the specified destination
nor the prefix, all from Jiri. In addition, Jakub fixes several other minor issues
in the Makefiles on top of that, e.g. fixing dependency paths, phony
targets and more.
5) Various doc updates, e.g. adding a comment for BPF fs about reserved names
to make the dentry lookup from there a bit more obvious, and a comment
to the bpf_devel_QA file in order to explain the difference between native
and bpf target clang usage with regard to pointer size, from Quentin
and Daniel.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
| -rw-r--r-- | samples/bpf/bpf_load.c | 8 | ||||
| -rw-r--r-- | samples/bpf/trace_event_kern.c | 4 | ||||
| -rw-r--r-- | samples/bpf/trace_event_user.c | 15 | ||||
| -rw-r--r-- | samples/sockmap/sockmap_kern.c | 197 | ||||
| -rwxr-xr-x | samples/sockmap/sockmap_test.sh | 450 | ||||
| -rw-r--r-- | samples/sockmap/sockmap_user.c | 301 |
6 files changed, 949 insertions, 26 deletions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 69806d74fa53..b1a310c3ae89 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -67,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; bool is_sockops = strncmp(event, "sockops", 7) == 0; bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; + bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -96,6 +97,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_SOCK_OPS; } else if (is_sk_skb) { prog_type = BPF_PROG_TYPE_SK_SKB; + } else if (is_sk_msg) { + prog_type = BPF_PROG_TYPE_SK_MSG; } else { printf("Unknown event '%s'\n", event); return -1; @@ -113,7 +116,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; - if (is_socket || is_sockops || is_sk_skb) { + if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { if (is_socket) event += 6; else @@ -589,7 +592,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "socket", 6) == 0 || memcmp(shname, "cgroup/", 7) == 0 || memcmp(shname, "sockops", 7) == 0 || - memcmp(shname, "sk_skb", 6) == 0) { + memcmp(shname, "sk_skb", 6) == 0 || + memcmp(shname, "sk_msg", 6) == 0) { ret = load_and_attach(shname, data->d_buf, data->d_size); if (ret != 0) diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c index a77a583d94d4..7068fbdde951 100644 --- a/samples/bpf/trace_event_kern.c +++ b/samples/bpf/trace_event_kern.c @@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx) { char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu"; char time_fmt2[] = "Get Time Failed, ErrCode: %d"; + char addr_fmt[] = "Address recorded on event: %llx"; 
char fmt[] = "CPU-%d period %lld ip %llx"; u32 cpu = bpf_get_smp_processor_id(); struct bpf_perf_event_value value_buf; @@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx) else bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret); + if (ctx->addr != 0) + bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr); + val = bpf_map_lookup_elem(&counts, &key); if (val) (*val)++; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index bf4f1b6d9a52..56f7a259a7c9 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -215,6 +215,17 @@ static void test_bpf_perf_event(void) /* Intel Instruction Retired */ .config = 0xc0, }; + struct perf_event_attr attr_type_raw_lock_load = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_RAW, + /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */ + .config = 0x21d0, + /* Request to record lock address from PEBS */ + .sample_type = PERF_SAMPLE_ADDR, + /* Record address value requires precise event */ + .precise_ip = 2, + }; printf("Test HW_CPU_CYCLES\n"); test_perf_event_all_cpu(&attr_type_hw); @@ -236,6 +247,10 @@ static void test_bpf_perf_event(void) test_perf_event_all_cpu(&attr_type_raw); test_perf_event_task(&attr_type_raw); + printf("Test Lock Load\n"); + test_perf_event_all_cpu(&attr_type_raw_lock_load); + test_perf_event_task(&attr_type_raw_lock_load); + printf("*** PASS ***\n"); } diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index 52b0053274f4..9ad5ba79c85a 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -43,6 +43,42 @@ struct bpf_map_def SEC("maps") sock_map = { .max_entries = 20, }; +struct bpf_map_def SEC("maps") sock_map_txmsg = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_redir = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries 
= 1, +}; + +struct bpf_map_def SEC("maps") sock_apply_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +struct bpf_map_def SEC("maps") sock_cork_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1 +}; + +struct bpf_map_def SEC("maps") sock_pull_bytes = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2 +}; + + SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { @@ -105,4 +141,165 @@ int bpf_sockmap(struct bpf_sock_ops *skops) return 0; } + +SEC("sk_msg1") +int bpf_prog4(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1; + int *start, *end; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + return SK_PASS; +} + +SEC("sk_msg2") +int bpf_prog5(struct sk_msg_md *msg) +{ + int err1 = -1, err2 = -1, zero = 0, one = 1; + int *bytes, *start, *end, len1, len2; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + err1 = bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + err2 = bpf_msg_cork_bytes(msg, *bytes); + len1 = (__u64)msg->data_end - (__u64)msg->data; + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) { + int err; + + bpf_printk("sk_msg2: pull(%i:%i)\n", + start ? *start : 0, end ? 
*end : 0); + err = bpf_msg_pull_data(msg, *start, *end, 0); + if (err) + bpf_printk("sk_msg2: pull_data err %i\n", + err); + len2 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length update %i->%i\n", + len1, len2); + } + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", + len1, err1, err2); + return SK_PASS; +} + +SEC("sk_msg3") +int bpf_prog6(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1; + int *start, *end; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); +} + +SEC("sk_msg4") +int bpf_prog7(struct sk_msg_md *msg) +{ + int err1 = 0, err2 = 0, zero = 0, one = 1; + int *bytes, *start, *end, len1, len2; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + err1 = bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + err2 = bpf_msg_cork_bytes(msg, *bytes); + len1 = (__u64)msg->data_end - (__u64)msg->data; + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) { + int err; + + bpf_printk("sk_msg2: pull(%i:%i)\n", + start ? *start : 0, end ? 
*end : 0); + err = bpf_msg_pull_data(msg, *start, *end, 0); + if (err) + bpf_printk("sk_msg2: pull_data err %i\n", + err); + len2 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length update %i->%i\n", + len1, len2); + } + bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n", + len1, err1, err2); + return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0); +} + +SEC("sk_msg5") +int bpf_prog8(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + int ret = 0, *bytes, zero = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) { + ret = bpf_msg_apply_bytes(msg, *bytes); + if (ret) + return SK_DROP; + } else { + return SK_DROP; + } + return SK_PASS; +} +SEC("sk_msg6") +int bpf_prog9(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + int ret = 0, *bytes, zero = 0; + + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) { + if (((__u64)data_end - (__u64)data) >= *bytes) + return SK_PASS; + ret = bpf_msg_cork_bytes(msg, *bytes); + if (ret) + return SK_DROP; + } + return SK_PASS; +} + +SEC("sk_msg7") +int bpf_prog10(struct sk_msg_md *msg) +{ + int *bytes, zero = 0, one = 1; + int *start, *end; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) + bpf_msg_apply_bytes(msg, *bytes); + bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); + if (bytes) + bpf_msg_cork_bytes(msg, *bytes); + start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); + end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + if (start && end) + bpf_msg_pull_data(msg, *start, *end, 0); + + return SK_DROP; +} + + char _license[] SEC("license") = "GPL"; diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh new file mode 100755 index 000000000000..6d8cc40cca22 --- /dev/null +++ b/samples/sockmap/sockmap_test.sh @@ -0,0 +1,450 @@ +#Test a bunch of positive cases to verify basic 
functionality +for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do +for t in "sendmsg" "sendpage"; do +for r in 1 10 100; do + for i in 1 10 100; do + for l in 1 10 100; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 + done + done +done +done +done + +#Test max iov +t="sendmsg" +r=1 +i=1024 +l=1 +prog="--txmsg" + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" +echo $TEST +$TEST +sleep 2 +prog="--txmsg_redir" +TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" +echo $TEST +$TEST + +# Test max iov with 1k send + +t="sendmsg" +r=1 +i=1024 +l=1024 +prog="--txmsg" + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" +echo $TEST +$TEST +sleep 2 +prog="--txmsg_redir" +TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" +echo $TEST +$TEST +sleep 2 + +# Test apply with 1B +r=1 +i=1024 +l=1024 +prog="--txmsg_apply 1" + +for t in "sendmsg" "sendpage"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply with larger value than send +r=1 +i=8 +l=1024 +prog="--txmsg_apply 2048" + +for t in "sendmsg" "sendpage"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply with apply that never reaches limit +r=1024 +i=1 +l=1 +prog="--txmsg_apply 2048" + +for t in "sendmsg" "sendpage"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply and redirect with 1B +r=1 +i=1024 +l=1024 +prog="--txmsg_redir --txmsg_apply 1" + +for t in "sendmsg" "sendpage"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply and redirect with larger value than send +r=1 +i=8 +l=1024 +prog="--txmsg_redir --txmsg_apply 2048" + +for t in "sendmsg" "sendpage"; do + 
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply and redirect with apply that never reaches limit +r=1024 +i=1 +l=1 +prog="--txmsg_apply 2048" + +for t in "sendmsg" "sendpage"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with 1B not really useful but test it anyways +r=1 +i=1024 +l=1024 +prog="--txmsg_cork 1" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with a more reasonable 100B +r=1 +i=1000 +l=1000 +prog="--txmsg_cork 100" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with larger value than send +r=1 +i=8 +l=1024 +prog="--txmsg_cork 2048" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with cork that never reaches limit +r=1024 +i=1 +l=1 +prog="--txmsg_cork 2048" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +r=1 +i=1024 +l=1024 +prog="--txmsg_redir --txmsg_cork 1" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with a more reasonable 100B +r=1 +i=1000 +l=1000 +prog="--txmsg_redir --txmsg_cork 100" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test cork with larger value than send +r=1 +i=8 +l=1024 +prog="--txmsg_redir --txmsg_cork 2048" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST 
+ $TEST + sleep 2 +done + +# Test cork with cork that never reaches limit +r=1024 +i=1 +l=1 +prog="--txmsg_cork 2048" + +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + + +# mix and match cork and apply not really useful but valid programs + +# Test apply < cork +r=100 +i=1 +l=5 +prog="--txmsg_apply 10 --txmsg_cork 100" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Try again with larger sizes so we hit overflow case +r=100 +i=1000 +l=2048 +prog="--txmsg_apply 4096 --txmsg_cork 8096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply > cork +r=100 +i=1 +l=5 +prog="--txmsg_apply 100 --txmsg_cork 10" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Again with larger sizes so we hit overflow cases +r=100 +i=1000 +l=2048 +prog="--txmsg_apply 8096 --txmsg_cork 4096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + + +# Test apply = cork +r=100 +i=1 +l=5 +prog="--txmsg_apply 10 --txmsg_cork 10" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +r=100 +i=1000 +l=2048 +prog="--txmsg_apply 4096 --txmsg_cork 4096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply < cork +r=100 +i=1 +l=5 +prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 
+done + +# Try again with larger sizes so we hit overflow case +r=100 +i=1000 +l=2048 +prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Test apply > cork +r=100 +i=1 +l=5 +prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Again with larger sizes so we hit overflow cases +r=100 +i=1000 +l=2048 +prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + + +# Test apply = cork +r=100 +i=1 +l=5 +prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +r=100 +i=1000 +l=2048 +prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096" +for t in "sendpage" "sendmsg"; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog" + echo $TEST + $TEST + sleep 2 +done + +# Tests for bpf_msg_pull_data() +for i in `seq 99 100 1600`; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600" + echo $TEST + $TEST + sleep 2 +done + +for i in `seq 199 100 1600`; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600" + echo $TEST + $TEST + sleep 2 +done + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600" +echo $TEST +$TEST +sleep 2 + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 1111 
--txmsg_end 1112 --txmsg_cork 1600" +echo $TEST +$TEST +sleep 2 + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600" +echo $TEST +$TEST +sleep 2 + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600" +echo $TEST +$TEST +sleep 2 + +TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \ + --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602" +echo $TEST +$TEST +sleep 2 + +# Run through gamut again with start and end +for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do +for t in "sendmsg" "sendpage"; do +for r in 1 10 100; do + for i in 1 10 100; do + for l in 1 10 100; do + TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2" + echo $TEST + $TEST + sleep 2 + done + done +done +done +done + +# Some specific tests to cover specific code paths +./sockmap --cgroup /mnt/cgroup2/ -t sendpage \ + -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3 +./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \ + -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3 +./sockmap --cgroup /mnt/cgroup2/ -t sendpage \ + -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5 +./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \ + -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5 diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c index 95a54a89a532..07aa237221d1 100644 --- a/samples/sockmap/sockmap_user.c +++ b/samples/sockmap/sockmap_user.c @@ -29,6 +29,7 @@ #include <sys/time.h> #include <sys/resource.h> #include <sys/types.h> +#include <sys/sendfile.h> #include <linux/netlink.h> #include <linux/socket.h> @@ -54,6 +55,16 @@ void running_handler(int a); /* global sockets */ int s1, s2, c1, c2, p1, p2; +int txmsg_pass; +int txmsg_noisy; +int txmsg_redir; +int txmsg_redir_noisy; +int txmsg_drop; +int txmsg_apply; +int 
txmsg_cork; +int txmsg_start; +int txmsg_end; + static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"cgroup", required_argument, NULL, 'c' }, @@ -62,6 +73,16 @@ static const struct option long_options[] = { {"iov_count", required_argument, NULL, 'i' }, {"length", required_argument, NULL, 'l' }, {"test", required_argument, NULL, 't' }, + {"data_test", no_argument, NULL, 'd' }, + {"txmsg", no_argument, &txmsg_pass, 1 }, + {"txmsg_noisy", no_argument, &txmsg_noisy, 1 }, + {"txmsg_redir", no_argument, &txmsg_redir, 1 }, + {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1}, + {"txmsg_drop", no_argument, &txmsg_drop, 1 }, + {"txmsg_apply", required_argument, NULL, 'a'}, + {"txmsg_cork", required_argument, NULL, 'k'}, + {"txmsg_start", required_argument, NULL, 's'}, + {"txmsg_end", required_argument, NULL, 'e'}, {0, 0, NULL, 0 } }; @@ -195,19 +216,71 @@ struct msg_stats { struct timespec end; }; +struct sockmap_options { + int verbose; + bool base; + bool sendpage; + bool data_test; + bool drop_expected; +}; + +static int msg_loop_sendpage(int fd, int iov_length, int cnt, + struct msg_stats *s, + struct sockmap_options *opt) +{ + bool drop = opt->drop_expected; + unsigned char k = 0; + FILE *file; + int i, fp; + + file = fopen(".sendpage_tst.tmp", "w+"); + for (i = 0; i < iov_length * cnt; i++, k++) + fwrite(&k, sizeof(char), 1, file); + fflush(file); + fseek(file, 0, SEEK_SET); + fclose(file); + + fp = open(".sendpage_tst.tmp", O_RDONLY); + clock_gettime(CLOCK_MONOTONIC, &s->start); + for (i = 0; i < cnt; i++) { + int sent = sendfile(fd, fp, NULL, iov_length); + + if (!drop && sent < 0) { + perror("send loop error:"); + close(fp); + return sent; + } else if (drop && sent >= 0) { + printf("sendpage loop error expected: %i\n", sent); + close(fp); + return -EIO; + } + + if (sent > 0) + s->bytes_sent += sent; + } + clock_gettime(CLOCK_MONOTONIC, &s->end); + close(fp); + return 0; +} + static int msg_loop(int fd, int iov_count, int 
iov_length, int cnt, - struct msg_stats *s, bool tx) + struct msg_stats *s, bool tx, + struct sockmap_options *opt) { struct msghdr msg = {0}; int err, i, flags = MSG_NOSIGNAL; struct iovec *iov; + unsigned char k; + bool data_test = opt->data_test; + bool drop = opt->drop_expected; iov = calloc(iov_count, sizeof(struct iovec)); if (!iov) return errno; + k = 0; for (i = 0; i < iov_count; i++) { - char *d = calloc(iov_length, sizeof(char)); + unsigned char *d = calloc(iov_length, sizeof(char)); if (!d) { fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); @@ -215,21 +288,34 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } iov[i].iov_base = d; iov[i].iov_len = iov_length; + + if (data_test && tx) { + int j; + + for (j = 0; j < iov_length; j++) + d[j] = k++; + } } msg.msg_iov = iov; msg.msg_iovlen = iov_count; + k = 0; if (tx) { clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { int sent = sendmsg(fd, &msg, flags); - if (sent < 0) { + if (!drop && sent < 0) { perror("send loop error:"); goto out_errno; + } else if (drop && sent >= 0) { + printf("send loop error expected: %i\n", sent); + errno = -EIO; + goto out_errno; } - s->bytes_sent += sent; + if (sent > 0) + s->bytes_sent += sent; } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { @@ -272,6 +358,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } s->bytes_recvd += recv; + + if (data_test) { + int j; + + for (i = 0; i < msg.msg_iovlen; i++) { + unsigned char *d = iov[i].iov_base; + + for (j = 0; + j < iov[i].iov_len && recv; j++) { + if (d[j] != k++) { + errno = -EIO; + fprintf(stderr, + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", + i, j, d[j], k - 1, d[j+1], k + 1); + goto out_errno; + } + recv--; + } + } + } } clock_gettime(CLOCK_MONOTONIC, &s->end); } @@ -300,7 +406,7 @@ static inline float recvdBps(struct msg_stats s) } static int sendmsg_test(int iov_count, int iov_buf, int cnt, - int verbose, bool base) + struct 
sockmap_options *opt) { float sent_Bps = 0, recvd_Bps = 0; int rx_fd, txpid, rxpid, err = 0; @@ -309,14 +415,20 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, errno = 0; - if (base) + if (opt->base) rx_fd = p1; else rx_fd = p2; rxpid = fork(); if (rxpid == 0) { - err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false); + if (opt->drop_expected) + exit(1); + + if (opt->sendpage) + iov_count = 1; + err = msg_loop(rx_fd, iov_count, iov_buf, + cnt, &s, false, opt); if (err) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", @@ -339,7 +451,12 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, txpid = fork(); if (txpid == 0) { - err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true); + if (opt->sendpage) + err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt); + else + err = msg_loop(c1, iov_count, iov_buf, + cnt, &s, true, opt); + if (err) fprintf(stderr, "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n", @@ -364,7 +481,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt, return err; } -static int forever_ping_pong(int rate, int verbose) +static int forever_ping_pong(int rate, struct sockmap_options *opt) { struct timeval timeout; char buf[1024] = {0}; @@ -429,7 +546,7 @@ static int forever_ping_pong(int rate, int verbose) if (rate) sleep(rate); - if (verbose) { + if (opt->verbose) { printf("."); fflush(stdout); @@ -443,20 +560,34 @@ enum { PING_PONG, SENDMSG, BASE, + BASE_SENDPAGE, + SENDPAGE, }; int main(int argc, char **argv) { - int iov_count = 1, length = 1024, rate = 1, verbose = 0; + int iov_count = 1, length = 1024, rate = 1, tx_prog_fd; struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; int opt, longindex, err, cg_fd = 0; + struct sockmap_options options = {0}; int test = PING_PONG; char filename[256]; - while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:", + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", long_options, &longindex)) != -1) { switch (opt) { - /* Cgroup configuration */ 
+ case 's': + txmsg_start = atoi(optarg); + break; + case 'e': + txmsg_end = atoi(optarg); + break; + case 'a': + txmsg_apply = atoi(optarg); + break; + case 'k': + txmsg_cork = atoi(optarg); + break; case 'c': cg_fd = open(optarg, O_DIRECTORY, O_RDONLY); if (cg_fd < 0) { @@ -470,7 +601,7 @@ int main(int argc, char **argv) rate = atoi(optarg); break; case 'v': - verbose = 1; + options.verbose = 1; break; case 'i': iov_count = atoi(optarg); @@ -478,6 +609,9 @@ int main(int argc, char **argv) case 'l': length = atoi(optarg); break; + case 'd': + options.data_test = true; + break; case 't': if (strcmp(optarg, "ping") == 0) { test = PING_PONG; @@ -485,11 +619,17 @@ int main(int argc, char **argv) test = SENDMSG; } else if (strcmp(optarg, "base") == 0) { test = BASE; + } else if (strcmp(optarg, "base_sendpage") == 0) { + test = BASE_SENDPAGE; + } else if (strcmp(optarg, "sendpage") == 0) { + test = SENDPAGE; } else { usage(argv); return -1; } break; + case 0: + break; case 'h': default: usage(argv); @@ -515,16 +655,16 @@ int main(int argc, char **argv) /* catch SIGINT */ signal(SIGINT, running_handler); - /* If base test skip BPF setup */ - if (test == BASE) - goto run; - if (load_bpf_file(filename)) { fprintf(stderr, "load_bpf_file: (%s) %s\n", filename, strerror(errno)); return 1; } + /* If base test skip BPF setup */ + if (test == BASE || test == BASE_SENDPAGE) + goto run; + /* Attach programs to sockmap */ err = bpf_prog_attach(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER, 0); @@ -557,13 +697,126 @@ run: goto out; } - if (test == PING_PONG) - err = forever_ping_pong(rate, verbose); - else if (test == SENDMSG) - err = sendmsg_test(iov_count, length, rate, verbose, false); - else if (test == BASE) - err = sendmsg_test(iov_count, length, rate, verbose, true); + /* Attach txmsg program to sockmap */ + if (txmsg_pass) + tx_prog_fd = prog_fd[3]; + else if (txmsg_noisy) + tx_prog_fd = prog_fd[4]; + else if (txmsg_redir) + tx_prog_fd = prog_fd[5]; + else if 
(txmsg_redir_noisy) + tx_prog_fd = prog_fd[6]; + else if (txmsg_drop) + tx_prog_fd = prog_fd[9]; + /* apply and cork must be last */ + else if (txmsg_apply) + tx_prog_fd = prog_fd[7]; + else if (txmsg_cork) + tx_prog_fd = prog_fd[8]; else + tx_prog_fd = 0; + + if (tx_prog_fd) { + int redir_fd, i = 0; + + err = bpf_prog_attach(tx_prog_fd, + map_fd[1], BPF_SK_MSG_VERDICT, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", + err, strerror(errno)); + return err; + } + + err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", + err, strerror(errno)); + return err; + } + + if (txmsg_redir || txmsg_redir_noisy) + redir_fd = c2; + else + redir_fd = c1; + + err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg): %d (%s\n", + err, strerror(errno)); + return err; + } + + if (txmsg_apply) { + err = bpf_map_update_elem(map_fd[3], + &i, &txmsg_apply, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n", + err, strerror(errno)); + return err; + } + } + + if (txmsg_cork) { + err = bpf_map_update_elem(map_fd[4], + &i, &txmsg_cork, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n", + err, strerror(errno)); + return err; + } + } + + if (txmsg_start) { + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n", + err, strerror(errno)); + return err; + } + } + + if (txmsg_end) { + i = 1; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_end, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n", + err, strerror(errno)); + return err; + } + } + } + + if (txmsg_drop) + options.drop_expected = true; + + if (test == PING_PONG) + err = forever_ping_pong(rate, &options); + else if 
(test == SENDMSG) { + options.base = false; + options.sendpage = false; + err = sendmsg_test(iov_count, length, rate, &options); + } else if (test == SENDPAGE) { + options.base = false; + options.sendpage = true; + err = sendmsg_test(iov_count, length, rate, &options); + } else if (test == BASE) { + options.base = true; + options.sendpage = false; + err = sendmsg_test(iov_count, length, rate, &options); + } else if (test == BASE_SENDPAGE) { + options.base = true; + options.sendpage = true; + err = sendmsg_test(iov_count, length, rate, &options); + } else fprintf(stderr, "unknown test\n"); out: bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); |
