From 8a76145a2ec2a81dfe34d7ac42e8c242f095e8c8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 5 Oct 2022 21:24:51 -0700 Subject: bpf: explicitly define BPF_FUNC_xxx integer values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically enum bpf_func_id's BPF_FUNC_xxx enumerators relied on implicit sequential values being assigned by compiler. This is convenient, as new BPF helpers are always added at the very end, but it also has its downsides, some of them being: - with over 200 helpers now it's very hard to know what's each helper's ID, which is often important to know when working with BPF assembly (e.g., by dumping raw bpf assembly instructions with llvm-objdump -d command). it's possible to work around this by looking into vmlinux.h, dumping /sys/btf/kernel/vmlinux, looking at libbpf-provided bpf_helper_defs.h, etc. But it always feels like an unnecessary step and one should be able to quickly figure this out from UAPI header. - when backporting and cherry-picking only some BPF helpers onto older kernels it's important to be able to skip some enum values for helpers that weren't backported, but preserve absolute integer IDs to keep BPF helper IDs stable so that BPF programs stay portable across upstream and backported kernels. While neither problem is insurmountable, they come up frequently enough and are annoying enough to warrant improving the situation. And for the backporting the problem can easily go unnoticed for a while, especially if backport is done with people not very familiar with BPF subsystem overall. Anyways, it's easy to fix this by making sure that __BPF_FUNC_MAPPER macro provides explicit helper IDs. Unfortunately that would potentially break existing users that use UAPI-exposed __BPF_FUNC_MAPPER and are expected to pass macro that accepts only symbolic helper identifier (e.g., map_lookup_elem for bpf_map_lookup_elem() helper). As such, we need to introduce a new macro (___BPF_FUNC_MAPPER) which would specify both identifier and integer ID, but in such a way as to allow existing __BPF_FUNC_MAPPER be expressed in terms of new ___BPF_FUNC_MAPPER macro. And that's what this patch is doing. To avoid duplication and allow __BPF_FUNC_MAPPER stay *exactly* the same, ___BPF_FUNC_MAPPER accepts arbitrary "context" arguments, which can be used to pass any extra macros, arguments, and whatnot. In our case we use this to pass original user-provided macro that expects single argument and __BPF_FUNC_MAPPER is using it's own three-argument __BPF_FUNC_MAPPER_APPLY intermediate macro to impedance-match new and old "callback" macros. Once we resolve this, we use new ___BPF_FUNC_MAPPER to define enum bpf_func_id with explicit values. The other users of __BPF_FUNC_MAPPER in kernel (namely in kernel/bpf/disasm.c) are kept exactly the same both as demonstration that backwards compat works, but also to avoid unnecessary code churn. Note that new ___BPF_FUNC_MAPPER() doesn't forcefully insert comma between values, as that might not be appropriate in all possible cases where ___BPF_FUNC_MAPPER might be used by users. This doesn't reduce usability, as it's trivial to insert that comma inside "callback" macro. To validate all the manually specified IDs are exactly right, we used BTF to compare before and after values: $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > after.txt $ git stash # stach UAPI changes $ make -j90 ... re-building kernel without UAPI changes ... $ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > before.txt $ diff -u before.txt after.txt --- before.txt 2022-10-05 10:48:18.119195916 -0700 +++ after.txt 2022-10-05 10:46:49.446615025 -0700 @@ -1,4 +1,4 @@ -[14576] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211 +[9560] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211 'BPF_FUNC_unspec' val=0 'BPF_FUNC_map_lookup_elem' val=1 'BPF_FUNC_map_update_elem' val=2 As can be seen from diff above, the only thing that changed was resulting BTF type ID of ENUM bpf_func_id, not any of the enumerators, their names or integer values. The only other place that needed fixing was scripts/bpf_doc.py used to generate man pages and bpf_helper_defs.h header for libbpf and selftests. That script is tightly-coupled to exact shape of ___BPF_FUNC_MAPPER macro definition, so had to be trivially adapted. Cc: Quentin Monnet Reported-by: Andrea Terzolo Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: Jiri Olsa Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221006042452.2089843-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 432 ++++++++++++++++++++++++----------------------- 1 file changed, 219 insertions(+), 213 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 51b9aa640ad2..17f61338f8f8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5436,225 +5436,231 @@ union bpf_attr { * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ - FN(ktime_get_tai_ns), \ - FN(user_ringbuf_drain), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) \ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, ##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, ##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN -- cgit v1.2.3 From 0ff57171d6d225558c81a69439d5323e35b40549 Mon Sep 17 00:00:00 2001 From: Vinayak Yadawad Date: Wed, 7 Sep 2022 18:14:48 +0530 Subject: cfg80211: Update Transition Disable policy during port authorization In case of 4way handshake offload, transition disable policy updated by the AP during EAPOL 3/4 is not updated to the upper layer. This results in mismatch between transition disable policy between the upper layer and the driver. This patch addresses this issue by updating transition disable policy as part of port authorization indication. Signed-off-by: Vinayak Yadawad Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c32e7616a366..c14a91bbca7c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2749,6 +2749,8 @@ enum nl80211_commands { * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the ack RX * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates * the incoming frame RX timestamp. + * @NL80211_ATTR_TD_BITMAP: Transition Disable bitmap, for subsequent + * (re)associations. * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3276,6 +3278,7 @@ enum nl80211_attrs { NL80211_ATTR_TX_HW_TIMESTAMP, NL80211_ATTR_RX_HW_TIMESTAMP, + NL80211_ATTR_TD_BITMAP, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.3 From dd2bc5cc9e25554546f16661f8de1dcb8033dde0 Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Tue, 4 Oct 2022 16:31:04 +0700 Subject: ipmi: ssif_bmc: Add SSIF BMC driver The SMBus system interface (SSIF) IPMI BMC driver can be used to perform in-band IPMI communication with their host in management (BMC) side. Thanks Dan for the copy_from_user() fix in the link below. Link: https://lore.kernel.org/linux-arm-kernel/20220310114119.13736-4-quan@os.amperecomputing.com/ Signed-off-by: Quan Nguyen Message-Id: <20221004093106.1653317-2-quan@os.amperecomputing.com> Signed-off-by: Corey Minyard --- include/uapi/linux/ipmi_ssif_bmc.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/uapi/linux/ipmi_ssif_bmc.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipmi_ssif_bmc.h b/include/uapi/linux/ipmi_ssif_bmc.h new file mode 100644 index 000000000000..1c6a753dad08 --- /dev/null +++ b/include/uapi/linux/ipmi_ssif_bmc.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note*/ +/* + * Copyright (c) 2022, Ampere Computing LLC. + */ + +#ifndef _UAPI_LINUX_IPMI_SSIF_BMC_H +#define _UAPI_LINUX_IPMI_SSIF_BMC_H + +#include + +/* Max length of ipmi ssif message included netfn and cmd field */ +#define IPMI_SSIF_PAYLOAD_MAX 254 +struct ipmi_ssif_msg { + unsigned int len; + __u8 payload[IPMI_SSIF_PAYLOAD_MAX]; +}; + +#endif /* _UAPI_LINUX_IPMI_SSIF_BMC_H */ -- cgit v1.2.3 From b9f5ce27c8f8be409d6afca9797a2da01e5cebbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Tue, 18 Oct 2022 20:22:09 +0200 Subject: landlock: Support file truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the LANDLOCK_ACCESS_FS_TRUNCATE flag for file truncation. This flag hooks into the path_truncate, file_truncate and file_alloc_security LSM hooks and covers file truncation using truncate(2), ftruncate(2), open(2) with O_TRUNC, as well as creat(). This change also increments the Landlock ABI version, updates corresponding selftests, and updates code documentation to document the flag. In security/security.c, allocate security blobs at pointer-aligned offsets. This fixes the problem where one LSM's security blob can shift another LSM's security blob to an unaligned address (reported by Nathan Chancellor). The following operations are restricted: open(2): requires the LANDLOCK_ACCESS_FS_TRUNCATE right if a file gets implicitly truncated as part of the open() (e.g. using O_TRUNC). Notable special cases: * open(..., O_RDONLY|O_TRUNC) can truncate files as well in Linux * open() with O_TRUNC does *not* need the TRUNCATE right when it creates a new file. truncate(2) (on a path): requires the LANDLOCK_ACCESS_FS_TRUNCATE right. ftruncate(2) (on a file): requires that the file had the TRUNCATE right when it was previously opened. File descriptors acquired by other means than open(2) (e.g. memfd_create(2)) continue to support truncation with ftruncate(2). Cc: Nathan Chancellor Signed-off-by: Günther Noack Acked-by: Paul Moore (LSM) Link: https://lore.kernel.org/r/20221018182216.301684-5-gnoack3000@gmail.com Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 9c4bcc37a455..f3223f964691 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -95,8 +95,19 @@ struct landlock_path_beneath_attr { * A file can only receive these access rights: * * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file. - * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. + * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. Note that + * you might additionally need the %LANDLOCK_ACCESS_FS_TRUNCATE right in order + * to overwrite files with :manpage:`open(2)` using ``O_TRUNC`` or + * :manpage:`creat(2)`. * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access. + * - %LANDLOCK_ACCESS_FS_TRUNCATE: Truncate a file with :manpage:`truncate(2)`, + * :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with + * ``O_TRUNC``. Whether an opened file can be truncated with + * :manpage:`ftruncate(2)` is determined during :manpage:`open(2)`, in the + * same way as read and write permissions are checked during + * :manpage:`open(2)` using %LANDLOCK_ACCESS_FS_READ_FILE and + * %LANDLOCK_ACCESS_FS_WRITE_FILE. This access right is available since the + * third version of the Landlock ABI. * * A directory can receive access rights related to files or directories. The * following access right is applied to the directory itself, and the @@ -139,10 +150,9 @@ struct landlock_path_beneath_attr { * * It is currently not possible to restrict some file-related actions * accessible through these syscall families: :manpage:`chdir(2)`, - * :manpage:`truncate(2)`, :manpage:`stat(2)`, :manpage:`flock(2)`, - * :manpage:`chmod(2)`, :manpage:`chown(2)`, :manpage:`setxattr(2)`, - * :manpage:`utime(2)`, :manpage:`ioctl(2)`, :manpage:`fcntl(2)`, - * :manpage:`access(2)`. + * :manpage:`stat(2)`, :manpage:`flock(2)`, :manpage:`chmod(2)`, + * :manpage:`chown(2)`, :manpage:`setxattr(2)`, :manpage:`utime(2)`, + * :manpage:`ioctl(2)`, :manpage:`fcntl(2)`, :manpage:`access(2)`. * Future Landlock evolutions will enable to restrict them. */ /* clang-format off */ @@ -160,6 +170,7 @@ struct landlock_path_beneath_attr { #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) #define LANDLOCK_ACCESS_FS_REFER (1ULL << 13) +#define LANDLOCK_ACCESS_FS_TRUNCATE (1ULL << 14) /* clang-format on */ #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From d182bf156c4cb8b08ce4a75e82b3357b14a4382d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Tue, 11 Oct 2022 09:53:48 +0200 Subject: usb: gadget: uvc: default the ctrl request interface offsets For the userspace it is needed to distinguish between requests for the control or streaming interface. The userspace would have to parse the configfs to know which interface index it has to compare the ctrl requests against. Since the interface numbers are not fixed, e.g. for composite gadgets, the interface offset depends on the setup. The kernel has this information when handing over the ctrl request to the userspace. This patch removes the offset from the interface numbers and expose the default interface defines in the uapi g_uvc.h. Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20221011075348.1786897-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/g_uvc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/g_uvc.h b/include/uapi/linux/usb/g_uvc.h index 652f169a019e..8d7824dde1b2 100644 --- a/include/uapi/linux/usb/g_uvc.h +++ b/include/uapi/linux/usb/g_uvc.h @@ -21,6 +21,9 @@ #define UVC_EVENT_DATA (V4L2_EVENT_PRIVATE_START + 5) #define UVC_EVENT_LAST (V4L2_EVENT_PRIVATE_START + 5) +#define UVC_STRING_CONTROL_IDX 0 +#define UVC_STRING_STREAMING_IDX 1 + struct uvc_request_data { __s32 length; __u8 data[60]; -- cgit v1.2.3 From 404c76783f322120266a4ef659abe418dc74f5c6 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 20 Oct 2022 17:20:03 +0200 Subject: ethtool: Add support for 800Gbps link modes Add support for 800Gbps speed, link modes of 100Gbps per lane. As mentioned in slide 21 in IEEE documentation [1], all adopted 802.3df copper and optical PMDs baselines using 100G/lane will be supported. Add the relevant PMDs which are mentioned in slide 5 in IEEE documentation [1] and were approved on 10-2022 [2]: BP - KR8 Cu Cable - CR8 MMF 50m - VR8 MMF 100m - SR8 SMF 500m - DR8 SMF 2km - DR8-2 [1]: https://www.ieee802.org/3/df/public/22_10/22_1004/shrikhande_3df_01a_221004.pdf [2]: https://ieee802.org/3/df/KeyMotions_3df_221005.pdf Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index dc2aa3d75b39..f341de2ae612 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1737,6 +1737,13 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, + ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT = 93, + ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT = 94, + ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT = 95, + ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96, + ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97, + ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98, + /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; @@ -1848,6 +1855,7 @@ enum ethtool_link_mode_bit_indices { #define SPEED_100000 100000 #define SPEED_200000 200000 #define SPEED_400000 400000 +#define SPEED_800000 800000 #define SPEED_UNKNOWN -1 -- cgit v1.2.3 From e77eb66342c73946f7ed9ea68a6329f13b4343eb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 29 Sep 2022 11:11:22 +0200 Subject: videodev2.h: add p_s32 and p_s64 pointers Added p_s32 and p_s64 pointers to the union in struct v4l2_ext_control to simplify INTEGER and INTEGER64 control array support. Internally the control framework handles such arrays just fine, but userspace is missing corresponding pointers to access array elements of these types. The internal union v4l2_ctrl_ptr which the control framework uses already has these types, they just were never added to the public API. Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 86cae23cc446..a6f5c008a5a8 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1779,6 +1779,8 @@ struct v4l2_ext_control { __u8 __user *p_u8; __u16 __user *p_u16; __u32 __user *p_u32; + __u32 __user *p_s32; + __u32 __user *p_s64; struct v4l2_area __user *p_area; struct v4l2_ctrl_h264_sps __user *p_h264_sps; struct v4l2_ctrl_h264_pps *p_h264_pps; -- cgit v1.2.3 From 48f750e25ab72c36f2b043370175adf602d4a45e Mon Sep 17 00:00:00 2001 From: Benjamin Mugnier Date: Tue, 11 Oct 2022 14:16:01 +0200 Subject: media: v4l: Add 1X16 16-bit greyscale media bus code definition This extends the greyscale media bus family originally from MEDIA_BUS_FMT_Y8_1X8 up to MEDIA_BUS_FMT_Y14_1X14 by adding MEDIA_BUS_FMT_Y16_1X16, and behaves the same way with 16 bits. Add its documentation in subdev-formats.rst Signed-off-by: Benjamin Mugnier Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus --- include/uapi/linux/media-bus-format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index ec3323dbb927..ca9a24c874da 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -69,7 +69,7 @@ #define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 #define MEDIA_BUS_FMT_RGB161616_1X48 0x101a -/* YUV (including grey) - next is 0x202e */ +/* YUV (including grey) - next is 0x202f */ #define MEDIA_BUS_FMT_Y8_1X8 0x2001 #define MEDIA_BUS_FMT_UV8_1X8 0x2015 #define MEDIA_BUS_FMT_UYVY8_1_5X8 0x2002 @@ -92,6 +92,7 @@ #define MEDIA_BUS_FMT_YUYV12_2X12 0x201e #define MEDIA_BUS_FMT_YVYU12_2X12 0x201f #define MEDIA_BUS_FMT_Y14_1X14 0x202d +#define MEDIA_BUS_FMT_Y16_1X16 0x202e #define MEDIA_BUS_FMT_UYVY8_1X16 0x200f #define MEDIA_BUS_FMT_VYUY8_1X16 0x2010 #define MEDIA_BUS_FMT_YUYV8_1X16 0x2011 -- cgit v1.2.3 From 7673f3058bd2511474491d4f82c2ece09176412c Mon Sep 17 00:00:00 2001 From: Benjamin Mugnier Date: Tue, 11 Oct 2022 14:16:02 +0200 Subject: media: v4l: ctrls: Add a control for HDR mode Add V4L2_CID_HDR_MODE as a menu item control to set the HDR mode of the sensor, and its documentation. Menu items are not standardized as they differ for each sensors. Signed-off-by: Benjamin Mugnier Signed-off-by: Sakari Ailus --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index b5e7d082b8ad..d27e255ed33b 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1019,6 +1019,8 @@ enum v4l2_auto_focus_range { #define V4L2_CID_CAMERA_SENSOR_ROTATION (V4L2_CID_CAMERA_CLASS_BASE+35) +#define V4L2_CID_HDR_SENSOR_MODE (V4L2_CID_CAMERA_CLASS_BASE+36) + /* FM Modulator class control IDs */ #define V4L2_CID_FM_TX_CLASS_BASE (V4L2_CTRL_CLASS_FM_TX | 0x900) -- cgit v1.2.3 From 3a07327d10a09379315c844c63f27941f5081e0a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 25 Oct 2022 13:48:15 +0200 Subject: netfilter: nft_inner: support for inner tunnel header matching This new expression allows you to match on the inner headers that are encapsulated by any of the existing tunneling protocols. This expression parses the inner packet to set the link, network and transport offsets, so the existing expressions (with a few updates) can be reused to match on the inner headers. The inner expression supports for different tunnel combinations such as: - ethernet frame over IPv4/IPv6 packet, eg. VxLAN. - IPv4/IPv6 packet over IPv4/IPv6 packet, eg. IPIP. - IPv4/IPv6 packet over IPv4/IPv6 + transport header, eg. GRE. - transport header (ESP or SCTP) over transport header (usually UDP) The following fields are used to describe the tunnel protocol: - flags, which describe how to parse the inner headers: NFT_PAYLOAD_CTX_INNER_TUN, the tunnel provides its own header. NFT_PAYLOAD_CTX_INNER_ETHER, the ethernet frame is available as inner header. NFT_PAYLOAD_CTX_INNER_NH, the network header is available as inner header. NFT_PAYLOAD_CTX_INNER_TH, the transport header is available as inner header. For example, VxLAN sets on all of these flags. While GRE only sets on NFT_PAYLOAD_CTX_INNER_NH and NFT_PAYLOAD_CTX_INNER_TH. Then, ESP over UDP only sets on NFT_PAYLOAD_CTX_INNER_TH. The tunnel description is composed of the following attributes: - header size: in case the tunnel comes with its own header, eg. VxLAN. - type: this provides a hint to userspace on how to delinearize the rule. This is useful for VxLAN and Geneve since they run over UDP, since transport does not provide a hint. This is also useful in case hardware offload is ever supported. The type is not currently interpreted by the kernel. - expression: currently only payload supported. Follow up patch adds also inner meta support which is required by autogenerated dependencies. The exthdr expression should be supported too at some point. There is a new inner_ops operation that needs to be set on to allow to use an existing expression from the inner expression. This patch adds a new NFT_PAYLOAD_TUN_HEADER base which allows to match on the tunnel header fields, eg. vxlan vni. The payload expression is embedded into nft_inner private area and this private data area is passed to the payload inner eval function via direct call. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 466fd3f4447c..05a15dce8271 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -760,6 +760,7 @@ enum nft_payload_bases { NFT_PAYLOAD_NETWORK_HEADER, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_PAYLOAD_INNER_HEADER, + NFT_PAYLOAD_TUN_HEADER, }; /** @@ -779,6 +780,31 @@ enum nft_payload_csum_flags { NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), }; +enum nft_inner_type { + NFT_INNER_UNSPEC = 0, + NFT_INNER_VXLAN, +}; + +enum nft_inner_flags { + NFT_INNER_HDRSIZE = (1 << 0), + NFT_INNER_LL = (1 << 1), + NFT_INNER_NH = (1 << 2), + NFT_INNER_TH = (1 << 3), +}; +#define NFT_INNER_MASK (NFT_INNER_HDRSIZE | NFT_INNER_LL | \ + NFT_INNER_NH | NFT_INNER_TH) + +enum nft_inner_attributes { + NFTA_INNER_UNSPEC, + NFTA_INNER_NUM, + NFTA_INNER_TYPE, + NFTA_INNER_FLAGS, + NFTA_INNER_HDRSIZE, + NFTA_INNER_EXPR, + __NFTA_INNER_MAX +}; +#define NFTA_INNER_MAX (__NFTA_INNER_MAX - 1) + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * -- cgit v1.2.3 From 0db14b95660b63dceeb7e89f2e3ffa97d331fce0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Oct 2022 13:03:34 +0200 Subject: netfilter: nft_inner: add geneve support Geneve tunnel header may contain options, parse geneve header and update offset to point to the link layer header according to the opt_len field. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 05a15dce8271..e4b739d57480 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -783,6 +783,7 @@ enum nft_payload_csum_flags { enum nft_inner_type { NFT_INNER_UNSPEC = 0, NFT_INNER_VXLAN, + NFT_INNER_GENEVE, }; enum nft_inner_flags { -- cgit v1.2.3 From 23a7aea5faf6500051c52dfaba46845c41b3abd4 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 4 Oct 2022 12:25:40 +0200 Subject: ELF uapi: add spaces before '{' When searching for a struct definition I often enough end up simply doing git grep 'struct foobar {' Sadly some of the ELF structs did not follow the usual coding style so they were invisible. Signed-off-by: Rolf Eike Beer Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/11563980.Ss37MnutNL@mobilepool36.emlix.com --- include/uapi/linux/elf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c7b056af9ef0..775a17b5b8ac 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -140,9 +140,9 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -typedef struct dynamic{ +typedef struct dynamic { Elf32_Sword d_tag; - union{ + union { Elf32_Sword d_val; Elf32_Addr d_ptr; } d_un; @@ -173,7 +173,7 @@ typedef struct elf64_rel { Elf64_Xword r_info; /* index and type of relocation */ } Elf64_Rel; -typedef struct elf32_rela{ +typedef struct elf32_rela { Elf32_Addr r_offset; Elf32_Word r_info; Elf32_Sword r_addend; @@ -185,7 +185,7 @@ typedef struct elf64_rela { Elf64_Sxword r_addend; /* Constant addend used to compute value */ } Elf64_Rela; -typedef struct elf32_sym{ +typedef struct elf32_sym { Elf32_Word st_name; Elf32_Addr st_value; Elf32_Word st_size; @@ -206,7 +206,7 @@ typedef struct elf64_sym { #define EI_NIDENT 16 -typedef struct elf32_hdr{ +typedef struct elf32_hdr { unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; @@ -246,7 +246,7 @@ typedef struct elf64_hdr { #define PF_W 0x2 #define PF_X 0x1 -typedef struct elf32_phdr{ +typedef struct elf32_phdr { Elf32_Word p_type; Elf32_Off p_offset; Elf32_Addr p_vaddr; -- cgit v1.2.3 From 8f6e3f9e5a0f58e458a348b7e36af11d0e9702af Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 00:14:20 -0700 Subject: binfmt: Fix whitespace issues Fix the annoying whitespace issues that have been following these files around for years. Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20221018071350.never.230-kees@kernel.org --- include/uapi/linux/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 775a17b5b8ac..4c6a8fa5e7ed 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -91,7 +91,7 @@ typedef __s64 Elf64_Sxword; #define DT_INIT 12 #define DT_FINI 13 #define DT_SONAME 14 -#define DT_RPATH 15 +#define DT_RPATH 15 #define DT_SYMBOLIC 16 #define DT_REL 17 #define DT_RELSZ 18 -- cgit v1.2.3 From c4bcfb38a95edb1021a53f2d0356a78120ecfbe4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 25 Oct 2022 21:28:50 -0700 Subject: bpf: Implement cgroup storage available to non-cgroup-attached bpf progs Similar to sk/inode/task storage, implement similar cgroup local storage. There already exists a local storage implementation for cgroup-attached bpf programs. See map type BPF_MAP_TYPE_CGROUP_STORAGE and helper bpf_get_local_storage(). But there are use cases such that non-cgroup attached bpf progs wants to access cgroup local storage data. For example, tc egress prog has access to sk and cgroup. It is possible to use sk local storage to emulate cgroup local storage by storing data in socket. But this is a waste as it could be lots of sockets belonging to a particular cgroup. Alternatively, a separate map can be created with cgroup id as the key. But this will introduce additional overhead to manipulate the new map. A cgroup local storage, similar to existing sk/inode/task storage, should help for this use case. The life-cycle of storage is managed with the life-cycle of the cgroup struct. i.e. the storage is destroyed along with the owning cgroup with a call to bpf_cgrp_storage_free() when cgroup itself is deleted. The userspace map operations can be done by using a cgroup fd as a key passed to the lookup, update and delete operations. Typically, the following code is used to get the current cgroup: struct task_struct *task = bpf_get_current_task_btf(); ... task->cgroups->dfl_cgrp ... and in structure task_struct definition: struct task_struct { .... struct css_set __rcu *cgroups; .... } With sleepable program, accessing task->cgroups is not protected by rcu_read_lock. So the current implementation only supports non-sleepable program and supporting sleepable program will be the next step together with adding rcu_read_lock protection for rcu tagged structures. Since map name BPF_MAP_TYPE_CGROUP_STORAGE has been used for old cgroup local storage support, the new map name BPF_MAP_TYPE_CGRP_STORAGE is used for cgroup storage available to non-cgroup-attached bpf programs. The old cgroup storage supports bpf_get_local_storage() helper to get the cgroup data. The new cgroup storage helper bpf_cgrp_storage_get() can provide similar functionality. While old cgroup storage pre-allocates storage memory, the new mechanism can also pre-allocate with a user space bpf_map_update_elem() call to avoid potential run-time memory allocation failure. Therefore, the new cgroup storage can provide all functionality w.r.t. the old one. So in uapi bpf.h, the old BPF_MAP_TYPE_CGROUP_STORAGE is alias to BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED to indicate the old cgroup storage can be deprecated since the new one can provide the same functionality. Acked-by: David Vernet Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20221026042850.673791-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 17f61338f8f8..94659f6b3395 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,6 +5443,44 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5647,6 +5693,8 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ FN(ktime_get_tai_ns, 208, ##ctx) \ FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- cgit v1.2.3 From e32e1e26c4098d8a866ce09fd26d8004da4ddf9e Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 19 Sep 2022 20:03:39 +0530 Subject: PCI: Add PCI_PTM_CAP_RES macro Add macro defining Responder capable bit in Precision Time Measurement capability register. Link: https://lore.kernel.org/r/20220919143340.4527-2-vidyas@nvidia.com Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Reviewed-by: Jingoo Han --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd..1c3591c8e09e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1058,6 +1058,7 @@ /* Precision Time Measurement */ #define PCI_PTM_CAP 0x04 /* PTM Capability */ #define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ +#define PCI_PTM_CAP_RES 0x00000002 /* Responder capable */ #define PCI_PTM_CAP_ROOT 0x00000004 /* Root capable */ #define PCI_PTM_GRANULARITY_MASK 0x0000FF00 /* Clock granularity */ #define PCI_PTM_CTRL 0x08 /* PTM Control */ -- cgit v1.2.3 From 7984ceb134bf31aa9a597f10ed52d831d5aede14 Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 17 Oct 2022 14:25:00 -0500 Subject: crypto: af_alg - Support symmetric encryption via keyring keys We want to leverage keyring to store sensitive keys, and then use those keys for symmetric encryption via the crypto API. Among the key types we wish to support are: user, logon, encrypted, and trusted. User key types are already able to have their data copied to user space, but logon does not support this. Further, trusted and encrypted keys will return their encrypted data back to user space on read, which does not make them ideal for symmetric encryption. To support symmetric encryption for these key types, add a new ALG_SET_KEY_BY_KEY_SERIAL setsockopt() option to the crypto API. This allows users to pass a key_serial_t to the crypto API to perform symmetric encryption. The behavior is the same as ALG_SET_KEY, but the crypto key data is copied in kernel space from a keyring key, which allows for the support of logon, encrypted, and trusted key types. Keyring keys must have the KEY_(POS|USR|GRP|OTH)_SEARCH permission set to leverage this feature. This follows the asymmetric_key type where key lookup calls eventually lead to keyring_search_rcu() without the KEYRING_SEARCH_NO_CHECK_PERM flag set. Signed-off-by: Frederick Lawler Signed-off-by: Herbert Xu --- include/uapi/linux/if_alg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index 578b18aab821..0824fbc026a1 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -52,6 +52,7 @@ struct af_alg_iv { #define ALG_SET_AEAD_ASSOCLEN 4 #define ALG_SET_AEAD_AUTHSIZE 5 #define ALG_SET_DRBG_ENTROPY 6 +#define ALG_SET_KEY_BY_KEY_SERIAL 7 /* Operations */ #define ALG_OP_DECRYPT 0 -- cgit v1.2.3 From 29c1c44646aec5d5134f2365259a84becc1ee7d3 Mon Sep 17 00:00:00 2001 From: Mubashir Adnan Qureshi Date: Wed, 26 Oct 2022 13:51:14 +0000 Subject: tcp: add u32 counter in tcp_sock and an SNMP counter for PLB A u32 counter is added to tcp_sock for counting the number of PLB triggered rehashes for a TCP connection. An SNMP counter is also added to count overall PLB triggered rehash events for a host. These counters are hooked up to PLB implementation for DCTCP. TCP_NLA_REHASH is added to SCM_TIMESTAMPING_OPT_STATS that reports the rehash attempts triggered due to PLB or timeouts. This gives a historical view of sustained congestion or timeouts experienced by the TCP connection. Signed-off-by: Mubashir Adnan Qureshi Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + include/uapi/linux/tcp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 4d7470036a8b..6600cb0164c2 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -292,6 +292,7 @@ enum LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ + LINUX_MIB_TCPPLBREHASH, /* TCPPLBRehash */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8fc09e8638b3..c9abe86eda5f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -315,6 +315,7 @@ enum { TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ TCP_NLA_TTL, /* TTL or hop limit of a packet received */ + TCP_NLA_REHASH, /* PLB and timeout triggered rehash attempts */ }; /* for TCP_MD5SIG socket option */ -- cgit v1.2.3 From 71fc704768f601ed3fa36310822a5e03f310f781 Mon Sep 17 00:00:00 2001 From: Mubashir Adnan Qureshi Date: Wed, 26 Oct 2022 13:51:15 +0000 Subject: tcp: add rcv_wnd and plb_rehash to TCP_INFO rcv_wnd can be useful to diagnose TCP performance where receiver window becomes the bottleneck. rehash reports the PLB and timeout triggered rehash attempts by the TCP connection. Signed-off-by: Mubashir Adnan Qureshi Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c9abe86eda5f..879eeb0a084b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -284,6 +284,11 @@ struct tcp_info { __u32 tcpi_snd_wnd; /* peer's advertised receive window after * scaling (bytes) */ + __u32 tcpi_rcv_wnd; /* local advertised receive window after + * scaling (bytes) + */ + + __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ -- cgit v1.2.3 From 58ba426388d9fe56aa638f555b01d6e63cada88c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 27 Oct 2022 17:10:14 -0400 Subject: net/packet: add PACKET_FANOUT_FLAG_IGNORE_OUTGOING Extend packet socket option PACKET_IGNORE_OUTGOING to fanout groups. The socket option sets ptype.ignore_outgoing, which makes dev_queue_xmit_nit skip the socket. When the socket joins a fanout group, the option is not reflected in the struct ptype of the group. dev_queue_xmit_nit only tests the fanout ptype, so the flag is ignored once a socket joins a fanout group. Inheriting the option from a socket would change established behavior. Different sockets in the group can set different flags, and can also change them at runtime. Testing in packet_rcv_fanout defeats the purpose of the original patch, which is to avoid skb_clone in dev_queue_xmit_nit (esp. for MSG_ZEROCOPY packets). Instead, introduce a new fanout group flag with the same behavior. Tested with https://github.com/wdebruij/kerneltools/blob/master/tests/test_psock_fanout_ignore_outgoing.c Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221027211014.3581513-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index c07caf7b40db..a8516b3594a4 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -70,6 +70,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_UNIQUEID 0x2000 +#define PACKET_FANOUT_FLAG_IGNORE_OUTGOING 0x4000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { -- cgit v1.2.3 From 738136a0e3757a8534df3ad97d6ff6d7f429f6c1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Oct 2022 14:25:53 -0700 Subject: netlink: split up copies in the ack construction Clean up the use of unsafe_memcpy() by adding a flexible array at the end of netlink message header and splitting up the header and data copies. Reviewed-by: Kees Cook Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e2ae82e3f9f7..5da0da59bf01 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -48,6 +48,7 @@ struct sockaddr_nl { * @nlmsg_flags: Additional flags * @nlmsg_seq: Sequence number * @nlmsg_pid: Sending process port ID + * @nlmsg_data: Message payload */ struct nlmsghdr { __u32 nlmsg_len; @@ -55,6 +56,7 @@ struct nlmsghdr { __u16 nlmsg_flags; __u32 nlmsg_seq; __u32 nlmsg_pid; + __u8 nlmsg_data[]; }; /* Flags values */ -- cgit v1.2.3 From 6dd07781b4cdd38103c81ddcc88fa4e8a31ebf71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 19 Oct 2022 12:33:39 +0300 Subject: serial: Convert serial_rs485 to kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert struct serial_rs485 comments to kernel doc format and include it into documentation. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Reviewed-by: Bagas Sanjaya Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221019093343.9546-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 55 +++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index cea06924b295..53bc1af67a41 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -107,33 +107,50 @@ struct serial_icounter_struct { int reserved[9]; }; -/* +/** + * struct serial_rs485 - serial interface for controlling RS485 settings. + * @flags: RS485 feature flags. + * @delay_rts_before_send: Delay before send (milliseconds). + * @delay_rts_after_send: Delay after send (milliseconds). + * @addr_recv: Receive filter for RS485 addressing mode + * (used only when %SER_RS485_ADDR_RECV is set). + * @addr_dest: Destination address for RS485 addressing mode + * (used only when %SER_RS485_ADDR_DEST is set). + * @padding0: Padding (set to zero). + * @padding1: Padding (set to zero). + * @padding: Deprecated, use @padding0 and @padding1 instead. + * Do not use with @addr_recv and @addr_dest (due to + * overlap). + * * Serial interface for controlling RS485 settings on chips with suitable * support. Set with TIOCSRS485 and get with TIOCGRS485 if supported by your * platform. The set function returns the new state, with any unsupported bits * reverted appropriately. + * + * The flag bits are: + * + * * %SER_RS485_ENABLED - RS485 enabled. + * * %SER_RS485_RTS_ON_SEND - Logical level for RTS pin when sending. + * * %SER_RS485_RTS_AFTER_SEND - Logical level for RTS pin after sent. + * * %SER_RS485_RX_DURING_TX - Full-duplex RS485 line. + * * %SER_RS485_TERMINATE_BUS - Enable bus termination (if supported). + * * %SER_RS485_ADDRB - Enable RS485 addressing mode. + * * %SER_RS485_ADDR_RECV - Receive address filter (enables @addr_recv). Requires %SER_RS485_ADDRB. + * * %SER_RS485_ADDR_DEST - Destination address (enables @addr_dest). Requires %SER_RS485_ADDRB. */ - struct serial_rs485 { - __u32 flags; /* RS485 feature flags */ -#define SER_RS485_ENABLED (1 << 0) /* If enabled */ -#define SER_RS485_RTS_ON_SEND (1 << 1) /* Logical level for - RTS pin when - sending */ -#define SER_RS485_RTS_AFTER_SEND (1 << 2) /* Logical level for - RTS pin after sent*/ + __u32 flags; +#define SER_RS485_ENABLED (1 << 0) +#define SER_RS485_RTS_ON_SEND (1 << 1) +#define SER_RS485_RTS_AFTER_SEND (1 << 2) #define SER_RS485_RX_DURING_TX (1 << 4) -#define SER_RS485_TERMINATE_BUS (1 << 5) /* Enable bus - termination - (if supported) */ - -/* RS-485 addressing mode */ -#define SER_RS485_ADDRB (1 << 6) /* Enable addressing mode */ -#define SER_RS485_ADDR_RECV (1 << 7) /* Receive address filter */ -#define SER_RS485_ADDR_DEST (1 << 8) /* Destination address */ +#define SER_RS485_TERMINATE_BUS (1 << 5) +#define SER_RS485_ADDRB (1 << 6) +#define SER_RS485_ADDR_RECV (1 << 7) +#define SER_RS485_ADDR_DEST (1 << 8) - __u32 delay_rts_before_send; /* Delay before send (milliseconds) */ - __u32 delay_rts_after_send; /* Delay after send (milliseconds) */ + __u32 delay_rts_before_send; + __u32 delay_rts_after_send; /* The fields below are defined by flags */ union { -- cgit v1.2.3 From ec32c0c42d0a7208571c4c77f140bffaa4e5e304 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Tue, 1 Nov 2022 10:48:29 +0100 Subject: net: dcb: add new pcp selector to app object Add new PCP selector for the 8021Qaz APP managed object. As the PCP selector is not part of the 8021Qaz standard, a new non-std extension attribute DCB_ATTR_DCB_APP has been introduced. Also two helper functions to translate between selector and app attribute type has been added. The new selector has been given a value of 255, to minimize the risk of future overlap of std- and non-std attributes. The new DCB_ATTR_DCB_APP is sent alongside the ieee std attribute in the app table. This means that the dcb_app struct can now both contain std- and non-std app attributes. Currently there is no overlap between the selector values of the two attributes. The purpose of adding the PCP selector, is to be able to offload PCP-based queue classification to the 8021Q Priority Code Point table, see 6.9.3 of IEEE Std 802.1Q-2018. PCP and DEI is encoded in the protocol field as 8*dei+pcp, so that a mapping of PCP 2 and DEI 1 to priority 3 is encoded as {255, 10, 3}. Signed-off-by: Daniel Machon Reviewed-by: Petr Machata Signed-off-by: Paolo Abeni --- include/uapi/linux/dcbnl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index a791a94013a6..dc7ef96207ca 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -218,6 +218,9 @@ struct cee_pfc { #define IEEE_8021QAZ_APP_SEL_ANY 4 #define IEEE_8021QAZ_APP_SEL_DSCP 5 +/* Non-std selector values */ +#define DCB_APP_SEL_PCP 255 + /* This structure contains the IEEE 802.1Qaz APP managed object. This * object is also used for the CEE std as well. * @@ -247,6 +250,8 @@ struct dcb_app { __u16 protocol; }; +#define IEEE_8021QAZ_APP_SEL_MAX 255 + /** * struct dcb_peer_app_info - APP feature information sent by the peer * @@ -425,6 +430,7 @@ enum ieee_attrs { enum ieee_attrs_app { DCB_ATTR_IEEE_APP_UNSPEC, DCB_ATTR_IEEE_APP, + DCB_ATTR_DCB_APP, __DCB_ATTR_IEEE_APP_MAX }; #define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1) -- cgit v1.2.3 From 6182d5875c330a5a611687caa05f47752455720c Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Tue, 1 Nov 2022 10:48:30 +0100 Subject: net: dcb: add new apptrust attribute Add new apptrust extension attributes to the 8021Qaz APP managed object. Two new attributes, DCB_ATTR_DCB_APP_TRUST_TABLE and DCB_ATTR_DCB_APP_TRUST, has been added. Trusted selectors are passed in the nested attribute DCB_ATTR_DCB_APP_TRUST, in order of precedence. The new attributes are meant to allow drivers, whose hw supports the notion of trust, to be able to set whether a particular app selector is trusted - and in which order. Signed-off-by: Daniel Machon Reviewed-by: Petr Machata Signed-off-by: Paolo Abeni --- include/uapi/linux/dcbnl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index dc7ef96207ca..99047223ab26 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -410,6 +410,7 @@ enum dcbnl_attrs { * @DCB_ATTR_IEEE_PEER_ETS: peer ETS configuration - get only * @DCB_ATTR_IEEE_PEER_PFC: peer PFC configuration - get only * @DCB_ATTR_IEEE_PEER_APP: peer APP tlv - get only + * @DCB_ATTR_DCB_APP_TRUST_TABLE: selector trust table */ enum ieee_attrs { DCB_ATTR_IEEE_UNSPEC, @@ -423,6 +424,7 @@ enum ieee_attrs { DCB_ATTR_IEEE_QCN, DCB_ATTR_IEEE_QCN_STATS, DCB_ATTR_DCB_BUFFER, + DCB_ATTR_DCB_APP_TRUST_TABLE, __DCB_ATTR_IEEE_MAX }; #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) -- cgit v1.2.3 From a35ec8e38cdd1766f29924ca391a01de20163931 Mon Sep 17 00:00:00 2001 From: "Hans J. Schultz" Date: Tue, 1 Nov 2022 21:39:21 +0200 Subject: bridge: Add MAC Authentication Bypass (MAB) support Hosts that support 802.1X authentication are able to authenticate themselves by exchanging EAPOL frames with an authenticator (Ethernet bridge, in this case) and an authentication server. Access to the network is only granted by the authenticator to successfully authenticated hosts. The above is implemented in the bridge using the "locked" bridge port option. When enabled, link-local frames (e.g., EAPOL) can be locally received by the bridge, but all other frames are dropped unless the host is authenticated. That is, unless the user space control plane installed an FDB entry according to which the source address of the frame is located behind the locked ingress port. The entry can be dynamic, in which case learning needs to be enabled so that the entry will be refreshed by incoming traffic. There are deployments in which not all the devices connected to the authenticator (the bridge) support 802.1X. Such devices can include printers and cameras. One option to support such deployments is to unlock the bridge ports connecting these devices, but a slightly more secure option is to use MAB. When MAB is enabled, the MAC address of the connected device is used as the user name and password for the authentication. For MAB to work, the user space control plane needs to be notified about MAC addresses that are trying to gain access so that they will be compared against an allow list. This can be implemented via the regular learning process with the sole difference that learned FDB entries are installed with a new "locked" flag indicating that the entry cannot be used to authenticate the device. The flag cannot be set by user space, but user space can clear the flag by replacing the entry, thereby authenticating the device. Locked FDB entries implement the following semantics with regards to roaming, aging and forwarding: 1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports, in which case the "locked" flag is cleared. FDB entries cannot roam to locked ports regardless of MAB being enabled or not. Therefore, locked FDB entries are only created if an FDB entry with the given {MAC, VID} does not already exist. This behavior prevents unauthenticated devices from disrupting traffic destined to already authenticated devices. 2. Aging: Locked FDB entries age and refresh by incoming traffic like regular entries. 3. Forwarding: Locked FDB entries forward traffic like regular entries. If user space detects an unauthorized MAC behind a locked port and wishes to prevent traffic with this MAC DA from reaching the host, it can do so using tc or a different mechanism. Enable the above behavior using a new bridge port option called "mab". It can only be enabled on a bridge port that is both locked and has learning enabled. Locked FDB entries are flushed from the port once MAB is disabled. A new option is added because there are pure 802.1X deployments that are not interested in notifications about locked FDB entries. Signed-off-by: Hans J. Schultz Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 1 + include/uapi/linux/neighbour.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5e7a1041df3a..d92b3f79eba3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -561,6 +561,7 @@ enum { IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, IFLA_BRPORT_LOCKED, + IFLA_BRPORT_MAB, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index a998bf761635..5e67a7eaf4a7 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -52,7 +52,8 @@ enum { #define NTF_STICKY (1 << 6) #define NTF_ROUTER (1 << 7) /* Extended flags under NDA_FLAGS_EXT: */ -#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) /* * Neighbor Cache Entry States. @@ -86,6 +87,11 @@ enum { * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf * of a user space control plane, and automatically refreshed so that (if * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. */ struct nda_cacheinfo { -- cgit v1.2.3 From dca56c3038c34a3e5acfe0aadb1f2bc9d724ae79 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:11 +0100 Subject: net: expose devlink port over rtnetlink Expose devlink port handle related to netdev over rtnetlink. Introduce a new nested IFLA attribute to carry the info. Call into devlink code to fill-up the nest with existing devlink attributes that are used over devlink netlink. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d92b3f79eba3..1021a7e47a86 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,8 @@ enum { IFLA_TSO_MAX_SEGS, IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ + IFLA_DEVLINK_PORT, + __IFLA_MAX }; -- cgit v1.2.3 From 7a4b3770d635d05f1d8a4a17ae4acab5b3d2bad1 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Fri, 28 Oct 2022 10:35:50 +0800 Subject: media: v4l: Add definition for the Aspeed JPEG format This introduces support for the Aspeed JPEG format, where the new frame can refer to previous frame to reduce the amount of compressed data. The concept is similar to I/P frame of video compression. It will compare the new frame with previous one to decide which macroblock's data is changed, and only the changed macroblocks will be compressed. This Aspeed JPEG format is used by the video engine on Aspeed platforms, which is generally adapted for remote KVM. Signed-off-by: Jammy Huang Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 29da1f4b4578..f91260e79ddc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -775,6 +775,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */ #define V4L2_PIX_FMT_QC08C v4l2_fourcc('Q', '0', '8', 'C') /* Qualcomm 8-bit compressed */ #define V4L2_PIX_FMT_QC10C v4l2_fourcc('Q', '1', '0', 'C') /* Qualcomm 10-bit compressed */ +#define V4L2_PIX_FMT_AJPG v4l2_fourcc('A', 'J', 'P', 'G') /* Aspeed JPEG */ /* 10bit raw packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */ #define V4L2_PIX_FMT_IPU3_SBGGR10 v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */ -- cgit v1.2.3 From 867d3b275c385e80eaf7c46ab08674ae0af47bf3 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Fri, 28 Oct 2022 10:35:51 +0800 Subject: media: v4l2-ctrls: Reserve controls for ASPEED Reserve controls for ASPEED video family. Aspeed video engine contains a few features which improve video quality, reduce amount of compressed data, and etc. Hence, 16 controls are reserved for these aspeed proprietary features. Signed-off-by: Jammy Huang Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index b5e7d082b8ad..5c09451779cb 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -231,6 +231,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_DW100_BASE (V4L2_CID_USER_BASE + 0x1190) +/* + * The base for Aspeed driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ASPEED_BASE (V4L2_CID_USER_BASE + 0x11a0) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From d4b9fd006fae58d2fdc68a1d1000e0498d8fbe33 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Fri, 28 Oct 2022 10:35:53 +0800 Subject: media: aspeed: Support aspeed mode to reduce compressed data aspeed supports differential jpeg format which only compress the parts which are changed. In this way, it reduces both the amount of data to be transferred by network and those to be decoded on the client side. 2 new ctrls are added: * Aspeed HQ Mode: to control aspeed's high quality(2-pass) compression mode This only works with yuv444 subsampling. * Aspeed HQ Quality: to control the quality of aspeed's HQ mode only useful if Aspeed HQ mode is enabled Aspeed JPEG Format requires an additional buffer, called bcd, to store the information about which macro block in the new frame is different from the previous one. To have bcd correctly working, we need to swap the buffers for src0/1 to make src1 refer to previous frame and src0 to the coming new frame. Signed-off-by: Jammy Huang Reported-by: kernel test robot Signed-off-by: Hans Verkuil [hverkuil: fix logging dma_addr_t, use %pad for that] --- include/uapi/linux/aspeed-video.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/uapi/linux/aspeed-video.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/aspeed-video.h b/include/uapi/linux/aspeed-video.h new file mode 100644 index 000000000000..6586a65548c4 --- /dev/null +++ b/include/uapi/linux/aspeed-video.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (C) 2021 ASPEED Technology Inc. + */ + +#ifndef _UAPI_LINUX_ASPEED_VIDEO_H +#define _UAPI_LINUX_ASPEED_VIDEO_H + +#include + +#define V4L2_CID_ASPEED_HQ_MODE (V4L2_CID_USER_ASPEED_BASE + 1) +#define V4L2_CID_ASPEED_HQ_JPEG_QUALITY (V4L2_CID_USER_ASPEED_BASE + 2) + +#endif /* _UAPI_LINUX_ASPEED_VIDEO_H */ -- cgit v1.2.3 From 9a0f830f80265bd1ef816e1541ac24bee80e9a3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Nov 2022 12:01:25 -0700 Subject: ethtool: linkstate: add a statistic for PHY down events The previous attempt to augment carrier_down (see Link) was not met with much enthusiasm so let's do the simple thing of exposing what some devices already maintain. Add a common ethtool statistic for link going down. Currently users have to maintain per-driver mapping to extract the right stat from the vendor-specific ethtool -S stats. carrier_down does not fit the bill because it counts a lot of software related false positives. Add the statistic to the extended link state API to steer vendors towards implementing all of it. Implement for bnxt and all Linux-controlled PHYs. mlx5 and (possibly) enic also have a counter for this but I leave the implementation to their maintainers. Link: https://lore.kernel.org/r/20220520004500.2250674-1-kuba@kernel.org Reviewed-by: Florian Fainelli Reviewed-by: Michael Chan Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20221104190125.684910-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index bb57084ac524..aaf7c6963d61 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -262,6 +262,7 @@ enum { ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, -- cgit v1.2.3 From a21b06e7319129994f339ed47f512bbe57b77f5b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 6 Nov 2022 15:34:17 -0500 Subject: net: sched: add helper support in act_ct This patch is to add helper support in act_ct for OVS actions=ct(alg=xxx) offloading, which is corresponding to Commit cae3a2627520 ("openvswitch: Allow attaching helpers to ct action") in OVS kernel part. The difference is when adding TC actions family and proto cannot be got from the filter/match, other than helper name in tb[TCA_CT_HELPER_NAME], we also need to send the family in tb[TCA_CT_HELPER_FAMILY] and the proto in tb[TCA_CT_HELPER_PROTO] to kernel. Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: Paolo Abeni --- include/uapi/linux/tc_act/tc_ct.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_ct.h b/include/uapi/linux/tc_act/tc_ct.h index 5fb1d7ac1027..6c5200f0ed38 100644 --- a/include/uapi/linux/tc_act/tc_ct.h +++ b/include/uapi/linux/tc_act/tc_ct.h @@ -22,6 +22,9 @@ enum { TCA_CT_NAT_PORT_MIN, /* be16 */ TCA_CT_NAT_PORT_MAX, /* be16 */ TCA_CT_PAD, + TCA_CT_HELPER_NAME, /* string */ + TCA_CT_HELPER_FAMILY, /* u8 */ + TCA_CT_HELPER_PROTO, /* u8 */ __TCA_CT_MAX }; -- cgit v1.2.3 From defbab270d45e32b068e7e73c3567232d745c60f Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 27 Sep 2022 14:52:56 -0700 Subject: include/uapi/linux/swab: Fix potentially missing __always_inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining of some byteswap operations") added __always_inline to swab functions and commit 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") added a definition of __always_inline for use in exported headers when the kernel's compiler.h is not available. However, since swab.h does not include stddef.h, if the header soup does not indirectly include it, the definition of __always_inline is missing, resulting in a compilation failure, which was observed compiling the perf tool using exported headers containing this commit: In file included from /usr/include/linux/byteorder/little_endian.h:12:0, from /usr/include/asm/byteorder.h:14, from tools/include/uapi/linux/perf_event.h:20, from perf.h:8, from builtin-bench.c:18: /usr/include/linux/swab.h:160:8: error: unknown type name `__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) Fix this by replacing the inclusion of linux/compiler.h with linux/stddef.h to ensure that we pick up that definition if required, without relying on it's indirect inclusion. compiler.h is then included indirectly, via stddef.h. Fixes: 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers") Signed-off-by: Matt Redfearn Signed-off-by: Florian Fainelli Signed-off-by: Arnd Bergmann Tested-by: Nathan Chancellor Reviewed-by: Petr Vaněk Signed-off-by: Arnd Bergmann --- include/uapi/linux/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 0723a9cce747..01717181339e 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -3,7 +3,7 @@ #define _UAPI_LINUX_SWAB_H #include -#include +#include #include #include -- cgit v1.2.3 From db205f7e1edc8d8f0880f0218d3a03b13fe94af3 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 21 Sep 2022 15:15:22 +0000 Subject: KVM: x86: Add a VALID_MASK for the MSR exit reason flags Add the mask KVM_MSR_EXIT_REASON_VALID_MASK for the MSR exit reason flags. This simplifies checks that validate these flags, and makes it easier to introduce new flags in the future. No functional change intended. Signed-off-by: Aaron Lewis Message-Id: <20220921151525.904162-3-aaronlewis@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..7fea12369245 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -485,6 +485,9 @@ struct kvm_run { #define KVM_MSR_EXIT_REASON_INVAL (1 << 0) #define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) #define KVM_MSR_EXIT_REASON_FILTER (1 << 2) +#define KVM_MSR_EXIT_REASON_VALID_MASK (KVM_MSR_EXIT_REASON_INVAL | \ + KVM_MSR_EXIT_REASON_UNKNOWN | \ + KVM_MSR_EXIT_REASON_FILTER) __u32 reason; /* kernel -> user */ __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ -- cgit v1.2.3 From 86bdf3ebcfe1ded055282536fecce13001874740 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:10 +0800 Subject: KVM: Support dirty ring in conjunction with bitmap ARM64 needs to dirty memory outside of a VCPU context when VGIC/ITS is enabled. It's conflicting with that ring-based dirty page tracking always requires a running VCPU context. Introduce a new flavor of dirty ring that requires the use of both VCPU dirty rings and a dirty bitmap. The expectation is that for non-VCPU sources of dirty memory (such as the VGIC/ITS on arm64), KVM writes to the dirty bitmap. Userspace should scan the dirty bitmap before migrating the VM to the target. Use an additional capability to advertise this behavior. The newly added capability (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) can't be enabled before KVM_CAP_DIRTY_LOG_RING_ACQ_REL on ARM64. In this way, the newly added capability is treated as an extension of KVM_CAP_DIRTY_LOG_RING_ACQ_REL. Suggested-by: Marc Zyngier Suggested-by: Peter Xu Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Signed-off-by: Gavin Shan Acked-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-4-gshan@redhat.com --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..c87b5882d7ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 224 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 9bb053490f1a5a0914eb9f7b4116a0e4a95d4f8e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 7 Nov 2022 15:04:18 -0800 Subject: bpf: Add hwtstamp field for the sockops prog The bpf-tc prog has already been able to access the skb_hwtstamps(skb)->hwtstamp. This patch extends the same hwtstamp access to the sockops prog. In sockops, the skb is also available to the bpf prog during the BPF_SOCK_OPS_PARSE_HDR_OPT_CB event. There is a use case that the hwtstamp will be useful to the sockops prog to better measure the one-way-delay when the sender has put the tx timestamp in the tcp header option. Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221107230420.4192307-2-martin.lau@linux.dev --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 94659f6b3395..fb4c911d2a03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6445,6 +6445,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ -- cgit v1.2.3 From dc901d98b1fe6e52ab81cd3e0879379168e06daa Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 10 Nov 2022 17:27:15 -0800 Subject: dmaengine: idxd: Fix crc_val field for completion record The crc_val in the completion record should be 64 bits and not 32 bits. Fixes: 4ac823e9cd85 ("dmaengine: idxd: fix delta_rec and crc size field for completion record") Reported-by: Nirav N Shah Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20221111012715.2031481-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 2b9e7feba3f3..1d553bedbdb5 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -295,7 +295,7 @@ struct dsa_completion_record { }; uint32_t delta_rec_size; - uint32_t crc_val; + uint64_t crc_val; /* DIF check & strip */ struct { -- cgit v1.2.3 From 487d828d751d90cf9ca594f45b02dd0e0d712b64 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 26 Sep 2022 14:57:11 -0700 Subject: cxl/doe: Request exclusive DOE access The PCIE Data Object Exchange (DOE) mailbox is a protocol run over configuration cycles. It assumes one initiator at a time. While the kernel has control of the mailbox user space writes could interfere with the kernel access. Mark DOE mailbox config space exclusive when iterated by the CXL driver. Signed-off-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220926215711.2893286-3-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd..82a03ea954af 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1119,6 +1119,7 @@ #define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */ #define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */ #define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */ +#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */ /* DOE Data Object - note not actually registers */ #define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff -- cgit v1.2.3 From 4e016f969529f2aec0545e90119e7eb3cb124c46 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 6 Nov 2022 19:46:18 +0200 Subject: vfio: Add an option to get migration data size Add an option to get migration data size by introducing a new migration feature named VFIO_DEVICE_FEATURE_MIG_DATA_SIZE. Upon VFIO_DEVICE_FEATURE_GET the estimated data length that will be required to complete STOP_COPY is returned. This option may better enable user space to consider before moving to STOP_COPY whether it can meet the downtime SLA based on the returned data. The patch also includes the implementation for mlx5 and hisi for this new option to make it feature complete for the existing drivers in this area. Signed-off-by: Yishai Hadas Reviewed-by: Jason Gunthorpe Reviewed-by: Longfang Liu Link: https://lore.kernel.org/r/20221106174630.25909-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index d7d8e0922376..3e45dbaf190e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1128,6 +1128,19 @@ struct vfio_device_feature_dma_logging_report { #define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 +/* + * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will + * be required to complete stop copy. + * + * Note: Can be called on each device state. + */ + +struct vfio_device_feature_mig_data_size { + __aligned_u64 stop_copy_length; +}; + +#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From f0c5941ff5b255413d31425bb327c2aec3625673 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 15 Nov 2022 00:45:25 +0530 Subject: bpf: Support bpf_list_head in map values Add the support on the map side to parse, recognize, verify, and build metadata table for a new special field of the type struct bpf_list_head. To parameterize the bpf_list_head for a certain value type and the list_node member it will accept in that value type, we use BTF declaration tags. The definition of bpf_list_head in a map value will be done as follows: struct foo { struct bpf_list_node node; int data; }; struct map_value { struct bpf_list_head head __contains(foo, node); }; Then, the bpf_list_head only allows adding to the list 'head' using the bpf_list_node 'node' for the type struct foo. The 'contains' annotation is a BTF declaration tag composed of four parts, "contains:name:node" where the name is then used to look up the type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during the lookup. The node defines name of the member in this type that has the type struct bpf_list_node, which is actually used for linking into the linked list. For now, 'kind' part is hardcoded as struct. This allows building intrusive linked lists in BPF, using container_of to obtain pointer to entry, while being completely type safe from the perspective of the verifier. The verifier knows exactly the type of the nodes, and knows that list helpers return that type at some fixed offset where the bpf_list_node member used for this list exists. The verifier also uses this information to disallow adding types that are not accepted by a certain list. For now, no elements can be added to such lists. Support for that is coming in future patches, hence draining and freeing items is done with a TODO that will be resolved in a future patch. Note that the bpf_list_head_free function moves the list out to a local variable under the lock and releases it, doing the actual draining of the list items outside the lock. While this helps with not holding the lock for too long pessimizing other concurrent list operations, it is also necessary for deadlock prevention: unless every function called in the critical section would be notrace, a fentry/fexit program could attach and call bpf_map_update_elem again on the map, leading to the same lock being acquired if the key matches and lead to a deadlock. While this requires some special effort on part of the BPF programmer to trigger and is highly unlikely to occur in practice, it is always better if we can avoid such a condition. While notrace would prevent this, doing the draining outside the lock has advantages of its own, hence it is used to also fix the deadlock related problem. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fb4c911d2a03..6580448e9f77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6888,6 +6888,16 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 8daa8fde3fc3f069ff0b5c87079a5c1df7743113 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 14 Oct 2022 23:45:59 +0200 Subject: netfilter: nf_tables: Introduce NFT_MSG_GETRULE_RESET Analogous to NFT_MSG_GETOBJ_RESET, but for rules: Reset stateful expressions like counters or quotas. The latter two are the only consumers, adjust their 'dump' callbacks to respect the parameter introduced earlier. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index e4b739d57480..cfa844da1ce6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -97,6 +97,7 @@ enum nft_verdicts { * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + * @NFT_MSG_GETRULE_RESET: get rules and reset stateful expressions (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -124,6 +125,7 @@ enum nf_tables_msg_types { NFT_MSG_NEWFLOWTABLE, NFT_MSG_GETFLOWTABLE, NFT_MSG_DELFLOWTABLE, + NFT_MSG_GETRULE_RESET, NFT_MSG_MAX, }; -- cgit v1.2.3 From 32637e33003f36e75e9147788cc0e2f21706ef99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 8 Nov 2022 15:06:00 +0100 Subject: bpf: Expand map key argument of bpf_redirect_map to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For queueing packets in XDP we want to add a new redirect map type with support for 64-bit indexes. To prepare fore this, expand the width of the 'key' argument to the bpf_redirect_map() helper. Since BPF registers are always 64-bit, this should be safe to do after the fact. Acked-by: Song Liu Reviewed-by: Stanislav Fomichev Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20221108140601.149971-3-toke@redhat.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6580448e9f77..ab86145df760 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2647,7 +2647,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain -- cgit v1.2.3 From 6c8c1406a6d6a3f2e61ac590f5c0994231bc6be7 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Wed, 16 Nov 2022 14:38:19 -0800 Subject: virt: Add TDX guest driver TDX guest driver exposes IOCTL interfaces to service TDX guest user-specific requests. Currently, it is only used to allow the user to get the TDREPORT to support TDX attestation. Details about the TDX attestation process are documented in Documentation/x86/tdx.rst, and the IOCTL details are documented in Documentation/virt/coco/tdx-guest.rst. Operations like getting TDREPORT involves sending a blob of data as input and getting another blob of data as output. It was considered to use a sysfs interface for this, but it doesn't fit well into the standard sysfs model for configuring values. It would be possible to do read/write on files, but it would need multiple file descriptors, which would be somewhat messy. IOCTLs seem to be the best fitting and simplest model for this use case. The AMD sev-guest driver also uses the IOCTL interface to support attestation. [Bagas Sanjaya: Ack is for documentation portion] Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Dave Hansen Reviewed-by: Bagas Sanjaya Reviewed-by: Tony Luck Reviewed-by: Mika Westerberg Acked-by: Kai Huang Acked-by: Kirill A. Shutemov Acked-by: Wander Lairson Costa Link: https://lore.kernel.org/all/20221116223820.819090-3-sathyanarayanan.kuppuswamy%40linux.intel.com --- include/uapi/linux/tdx-guest.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 include/uapi/linux/tdx-guest.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tdx-guest.h b/include/uapi/linux/tdx-guest.h new file mode 100644 index 000000000000..a6a2098c08ff --- /dev/null +++ b/include/uapi/linux/tdx-guest.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace interface for TDX guest driver + * + * Copyright (C) 2022 Intel Corporation + */ + +#ifndef _UAPI_LINUX_TDX_GUEST_H_ +#define _UAPI_LINUX_TDX_GUEST_H_ + +#include +#include + +/* Length of the REPORTDATA used in TDG.MR.REPORT TDCALL */ +#define TDX_REPORTDATA_LEN 64 + +/* Length of TDREPORT used in TDG.MR.REPORT TDCALL */ +#define TDX_REPORT_LEN 1024 + +/** + * struct tdx_report_req - Request struct for TDX_CMD_GET_REPORT0 IOCTL. + * + * @reportdata: User buffer with REPORTDATA to be included into TDREPORT. + * Typically it can be some nonce provided by attestation + * service, so the generated TDREPORT can be uniquely verified. + * @tdreport: User buffer to store TDREPORT output from TDCALL[TDG.MR.REPORT]. + */ +struct tdx_report_req { + __u8 reportdata[TDX_REPORTDATA_LEN]; + __u8 tdreport[TDX_REPORT_LEN]; +}; + +/* + * TDX_CMD_GET_REPORT0 - Get TDREPORT0 (a.k.a. TDREPORT subtype 0) using + * TDCALL[TDG.MR.REPORT] + * + * Return 0 on success, -EIO on TDCALL execution failure, and + * standard errno on other general error cases. + */ +#define TDX_CMD_GET_REPORT0 _IOWR('T', 1, struct tdx_report_req) + +#endif /* _UAPI_LINUX_TDX_GUEST_H_ */ -- cgit v1.2.3 From 8acbca3a92b859e3dfe0538254acd5bd5b4632b1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 13:56:59 +0200 Subject: headers: Remove some left-over license text in include/uapi/linux/hsi/ Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Signed-off-by: Christophe JAILLET Acked-by: Kai Vehmanen Acked-by: Peter Ujfalusi Signed-off-by: Sebastian Reichel --- include/uapi/linux/hsi/cs-protocol.h | 14 -------------- include/uapi/linux/hsi/hsi_char.h | 14 -------------- 2 files changed, 28 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h index c7f6e7672cb5..07c3bfb67463 100644 --- a/include/uapi/linux/hsi/cs-protocol.h +++ b/include/uapi/linux/hsi/cs-protocol.h @@ -6,20 +6,6 @@ * * Contact: Kai Vehmanen * Original author: Peter Ujfalusi - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef _CS_PROTOCOL_H diff --git a/include/uapi/linux/hsi/hsi_char.h b/include/uapi/linux/hsi/hsi_char.h index 91623b0398b1..5ef72f0daf94 100644 --- a/include/uapi/linux/hsi/hsi_char.h +++ b/include/uapi/linux/hsi/hsi_char.h @@ -5,20 +5,6 @@ * Copyright (C) 2010 Nokia Corporation. All rights reserved. * * Contact: Andras Domokos - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef __HSI_CHAR_H -- cgit v1.2.3 From f20a0a0519f35a3a34236ed2d38b067c5cb22b9f Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 17 Nov 2022 02:18:28 +0900 Subject: ethtool: doc: clarify what drivers can implement in their get_drvinfo() Many of the drivers which implement ethtool_ops::get_drvinfo() will prints the .driver, .version or .bus_info of struct ethtool_drvinfo. To have a glance of current state, do: $ git grep -W "get_drvinfo(struct" Printing in those three fields is useless because: - since [1], the driver version should be the kernel version (at least for upstream drivers). Arguably, out of tree drivers might still want to set a custom version, but out of tree is not our focus. - since [2], the core is able to provide default values for .driver and .bus_info. In summary, drivers may provide .fw_version and .erom_version, the rest is expected to be done by the core. In struct ethtool_ops doc from linux/ethtool: rephrase field get_drvinfo() doc to discourage developers from implementing this callback. In struct ethtool_drvinfo doc from uapi/linux/ethtool.h: remove the paragraph mentioning what drivers should do. Rationale: no need to repeat what is already written in struct ethtool_ops doc. But add a note that .fw_version and .erom_version are driver defined. Also update the dummy driver and simply remove the callback in order not to confuse the newcomers: most of the drivers will not need this callback function any more. [1] commit 6a7e25c7fb48 ("net/core: Replace driver version to be kernel version") Link: https://git.kernel.org/torvalds/linux/c/6a7e25c7fb48 [2] commit edaf5df22cb8 ("ethtool: ethtool_get_drvinfo: populate drvinfo fields even if callback exits") Link: https://git.kernel.org/netdev/net-next/c/edaf5df22cb8 Reviewed-by: Leon Romanovsky Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20221116171828.4093-1-mailhol.vincent@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f341de2ae612..58e587ba0450 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -159,8 +159,10 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) * in its bus driver structure (e.g. pci_driver::name). Must * not be an empty string. * @version: Driver version string; may be an empty string - * @fw_version: Firmware version string; may be an empty string - * @erom_version: Expansion ROM version string; may be an empty string + * @fw_version: Firmware version string; driver defined; may be an + * empty string + * @erom_version: Expansion ROM version string; driver defined; may be + * an empty string * @bus_info: Device bus address. This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. @@ -179,10 +181,6 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) * * Users can use the %ETHTOOL_GSSET_INFO command to get the number of * strings in any string set (from Linux 2.6.34). - * - * Drivers should set at most @driver, @version, @fw_version and - * @bus_info in their get_drvinfo() implementation. The ethtool - * core fills in the other fields using other driver operations. */ struct ethtool_drvinfo { __u32 cmd; -- cgit v1.2.3 From cd502236835b678738810ecd501c85a3a7a11150 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:15 +0100 Subject: devlink: Introduce new attribute 'tx_priority' to devlink-rate To fully utilize offload capabilities of Intel 100G card QoS capabilities new attribute 'tx_priority' needs to be introduced. This attribute allows for usage of strict priority arbiter among siblings. This arbitration scheme attempts to schedule nodes based on their priority as long as the nodes remain within their bandwidth limit. Introduce new attribute in devlink-rate that will allow for configuration of strict priority. New attribute is optional. Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 2f24b53a87a5..1a9214d35ef5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -607,6 +607,7 @@ enum devlink_attr { DEVLINK_ATTR_SELFTESTS, /* nested */ + DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 6e2d7e84fcfee62d70e62aa9e469d10b8b6a7dc7 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:16 +0100 Subject: devlink: Introduce new attribute 'tx_weight' to devlink-rate To fully utilize offload capabilities of Intel 100G card QoS capabilities new attribute 'tx_weight' needs to be introduced. This attribute allows for usage of Weighted Fair Queuing arbitration scheme among siblings. This arbitration scheme can be used simultaneously with the strict priority. Introduce new attribute in devlink-rate that will allow for configuration of Weighted Fair Queueing. New attribute is optional. Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1a9214d35ef5..498d0d5d0957 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -608,6 +608,8 @@ enum devlink_attr { DEVLINK_ATTR_SELFTESTS, /* nested */ DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ + DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From c73a72f4cbb47672c8cc7f7d7aba52f1cb15baca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Nov 2022 19:39:03 -0800 Subject: netlink: remove the flex array from struct nlmsghdr I've added a flex array to struct nlmsghdr in commit 738136a0e375 ("netlink: split up copies in the ack construction") to allow accessing the data easily. It leads to warnings with clang, if user space wraps this structure into another struct and the flex array is not at the end of the container. Reviewed-by: Kees Cook Reviewed-by: David Ahern Link: https://lore.kernel.org/all/20221114023927.GA685@u2004-local/ Link: https://lore.kernel.org/r/20221118033903.1651026-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 5da0da59bf01..e2ae82e3f9f7 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -48,7 +48,6 @@ struct sockaddr_nl { * @nlmsg_flags: Additional flags * @nlmsg_seq: Sequence number * @nlmsg_pid: Sending process port ID - * @nlmsg_data: Message payload */ struct nlmsghdr { __u32 nlmsg_len; @@ -56,7 +55,6 @@ struct nlmsghdr { __u16 nlmsg_flags; __u32 nlmsg_seq; __u32 nlmsg_pid; - __u8 nlmsg_data[]; }; /* Flags values */ -- cgit v1.2.3 From e307e6698165ca6508ed42c69cb1be76c8eb6a3c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 27 Oct 2022 20:34:45 +0200 Subject: io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag It might be useful for applications to detect if a zero copy transfer with SEND[MSG]_ZC was actually possible or not. The application can fallback to plain SEND[MSG] in order to avoid the overhead of two cqes per request. Or it can generate a log message that could indicate to an administrator that no zero copy was possible and could explain degraded performance. Cc: stable@vger.kernel.org # 6.1 Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa Signed-off-by: Stefan Metzmacher Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/8945b01756d902f5d5b0667f20b957ad3f742e5e.1666895626.git.metze@samba.org Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2df3225b562f..9d4c4078e8d0 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -296,10 +296,28 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. + * + * IORING_SEND_ZC_REPORT_USAGE + * If set, SEND[MSG]_ZC should report + * the zerocopy usage in cqe.res + * for the IORING_CQE_F_NOTIF cqe. + * 0 is reported if zerocopy was actually possible. + * IORING_NOTIF_USAGE_ZC_COPIED if data was copied + * (at least partially). */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) +#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) + +/* + * cqe.res for IORING_CQE_F_NOTIF if + * IORING_SEND_ZC_REPORT_USAGE was requested + * + * It should be treated as a flag, all other + * bits of cqe.res should be treated as reserved! + */ +#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) /* * accept flags stored in sqe->ioprio -- cgit v1.2.3 From caf1aeaffc3b09649a56769e559333ae2c4f1802 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 20 Nov 2022 10:10:53 -0700 Subject: eventpoll: add EPOLL_URING_WAKE poll wakeup flag We can have dependencies between epoll and io_uring. Consider an epoll context, identified by the epfd file descriptor, and an io_uring file descriptor identified by iofd. If we add iofd to the epfd context, and arm a multishot poll request for epfd with iofd, then the multishot poll request will repeatedly trigger and generate events until terminated by CQ ring overflow. This isn't a desired behavior. Add EPOLL_URING so that io_uring can pass it in as part of the poll wakeup key, and io_uring can check for that to detect a potential recursive invocation. Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Jens Axboe --- include/uapi/linux/eventpoll.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8a3432d0f0dc..e687658843b1 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -41,6 +41,12 @@ #define EPOLLMSG (__force __poll_t)0x00000400 #define EPOLLRDHUP (__force __poll_t)0x00002000 +/* + * Internal flag - wakeup generated by io_uring, used to detect recursion back + * into the io_uring poll handler. + */ +#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27)) + /* Set exclusive wakeup mode for the target file descriptor */ #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28)) -- cgit v1.2.3 From 9f4211bf7f811b653aa6acfb9aea38222436a458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eray=20Or=C3=A7unus?= Date: Tue, 22 Nov 2022 17:46:55 -0800 Subject: HID: add mapping for camera access keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HUTRR72 added 3 new usage codes for keys that are supposed to enable, disable and toggle camera access. These are useful, considering many laptops today have key(s) for toggling access to camera. This patch adds new key definitions for KEY_CAMERA_ACCESS_ENABLE, KEY_CAMERA_ACCESS_DISABLE and KEY_CAMERA_ACCESS_TOGGLE. Additionally hid-debug is adjusted to recognize this new usage codes as well. Signed-off-by: Eray Orçunus Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20221029120311.11152-3-erayorcunus@gmail.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7ad931a32970..022a520e31fc 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -614,6 +614,9 @@ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ +#define KEY_CAMERA_ACCESS_ENABLE 0x24b /* Enables programmatic access to camera devices. (HUTRR72) */ +#define KEY_CAMERA_ACCESS_DISABLE 0x24c /* Disables programmatic access to camera devices. (HUTRR72) */ +#define KEY_CAMERA_ACCESS_TOGGLE 0x24d /* Toggles the current state of the camera access control. (HUTRR72) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 4f8d37020e1fd0bf6ee9381ba918135ef3712efd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2022 14:25:21 +0200 Subject: fuse: add "expire only" mode to FUSE_NOTIFY_INVAL_ENTRY Add a flag to entry expiration that lets the filesystem expire a dentry without kicking it out from the cache immediately. This makes a difference for overmounted dentries, where plain invalidation would detach all submounts before dropping the dentry from the cache. If only expiry is set on the dentry, then any overmounts are left alone and until ->d_revalidate() is called. Note: ->d_revalidate() is not called for the case of following a submount, so invalidation will only be triggered for the non-overmounted case. The dentry could also be mounted in a different mount instance, in which case any submounts will still be detached. Suggested-by: Jakob Blomer Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 76ee8f9e024a..39cfb343faa8 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -197,6 +197,9 @@ * * 7.37 * - add FUSE_TMPFILE + * + * 7.38 + * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry */ #ifndef _LINUX_FUSE_H @@ -232,7 +235,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 37 +#define FUSE_KERNEL_MINOR_VERSION 38 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -491,6 +494,12 @@ struct fuse_file_lock { */ #define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) +/** + * notify_inval_entry flags + * FUSE_EXPIRE_ONLY + */ +#define FUSE_EXPIRE_ONLY (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -919,7 +928,7 @@ struct fuse_notify_inval_inode_out { struct fuse_notify_inval_entry_out { uint64_t parent; uint32_t namelen; - uint32_t padding; + uint32_t flags; }; struct fuse_notify_delete_out { -- cgit v1.2.3 From 153524053bbb0d27bb2e0be36d1b46862e9ce74c Mon Sep 17 00:00:00 2001 From: Dharmendra Singh Date: Fri, 17 Jun 2022 12:40:27 +0530 Subject: fuse: allow non-extending parallel direct writes on the same file In general, as of now, in FUSE, direct writes on the same file are serialized over inode lock i.e we hold inode lock for the full duration of the write request. I could not find in fuse code and git history a comment which clearly explains why this exclusive lock is taken for direct writes. Following might be the reasons for acquiring an exclusive lock but not be limited to 1) Our guess is some USER space fuse implementations might be relying on this lock for serialization. 2) The lock protects against file read/write size races. 3) Ruling out any issues arising from partial write failures. This patch relaxes the exclusive lock for direct non-extending writes only. File size extending writes might not need the lock either, but we are not entirely sure if there is a risk to introduce any kind of regression. Furthermore, benchmarking with fio does not show a difference between patch versions that take on file size extension a) an exclusive lock and b) a shared lock. A possible example of an issue with i_size extending writes are write error cases. Some writes might succeed and others might fail for file system internal reasons - for example ENOSPACE. With parallel file size extending writes it _might_ be difficult to revert the action of the failing write, especially to restore the right i_size. With these changes, we allow non-extending parallel direct writes on the same file with the help of a flag called FOPEN_PARALLEL_DIRECT_WRITES. If this flag is set on the file (flag is passed from libfuse to fuse kernel as part of file open/create), we do not take exclusive lock anymore, but instead use a shared lock that allows non-extending writes to run in parallel. FUSE implementations which rely on this inode lock for serialization can continue to do so and serialized direct writes are still the default. Implementations that do not do write serialization need to be updated and need to set the FOPEN_PARALLEL_DIRECT_WRITES flag in their file open/create reply. On patch review there were concerns that network file systems (or vfs multiple mounts of the same file system) might have issues with parallel writes. We believe this is not the case, as this is just a local lock, which network file systems could not rely on anyway. I.e. this lock is just for local consistency. Signed-off-by: Dharmendra Singh Signed-off-by: Bernd Schubert Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 39cfb343faa8..e3c54109bae9 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -200,6 +200,7 @@ * * 7.38 * - add FUSE_EXPIRE_ONLY flag to fuse_notify_inval_entry + * - add FOPEN_PARALLEL_DIRECT_WRITES */ #ifndef _LINUX_FUSE_H @@ -307,6 +308,7 @@ struct fuse_file_lock { * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -314,6 +316,7 @@ struct fuse_file_lock { #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) +#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) /** * INIT request/reply flags -- cgit v1.2.3 From fb491d5500a7ca551e49bc32d9b19d226023f68d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:27 +0100 Subject: KVM: s390: pv: asynchronous destroy for reboot Until now, destroying a protected guest was an entirely synchronous operation that could potentially take a very long time, depending on the size of the guest, due to the time needed to clean up the address space from protected pages. This patch implements an asynchronous destroy mechanism, that allows a protected guest to reboot significantly faster than previously. This is achieved by clearing the pages of the old guest in background. In case of reboot, the new guest will be able to run in the same address space almost immediately. The old protected guest is then only destroyed when all of its memory has been destroyed or otherwise made non protected. Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl: KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for later asynchronous teardown. The current KVM VM will then continue immediately as non-protected. If a protected VM had already been set aside for asynchronous teardown, but without starting the teardown process, this call will fail. There can be at most one VM set aside at any time. Once it is set aside, the protected VM only exists in the context of the Ultravisor, it is not associated with the KVM VM anymore. Its protected CPUs have already been destroyed, but not its memory. This command can be issued again immediately after starting KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion. KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace from a separate thread. If a fatal signal is received (or if the process terminates naturally), the command will terminate immediately without completing. All protected VMs whose teardown was interrupted will be put in the need_cleanup list. The rest of the normal KVM teardown process will take care of properly cleaning up all remaining protected VMs, including the ones on the need_cleanup list. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..b3701b23ca18 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1740,6 +1740,8 @@ enum pv_cmd_id { KVM_PV_UNSHARE_ALL, KVM_PV_INFO, KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, }; struct kvm_pv_cmd { -- cgit v1.2.3 From 8c516b25d6e9c70e6d76627932b14b0ef03a82c4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:29 +0100 Subject: KVM: s390: pv: add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE Add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE to signal that the KVM_PV_ASYNC_DISABLE and KVM_PV_ASYNC_DISABLE_PREPARE commands for the KVM_S390_PV_COMMAND ioctl are available. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221111170632.77622-4-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-4-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b3701b23ca18..d3f86a280858 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 1dbb4f0235a450f22e518124cbf9b922802ce38f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Nov 2022 18:29:56 +0200 Subject: virt: acrn: Mark the uuid field as unused After the commits for userspace (see Link tags below) the uuid field is not being used in the ACRN code. Update kernel to reflect these changes, i.e. do the following: - adding a comment explaining that it's not used anymore - replacing the specific type by a raw buffer - updating the example code accordingly The advertised field confused users and actually never been used. So the wrong part here is that kernel puts something which userspace never used and hence this may confuse a reader of this code. Note, that there is only a single tool that had been prepared a year ago for these forthcoming changes in the kernel. Link: https://github.com/projectacrn/acrn-hypervisor/commit/da0d24326ed6 Link: https://github.com/projectacrn/acrn-hypervisor/commit/bb0327e70097 Signed-off-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20221116162956.72658-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/acrn.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h index ccf47ed92500..7b714c1902eb 100644 --- a/include/uapi/linux/acrn.h +++ b/include/uapi/linux/acrn.h @@ -12,7 +12,6 @@ #define _UAPI_ACRN_H #include -#include #define ACRN_IO_REQUEST_MAX 16 @@ -186,7 +185,7 @@ struct acrn_ioreq_notify { * @reserved0: Reserved and must be 0 * @vcpu_num: Number of vCPU in the VM. Return from hypervisor. * @reserved1: Reserved and must be 0 - * @uuid: UUID of the VM. Pass to hypervisor directly. + * @uuid: Empty space never to be used again (used to be UUID of the VM) * @vm_flag: Flag of the VM creating. Pass to hypervisor directly. * @ioreq_buf: Service VM GPA of I/O request buffer. Pass to * hypervisor directly. @@ -198,7 +197,7 @@ struct acrn_vm_creation { __u16 reserved0; __u16 vcpu_num; __u16 reserved1; - guid_t uuid; + __u8 uuid[16]; __u64 vm_flag; __u64 ioreq_buf; __u64 cpu_affinity; -- cgit v1.2.3 From 72b43bde38de4aa05e6a7fa12d7965f48180deb6 Mon Sep 17 00:00:00 2001 From: Ji Rongfeng Date: Fri, 18 Nov 2022 16:18:18 +0800 Subject: bpf: Update bpf_{g,s}etsockopt() documentation * append missing optnames to the end * simplify bpf_getsockopt()'s doc Signed-off-by: Ji Rongfeng Link: https://lore.kernel.org/r/DU0P192MB15479B86200B1216EC90E162D6099@DU0P192MB1547.EURP192.PROD.OUTLOOK.COM Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ab86145df760..f89de51a45db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2584,14 +2584,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2808,12 +2813,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From 6508a50fe84f9858e8b59b53dce3847aaeeab744 Mon Sep 17 00:00:00 2001 From: Robert Schlabbach Date: Fri, 14 Jan 2022 07:43:48 +0000 Subject: media: dvb: add DVB-C2 and DVB-S2X parameter values Extend the DVB frontend parameter enums with additional values specified by the DVB-C2 (ETSI EN 302 769) and DVB-S2X (ETSI EN 302 307-2) standards to be ready for frontend drivers for such receivers. While most parameters will be "read-only" due to being autodetected by the receiver and only being reported back for informational purposes, the addition of SYS_DVBC2 to the delivery systems enum is required, because there are DVB-C2 capable receivers which are not capable of DVB-C/C2 autodetection and thus need this enum value to be explicitly instructed to search for a DVB-C2 signal. As for DVB-S2X, as that is an extension to DVB-S2, the same delivery system enum as for DVB-S2 can be used. Add the additional enum values and comments to the documentation. Link: https://lore.kernel.org/linux-media/trinity-1b7c5a66-85d4-4595-a690-0fde965d49b3-1642146228587@3c-app-gmx-bap69 Signed-off-by: Robert Schlabbach Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 59 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 4f9b4551c534..4fed9e316147 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -296,6 +296,22 @@ enum fe_spectral_inversion { * @FEC_3_5: Forward Error Correction Code 3/5 * @FEC_9_10: Forward Error Correction Code 9/10 * @FEC_2_5: Forward Error Correction Code 2/5 + * @FEC_1_3: Forward Error Correction Code 1/3 + * @FEC_1_4: Forward Error Correction Code 1/4 + * @FEC_5_9: Forward Error Correction Code 5/9 + * @FEC_7_9: Forward Error Correction Code 7/9 + * @FEC_8_15: Forward Error Correction Code 8/15 + * @FEC_11_15: Forward Error Correction Code 11/15 + * @FEC_13_18: Forward Error Correction Code 13/18 + * @FEC_9_20: Forward Error Correction Code 9/20 + * @FEC_11_20: Forward Error Correction Code 11/20 + * @FEC_23_36: Forward Error Correction Code 23/36 + * @FEC_25_36: Forward Error Correction Code 25/36 + * @FEC_13_45: Forward Error Correction Code 13/45 + * @FEC_26_45: Forward Error Correction Code 26/45 + * @FEC_28_45: Forward Error Correction Code 28/45 + * @FEC_32_45: Forward Error Correction Code 32/45 + * @FEC_77_90: Forward Error Correction Code 77/90 * * Please note that not all FEC types are supported by a given standard. */ @@ -313,6 +329,22 @@ enum fe_code_rate { FEC_3_5, FEC_9_10, FEC_2_5, + FEC_1_3, + FEC_1_4, + FEC_5_9, + FEC_7_9, + FEC_8_15, + FEC_11_15, + FEC_13_18, + FEC_9_20, + FEC_11_20, + FEC_23_36, + FEC_25_36, + FEC_13_45, + FEC_26_45, + FEC_28_45, + FEC_32_45, + FEC_77_90, }; /** @@ -331,6 +363,13 @@ enum fe_code_rate { * @APSK_32: 32-APSK modulation * @DQPSK: DQPSK modulation * @QAM_4_NR: 4-QAM-NR modulation + * @QAM-1024: 1024-QAM modulation + * @QAM-4096: 4096-QAM modulation + * @APSK_8_L: 8APSK-L modulation + * @APSK_16_L: 16APSK-L modulation + * @APSK_32_L: 32APSK-L modulation + * @APSK_64: 64APSK modulation + * @APSK_64_L: 64APSK-L modulation * * Please note that not all modulations are supported by a given standard. * @@ -350,6 +389,13 @@ enum fe_modulation { APSK_32, DQPSK, QAM_4_NR, + QAM_1024, + QAM_4096, + APSK_8_L, + APSK_16_L, + APSK_32_L, + APSK_64, + APSK_64_L, }; /** @@ -404,6 +450,7 @@ enum fe_transmit_mode { * @GUARD_INTERVAL_PN420: PN length 420 (1/4) * @GUARD_INTERVAL_PN595: PN length 595 (1/6) * @GUARD_INTERVAL_PN945: PN length 945 (1/9) + * @GUARD_INTERVAL_1_64: Guard interval 1/64 * * Please note that not all guard intervals are supported by a given standard. */ @@ -419,6 +466,7 @@ enum fe_guard_interval { GUARD_INTERVAL_PN420, GUARD_INTERVAL_PN595, GUARD_INTERVAL_PN945, + GUARD_INTERVAL_1_64, }; /** @@ -571,6 +619,9 @@ enum fe_pilot { * @ROLLOFF_20: Roloff factor: α=20% * @ROLLOFF_25: Roloff factor: α=25% * @ROLLOFF_AUTO: Auto-detect the roloff factor. + * @ROLLOFF_15: Rolloff factor: α=15% + * @ROLLOFF_10: Rolloff factor: α=10% + * @ROLLOFF_5: Rolloff factor: α=5% * * .. note: * @@ -581,6 +632,9 @@ enum fe_rolloff { ROLLOFF_20, ROLLOFF_25, ROLLOFF_AUTO, + ROLLOFF_15, + ROLLOFF_10, + ROLLOFF_5, }; /** @@ -594,6 +648,8 @@ enum fe_rolloff { * Cable TV: DVB-C following ITU-T J.83 Annex B spec (ClearQAM) * @SYS_DVBC_ANNEX_C: * Cable TV: DVB-C following ITU-T J.83 Annex C spec + * @SYS_DVBC2: + * Cable TV: DVB-C2 * @SYS_ISDBC: * Cable TV: ISDB-C (no drivers yet) * @SYS_DVBT: @@ -611,7 +667,7 @@ enum fe_rolloff { * @SYS_DVBS: * Satellite TV: DVB-S * @SYS_DVBS2: - * Satellite TV: DVB-S2 + * Satellite TV: DVB-S2 and DVB-S2X * @SYS_TURBO: * Satellite TV: DVB-S Turbo * @SYS_ISDBS: @@ -645,6 +701,7 @@ enum fe_delivery_system { SYS_DVBT2, SYS_TURBO, SYS_DVBC_ANNEX_C, + SYS_DVBC2, }; /* backward compatibility definitions for delivery systems */ -- cgit v1.2.3 From 5b8bb216e91a2f4f4d5b82739c0101b3922064e5 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Thu, 25 Aug 2022 02:38:29 +0100 Subject: media: add nv12_8l128 and nv12_10be_8l128 video format. add contiguous nv12 tiled format nv12_8l128 and nv12_10be_8l128 Signed-off-by: Ming Qian Reviewed-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index e8c621ad8439..1befd181a4cc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -654,6 +654,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ #define V4L2_PIX_FMT_P010_4L4 v4l2_fourcc('T', '0', '1', '0') /* 12 Y/CbCr 4:2:0 10-bit 4x4 macroblocks */ +#define V4L2_PIX_FMT_NV12_8L128 v4l2_fourcc('A', 'T', '1', '2') /* Y/CbCr 4:2:0 8x128 tiles */ +#define V4L2_PIX_FMT_NV12_10BE_8L128 v4l2_fourcc_be('A', 'X', '1', '2') /* Y/CbCr 4:2:0 10-bit 8x128 tiles */ /* Tiled YUV formats, non contiguous planes */ #define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ -- cgit v1.2.3 From 41e8f85a75fc60e1543e4903428a1b481b672a17 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 11 Nov 2022 09:04:06 -0800 Subject: f2fs: introduce F2FS_IOC_START_ATOMIC_REPLACE introduce a new ioctl to replace the whole content of a file atomically, which means it induces truncate and content update at the same time. We can start it with F2FS_IOC_START_ATOMIC_REPLACE and complete it with F2FS_IOC_COMMIT_ATOMIC_WRITE. Or abort it with F2FS_IOC_ABORT_ATOMIC_WRITE. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/uapi/linux/f2fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 3121d127d5aa..955d440be104 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -42,6 +42,7 @@ struct f2fs_comp_option) #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) +#define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25) /* * should be same as XFS_IOC_GOINGDOWN. -- cgit v1.2.3 From d8ba8ba4c801b794f47852a6f1821ea48f83b5d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 27 Nov 2022 12:22:10 +0000 Subject: KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag behaviour to be configured Closer inspection of the Xen code shows that we aren't supposed to be using the XEN_RUNSTATE_UPDATE flag unconditionally. It should be explicitly enabled by guests through the HYPERVISOR_vm_assist hypercall. If we randomly set the top bit of ->state_entry_time for a guest that hasn't asked for it and doesn't expect it, that could make the runtimes fail to add up and confuse the guest. Without the flag it's perfectly safe for a vCPU to read its own vcpu_runstate_info; just not for one vCPU to read *another's*. I briefly pondered adding a word for the whole set of VMASST_TYPE_* flags but the only one we care about for HVM guests is this, so it seemed a bit pointless. Signed-off-by: David Woodhouse Message-Id: <20221127122210.248427-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 88448397642c..64dfe9c07c87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1271,6 +1271,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) struct kvm_xen_hvm_config { __u32 flags; @@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr { union { __u8 long_mode; __u8 vector; + __u8 runstate_update_flag; struct { __u64 gfn; } shared_info; @@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr { /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 #define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From bff3d0534804452e19c097ae6b4eb4b4d846d67f Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Fri, 4 Nov 2022 18:18:35 +0100 Subject: netfilter: conntrack: add sctp DATA_SENT state SCTP conntrack currently assumes that the SCTP endpoints will probe secondary paths using HEARTBEAT before sending traffic. But, according to RFC 9260, SCTP endpoints can send any traffic on any of the confirmed paths after SCTP association is up. SCTP endpoints that sends INIT will confirm all peer addresses that upper layer configures, and the SCTP endpoint that receives COOKIE_ECHO will only confirm the address it sent the INIT_ACK to. So, we can have a situation where the INIT sender can start to use secondary paths without the need to send HEARTBEAT. This patch allows DATA/SACK packets to create new connection tracking entry. A new state has been added to indicate that a DATA/SACK chunk has been seen in the original direction - SCTP_CONNTRACK_DATA_SENT. State transitions mostly follows the HEARTBEAT_SENT, except on receiving HEARTBEAT/HEARTBEAT_ACK/DATA/SACK in the reply direction. State transitions in original direction: - DATA_SENT behaves similar to HEARTBEAT_SENT for all chunks, except that it remains in DATA_SENT on receving HEARTBEAT, HEARTBEAT_ACK/DATA/SACK chunks State transitions in reply direction: - DATA_SENT behaves similar to HEARTBEAT_SENT for all chunks, except that it moves to HEARTBEAT_ACKED on receiving HEARTBEAT/HEARTBEAT_ACK/DATA/SACK chunks Note: This patch still doesn't solve the problem when the SCTP endpoint decides to use primary paths for association establishment but uses a secondary path for association shutdown. We still have to depend on timeout for connections to expire in such a case. Signed-off-by: Sriram Yagnaraman Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_sctp.h | 1 + include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index edc6ddab0de6..c742469afe21 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -16,6 +16,7 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_HEARTBEAT_SENT, SCTP_CONNTRACK_HEARTBEAT_ACKED, + SCTP_CONNTRACK_DATA_SENT, SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 6b20fb22717b..94e74034706d 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -95,6 +95,7 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, + CTA_TIMEOUT_SCTP_DATA_SENT, __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) -- cgit v1.2.3 From e9374524950512a1769f610a868fcdf89ea59b8e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Tue, 22 Nov 2022 20:30:57 +0100 Subject: netfilter: ipset: Add support for new bitmask parameter Add a new parameter to complement the existing 'netmask' option. The main difference between netmask and bitmask is that bitmask takes any arbitrary ip address as input, it does not have to be a valid netmask. The name of the new parameter is 'bitmask'. This lets us mask out arbitrary bits in the ip address, for example: ipset create set1 hash:ip bitmask 255.128.255.0 ipset create set2 hash:ip,port family inet6 bitmask ffff::ff80 Signed-off-by: Vishwanath Pai Signed-off-by: Joshua Hunt Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/ipset/ip_set.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 79e5d68b87af..333807efd32b 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -85,6 +85,7 @@ enum { IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ IPSET_ATTR_MARK, /* 10 */ IPSET_ATTR_MARKMASK, /* 11 */ + IPSET_ATTR_BITMASK, /* 12 */ /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ @@ -153,6 +154,7 @@ enum ipset_errno { IPSET_ERR_COMMENT, IPSET_ERR_INVALID_MARKMASK, IPSET_ERR_SKBINFO, + IPSET_ERR_BITMASK_NETMASK_EXCL, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, -- cgit v1.2.3 From 2ff4bed7fee72ba1abfcff5f11ae8f8e570353f2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:29 -0400 Subject: iommufd: File descriptor, context, kconfig and makefiles This is the basic infrastructure of a new miscdevice to hold the iommufd IOCTL API. It provides: - A miscdevice to create file descriptors to run the IOCTL interface over - A table based ioctl dispatch and centralized extendable pre-validation step - An xarray mapping userspace ID's to kernel objects. The design has multiple inter-related objects held within in a single IOMMUFD fd - A simple usage count to build a graph of object relations and protect against hostile userspace racing ioctls The only IOCTL provided in this patch is the generic 'destroy any object by handle' operation. Link: https://lore.kernel.org/r/6-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Signed-off-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/uapi/linux/iommufd.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h new file mode 100644 index 000000000000..37de92f0534b --- /dev/null +++ b/include/uapi/linux/iommufd.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef _UAPI_IOMMUFD_H +#define _UAPI_IOMMUFD_H + +#include +#include + +#define IOMMUFD_TYPE (';') + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed in a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. This allows userspace to use the backward + * compatible portion while consistently using the newer, larger, structures. + * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has + * non-zero in a part the kernel does not understand. + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOENT: An ID or IOVA provided does not exist. + * - ENOMEM: Out of memory. + * - EOVERFLOW: Mathematics overflowed. + * + * As well as additional errnos, within specific ioctls. + */ +enum { + IOMMUFD_CMD_BASE = 0x80, + IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, +}; + +/** + * struct iommu_destroy - ioctl(IOMMU_DESTROY) + * @size: sizeof(struct iommu_destroy) + * @id: iommufd object ID to destroy. Can by any destroyable object type. + * + * Destroy any object held within iommufd. + */ +struct iommu_destroy { + __u32 size; + __u32 id; +}; +#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) + +#endif -- cgit v1.2.3 From aad37e71d5c4dc1d3c25734f0bcd51c324f94b5e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:34 -0400 Subject: iommufd: IOCTLs for the io_pagetable Connect the IOAS to its IOCTL interface. This exposes most of the functionality in the io_pagetable to userspace. This is intended to be the core of the generic interface that IOMMUFD will provide. Every IOMMU driver should be able to implement an iommu_domain that is compatible with this generic mechanism. It is also designed to be easy to use for simple non virtual machine monitor users, like DPDK: - Universal simple support for all IOMMUs (no PPC special path) - An IOVA allocator that considers the aperture and the allowed/reserved ranges - io_pagetable allows any number of iommu_domains to be connected to the IOAS - Automatic allocation and re-use of iommu_domains Along with room in the design to add non-generic features to cater to specific HW functionality. Link: https://lore.kernel.org/r/11-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 258 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 257 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 37de92f0534b..30cc5c5e2b34 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -37,12 +37,19 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, + IOMMUFD_CMD_IOAS_ALLOC, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS, + IOMMUFD_CMD_IOAS_COPY, + IOMMUFD_CMD_IOAS_IOVA_RANGES, + IOMMUFD_CMD_IOAS_MAP, + IOMMUFD_CMD_IOAS_UNMAP, + IOMMUFD_CMD_OPTION, }; /** * struct iommu_destroy - ioctl(IOMMU_DESTROY) * @size: sizeof(struct iommu_destroy) - * @id: iommufd object ID to destroy. Can by any destroyable object type. + * @id: iommufd object ID to destroy. Can be any destroyable object type. * * Destroy any object held within iommufd. */ @@ -52,4 +59,253 @@ struct iommu_destroy { }; #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) +/** + * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC) + * @size: sizeof(struct iommu_ioas_alloc) + * @flags: Must be 0 + * @out_ioas_id: Output IOAS ID for the allocated object + * + * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA) + * to memory mapping. + */ +struct iommu_ioas_alloc { + __u32 size; + __u32 flags; + __u32 out_ioas_id; +}; +#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) + +/** + * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE) + * @start: First IOVA + * @last: Inclusive last IOVA + * + * An interval in IOVA space. + */ +struct iommu_iova_range { + __aligned_u64 start; + __aligned_u64 last; +}; + +/** + * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES) + * @size: sizeof(struct iommu_ioas_iova_ranges) + * @ioas_id: IOAS ID to read ranges from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to the output array of struct iommu_iova_range + * @out_iova_alignment: Minimum alignment required for mapping IOVA + * + * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges + * is not allowed. num_iovas will be set to the total number of iovas and + * the allowed_iovas[] will be filled in as space permits. + * + * The allowed ranges are dependent on the HW path the DMA operation takes, and + * can change during the lifetime of the IOAS. A fresh empty IOAS will have a + * full range, and each attached device will narrow the ranges based on that + * device's HW restrictions. Detaching a device can widen the ranges. Userspace + * should query ranges after every attach/detach to know what IOVAs are valid + * for mapping. + * + * On input num_iovas is the length of the allowed_iovas array. On output it is + * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set + * num_iovas to the required value if num_iovas is too small. In this case the + * caller should allocate a larger output array and re-issue the ioctl. + * + * out_iova_alignment returns the minimum IOVA alignment that can be given + * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy:: + * + * starting_iova % out_iova_alignment == 0 + * (starting_iova + length) % out_iova_alignment == 0 + * + * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot + * be higher than the system PAGE_SIZE. + */ +struct iommu_ioas_iova_ranges { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; + __aligned_u64 out_iova_alignment; +}; +#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) + +/** + * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS) + * @size: sizeof(struct iommu_ioas_allow_iovas) + * @ioas_id: IOAS ID to allow IOVAs from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to array of struct iommu_iova_range + * + * Ensure a range of IOVAs are always available for allocation. If this call + * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges + * that are narrower than the ranges provided here. This call will fail if + * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges. + * + * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as + * devices are attached the IOVA will narrow based on the device restrictions. + * When an allowed range is specified any narrowing will be refused, ie device + * attachment can fail if the device requires limiting within the allowed range. + * + * Automatic IOVA allocation is also impacted by this call. MAP will only + * allocate within the allowed IOVAs if they are present. + * + * This call replaces the entire allowed list with the given list. + */ +struct iommu_ioas_allow_iovas { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; +}; +#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) + +/** + * enum iommufd_ioas_map_flags - Flags for map and copy + * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate + * IOVA to place the mapping at + * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping + * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping + */ +enum iommufd_ioas_map_flags { + IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0, + IOMMU_IOAS_MAP_WRITEABLE = 1 << 1, + IOMMU_IOAS_MAP_READABLE = 1 << 2, +}; + +/** + * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP) + * @size: sizeof(struct iommu_ioas_map) + * @flags: Combination of enum iommufd_ioas_map_flags + * @ioas_id: IOAS ID to change the mapping of + * @__reserved: Must be 0 + * @user_va: Userspace pointer to start mapping from + * @length: Number of bytes to map + * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set + * then this must be provided as input. + * + * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the + * mapping will be established at iova, otherwise a suitable location based on + * the reserved and allowed lists will be automatically selected and returned in + * iova. + * + * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently + * be unused, existing IOVA cannot be replaced. + */ +struct iommu_ioas_map { + __u32 size; + __u32 flags; + __u32 ioas_id; + __u32 __reserved; + __aligned_u64 user_va; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) + +/** + * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) + * @size: sizeof(struct iommu_ioas_copy) + * @flags: Combination of enum iommufd_ioas_map_flags + * @dst_ioas_id: IOAS ID to change the mapping of + * @src_ioas_id: IOAS ID to copy from + * @length: Number of bytes to copy and map + * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is + * set then this must be provided as input. + * @src_iova: IOVA to start the copy + * + * Copy an already existing mapping from src_ioas_id and establish it in + * dst_ioas_id. The src iova/length must exactly match a range used with + * IOMMU_IOAS_MAP. + * + * This may be used to efficiently clone a subset of an IOAS to another, or as a + * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over + * establishing equivalent new mappings, as internal resources are shared, and + * the kernel will pin the user memory only once. + */ +struct iommu_ioas_copy { + __u32 size; + __u32 flags; + __u32 dst_ioas_id; + __u32 src_ioas_id; + __aligned_u64 length; + __aligned_u64 dst_iova; + __aligned_u64 src_iova; +}; +#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) + +/** + * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP) + * @size: sizeof(struct iommu_ioas_unmap) + * @ioas_id: IOAS ID to change the mapping of + * @iova: IOVA to start the unmapping at + * @length: Number of bytes to unmap, and return back the bytes unmapped + * + * Unmap an IOVA range. The iova/length must be a superset of a previously + * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or + * truncating ranges is not allowed. The values 0 to U64_MAX will unmap + * everything. + */ +struct iommu_ioas_unmap { + __u32 size; + __u32 ioas_id; + __aligned_u64 iova; + __aligned_u64 length; +}; +#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) + +/** + * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and + * ioctl(IOMMU_OPTION_HUGE_PAGES) + * @IOMMU_OPTION_RLIMIT_MODE: + * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege + * to invoke this. Value 0 (default) is user based accouting, 1 uses process + * based accounting. Global option, object_id must be 0 + * @IOMMU_OPTION_HUGE_PAGES: + * Value 1 (default) allows contiguous pages to be combined when generating + * iommu mappings. Value 0 disables combining, everything is mapped to + * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS + * option, the object_id must be the IOAS ID. + */ +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +/** + * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and + * ioctl(IOMMU_OPTION_OP_GET) + * @IOMMU_OPTION_OP_SET: Set the option's value + * @IOMMU_OPTION_OP_GET: Get the option's value + */ +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +/** + * struct iommu_option - iommu option multiplexer + * @size: sizeof(struct iommu_option) + * @option_id: One of enum iommufd_option + * @op: One of enum iommufd_option_ops + * @__reserved: Must be 0 + * @object_id: ID of the object if required + * @val64: Option value to set or value returned on get + * + * Change a simple option value. This multiplexor allows controlling options + * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET + * will return the current value. + */ +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; +#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) #endif -- cgit v1.2.3 From d624d6652a65ad4f47a58b8651a1ec1163bb81d3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 29 Nov 2022 16:29:38 -0400 Subject: iommufd: vfio container FD ioctl compatibility iommufd can directly implement the /dev/vfio/vfio container IOCTLs by mapping them into io_pagetable operations. A userspace application can test against iommufd and confirm compatibility then simply make a small change to open /dev/iommu instead of /dev/vfio/vfio. For testing purposes /dev/vfio/vfio can be symlinked to /dev/iommu and then all applications will use the compatibility path with no code changes. A later series allows /dev/vfio/vfio to be directly provided by iommufd, which allows the rlimit mode to work the same as well. This series just provides the iommufd side of compatibility. Actually linking this to VFIO_SET_CONTAINER is a followup series, with a link in the cover letter. Internally the compatibility API uses a normal IOAS object that, like vfio, is automatically allocated when the first device is attached. Userspace can also query or set this IOAS object directly using the IOMMU_VFIO_IOAS ioctl. This allows mixing and matching new iommufd only features while still using the VFIO style map/unmap ioctls. While this is enough to operate qemu, it has a few differences: - Resource limits rely on memory cgroups to bound what userspace can do instead of the module parameter dma_entry_limit. - VFIO P2P is not implemented. The DMABUF patches for vfio are a start at a solution where iommufd would import a special DMABUF. This is to avoid further propogating the follow_pfn() security problem. - A full audit for pedantic compatibility details (eg errnos, etc) has not yet been done - powerpc SPAPR is left out, as it is not connected to the iommu_domain framework. It seems interest in SPAPR is minimal as it is currently non-working in v6.1-rc1. They will have to convert to the iommu subsystem framework to enjoy iommfd. The following are not going to be implemented and we expect to remove them from VFIO type1: - SW access 'dirty tracking'. As discussed in the cover letter this will be done in VFIO. - VFIO_TYPE1_NESTING_IOMMU https://lore.kernel.org/all/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ - VFIO_DMA_MAP_FLAG_VADDR https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/ Link: https://lore.kernel.org/r/15-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com Tested-by: Nicolin Chen Tested-by: Yi Liu Tested-by: Lixiao Yang Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 30cc5c5e2b34..98ebba80cfa1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -44,6 +44,7 @@ enum { IOMMUFD_CMD_IOAS_MAP, IOMMUFD_CMD_IOAS_UNMAP, IOMMUFD_CMD_OPTION, + IOMMUFD_CMD_VFIO_IOAS, }; /** @@ -308,4 +309,39 @@ struct iommu_option { __aligned_u64 val64; }; #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +/** + * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls + * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS + * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS + * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility + */ +enum iommufd_vfio_ioas_op { + IOMMU_VFIO_IOAS_GET = 0, + IOMMU_VFIO_IOAS_SET = 1, + IOMMU_VFIO_IOAS_CLEAR = 2, +}; + +/** + * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS) + * @size: sizeof(struct iommu_vfio_ioas) + * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set + * For IOMMU_VFIO_IOAS_GET will output the IOAS ID + * @op: One of enum iommufd_vfio_ioas_op + * @__reserved: Must be 0 + * + * The VFIO compatibility support uses a single ioas because VFIO APIs do not + * support the ID field. Set or Get the IOAS that VFIO compatibility will use. + * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the + * compatibility ioas, either by taking what is already set, or auto creating + * one. From then on VFIO will continue to use that ioas and is not effected by + * this ioctl. SET or CLEAR does not destroy any auto-created IOAS. + */ +struct iommu_vfio_ioas { + __u32 size; + __u32 ioas_id; + __u16 op; + __u16 __reserved; +}; +#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) #endif -- cgit v1.2.3 From 328281155539b44539e12d62803e09310d86d11f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:21:36 -0700 Subject: cxl/pmem: Introduce nvdimm_security_ops with ->get_flags() operation Add nvdimm_security_ops support for CXL memory device with the introduction of the ->get_flags() callback function. This is part of the "Persistent Memory Data-at-rest Security" command set for CXL memory device support. The ->get_flags() function provides the security state of the persistent memory device defined by the CXL 3.0 spec section 8.2.9.8.6.1. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983609611.2734609.13231854299523325319.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index c71021a2a9ed..cdc6049683ce 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -41,6 +41,7 @@ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ ___C(SCAN_MEDIA, "Scan Media"), \ ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ + ___C(GET_SECURITY_STATE, "Get Security State"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 255c4f4a6d5b60cfcd218d8fdae517b886ff155a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 21 Nov 2022 21:26:00 -0600 Subject: block: Add error codes for common PR failures If a PR operation fails we can return a device-specific error which is impossible to handle in some cases because we could have a mix of devices when DM is used, or future users like LIO only knows it's interacting with a block device so it doesn't know the type. This patch adds a new pr_status enum so drivers can convert errors to a common type which can be handled by the caller. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20221122032603.32766-2-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Martin K. Petersen --- include/uapi/linux/pr.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index ccc78cbf1221..d8126415966f 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -4,6 +4,23 @@ #include +enum pr_status { + PR_STS_SUCCESS = 0x0, + /* + * The following error codes are based on SCSI, because the interface + * was originally created for it and has existing users. + */ + /* Generic device failure. */ + PR_STS_IOERR = 0x2, + PR_STS_RESERVATION_CONFLICT = 0x18, + /* Temporary path failure that can be retried. */ + PR_STS_RETRY_PATH_FAILURE = 0xe0000, + /* The request was failed due to a fast failure timer. */ + PR_STS_PATH_FAST_FAILED = 0xf0000, + /* The path cannot be reached and has been marked as failed. */ + PR_STS_PATH_FAILED = 0x10000, +}; + enum pr_type { PR_WRITE_EXCLUSIVE = 1, PR_EXCLUSIVE_ACCESS = 2, -- cgit v1.2.3 From af6397c9ee2b42988c912dcad2fca1f43d5c1c99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 28 Nov 2022 12:36:44 -0800 Subject: devlink: support directly reading from region memory To read from a region, user space must currently request a new snapshot of the region and then read from that snapshot. This can sometimes be overkill if user space only reads a tiny portion. They first create the snapshot, then request a read, then destroy the snapshot. For regions which have a single underlying "contents", it makes sense to allow supporting direct reading of the region data. Extend the DEVLINK_CMD_REGION_READ to allow direct reading from a region if requested via the new DEVLINK_ATTR_REGION_DIRECT. If this attribute is set, then perform a direct read instead of using a snapshot. Direct read is mutually exclusive with DEVLINK_ATTR_REGION_SNAPSHOT_ID, and care is taken to ensure that we reject commands which provide incorrect attributes. Regions must enable support for direct read by implementing the .read() callback function. If a region does not support such direct reads, a suitable extended error message is reported. Signed-off-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 498d0d5d0957..70191d96af89 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -610,6 +610,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + DEVLINK_ATTR_REGION_DIRECT, /* flag */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From e0cefada1383c5ceb5a35f08369d0d40a6629c18 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 1 Dec 2022 20:58:19 +0800 Subject: fscrypt: Add SM4 XTS/CTS symmetric algorithm support Add support for XTS and CTS mode variant of SM4 algorithm. The former is used to encrypt file contents, while the latter (SM4-CTS-CBC) is used to encrypt filenames. SM4 is a symmetric algorithm widely used in China, and is even mandatory algorithm in some special scenarios. We need to provide these users with the ability to encrypt files or disks using SM4-XTS. Signed-off-by: Tianjia Zhang Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221201125819.36932-3-tianjia.zhang@linux.alibaba.com --- include/uapi/linux/fscrypt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index a756b29afcc2..47dbd1994bfe 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -26,6 +26,8 @@ #define FSCRYPT_MODE_AES_256_CTS 4 #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 +#define FSCRYPT_MODE_SM4_XTS 7 +#define FSCRYPT_MODE_SM4_CTS 8 #define FSCRYPT_MODE_ADIANTUM 9 #define FSCRYPT_MODE_AES_256_HCTR2 10 /* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */ -- cgit v1.2.3 From 997469407f266250040f20ec73aecc77ad277145 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:21:47 -0700 Subject: cxl/pmem: Add "Set Passphrase" security command support Create callback function to support the nvdimm_security_ops ->change_key() callback. Translate the operation to send "Set Passphrase" security command for CXL memory device. The operation supports setting a passphrase for the CXL persistent memory device. It also supports the changing of the currently set passphrase. The operation allows manipulation of a user passphrase or a master passphrase. See CXL rev3.0 spec section 8.2.9.8.6.2 for reference. However, the spec leaves a gap WRT master passphrase usages. The spec does not define any ways to retrieve the status of if the support of master passphrase is available for the device, nor does the commands that utilize master passphrase will return a specific error that indicates master passphrase is not supported. If using a device does not support master passphrase and a command is issued with a master passphrase, the error message returned by the device will be ambiguous. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983610751.2734609.4445075071552032091.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index cdc6049683ce..9da047e9b038 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -42,6 +42,7 @@ ___C(SCAN_MEDIA, "Scan Media"), \ ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(GET_SECURITY_STATE, "Get Security State"), \ + ___C(SET_PASSPHRASE, "Set Passphrase"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From c4ef680d0b72815003a76074ca1cd872a2fecfc3 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:21:58 -0700 Subject: cxl/pmem: Add Disable Passphrase security command support Create callback function to support the nvdimm_security_ops ->disable() callback. Translate the operation to send "Disable Passphrase" security command for CXL memory device. The operation supports disabling a passphrase for the CXL persistent memory device. In the original implementation of nvdimm_security_ops, this operation only supports disabling of the user passphrase. This is due to the NFIT version of disable passphrase only supported disabling of user passphrase. The CXL spec allows disabling of the master passphrase as well which nvidmm_security_ops does not support yet. In this commit, the callback function will only support user passphrase. See CXL rev3.0 spec section 8.2.9.8.6.3 for reference. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983611878.2734609.10602135274526390127.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 9da047e9b038..f6d383a80f22 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -43,6 +43,7 @@ ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ ___C(GET_SECURITY_STATE, "Get Security State"), \ ___C(SET_PASSPHRASE, "Set Passphrase"), \ + ___C(DISABLE_PASSPHRASE, "Disable Passphrase"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From a072f7b7972fd85bdefefa1d6febec483438d420 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:22:10 -0700 Subject: cxl/pmem: Add "Freeze Security State" security command support Create callback function to support the nvdimm_security_ops() ->freeze() callback. Translate the operation to send "Freeze Security State" security command for CXL memory device. See CXL rev3.0 spec section 8.2.9.8.6.5 for reference. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983613019.2734609.10645754779802492122.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index f6d383a80f22..7c0adcd68f4c 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -44,6 +44,7 @@ ___C(GET_SECURITY_STATE, "Get Security State"), \ ___C(SET_PASSPHRASE, "Set Passphrase"), \ ___C(DISABLE_PASSPHRASE, "Disable Passphrase"), \ + ___C(FREEZE_SECURITY, "Freeze Security"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 2bb692f7a6cd0a7b2c29d8d5029c4469c4ec02dd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:22:21 -0700 Subject: cxl/pmem: Add "Unlock" security command support Create callback function to support the nvdimm_security_ops() ->unlock() callback. Translate the operation to send "Unlock" security command for CXL mem device. When the mem device is unlocked, cpu_cache_invalidate_memregion() is called in order to invalidate all CPU caches before attempting to access the mem device. See CXL rev3.0 spec section 8.2.9.8.6.4 for reference. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983614167.2734609.15124543712487741176.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 7c0adcd68f4c..95dca8d4584f 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -45,6 +45,7 @@ ___C(SET_PASSPHRASE, "Set Passphrase"), \ ___C(DISABLE_PASSPHRASE, "Disable Passphrase"), \ ___C(FREEZE_SECURITY, "Freeze Security"), \ + ___C(UNLOCK, "Unlock"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 3b502e886d01c2f96b2774176be4c7bceef2516b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 30 Nov 2022 12:22:32 -0700 Subject: cxl/pmem: Add "Passphrase Secure Erase" security command support Create callback function to support the nvdimm_security_ops() ->erase() callback. Translate the operation to send "Passphrase Secure Erase" security command for CXL memory device. When the mem device is secure erased, cpu_cache_invalidate_memregion() is called in order to invalidate all CPU caches before attempting to access the mem device again. See CXL 3.0 spec section 8.2.9.8.6.6 for reference. Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/166983615293.2734609.10358657600295932156.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 95dca8d4584f..82bdad4ce5de 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -46,6 +46,7 @@ ___C(DISABLE_PASSPHRASE, "Disable Passphrase"), \ ___C(FREEZE_SECURITY, "Freeze Security"), \ ___C(UNLOCK, "Unlock"), \ + ___C(PASSPHRASE_SECURE_ERASE, "Passphrase Secure Erase"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From f8b435f93b7630afea2df958e0331c566496214b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 1 Dec 2022 19:55:29 -0800 Subject: fscrypt: remove unused Speck definitions These old unused definitions were originally left around to prevent the same mode numbers from being reused. However, we've now decided to reuse the mode numbers anyway. So let's completely remove these old unused definitions to avoid confusion. There is no reason for any code to be using these constants in any way; and indeed, Debian Code Search shows no uses of them (other than in copies or translations of the header). So this should be perfectly safe. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20221202035529.55992-1-ebiggers@kernel.org --- include/uapi/linux/fscrypt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 47dbd1994bfe..fd1fb0d5389d 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -187,8 +187,6 @@ struct fscrypt_get_key_status_arg { #define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS #define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC #define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */ -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */ #define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM #define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX #define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE -- cgit v1.2.3 From f8c9dfbd875b17fee59c7f1aa35a4944d4e6d810 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 30 Nov 2022 15:06:28 +0100 Subject: mptcp: add pm listener events This patch adds two new MPTCP netlink event types for PM listening socket create and close, named MPTCP_EVENT_LISTENER_CREATED and MPTCP_EVENT_LISTENER_CLOSED. Add a new function mptcp_event_pm_listener() to push the new events with family, port and addr to userspace. Invoke mptcp_event_pm_listener() with MPTCP_EVENT_LISTENER_CREATED in mptcp_listen() and mptcp_pm_nl_create_listen_socket(), invoke it with MPTCP_EVENT_LISTENER_CLOSED in __mptcp_close_ssk(). Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index dfe19bf13f4c..32af2d278cb4 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -160,6 +160,12 @@ struct mptcp_info { * daddr4 | daddr6, sport, dport, backup, if_idx * [, error] * The priority of a subflow has changed. 'error' should not be set. + * + * MPTCP_EVENT_LISTENER_CREATED: family, sport, saddr4 | saddr6 + * A new PM listener is created. + * + * MPTCP_EVENT_LISTENER_CLOSED: family, sport, saddr4 | saddr6 + * A PM listener is closed. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, @@ -174,6 +180,9 @@ enum mptcp_event_type { MPTCP_EVENT_SUB_CLOSED = 11, MPTCP_EVENT_SUB_PRIORITY = 13, + + MPTCP_EVENT_LISTENER_CREATED = 15, + MPTCP_EVENT_LISTENER_CLOSED = 16, }; enum mptcp_event_attr { -- cgit v1.2.3 From 61e15f871241ee86f217320909005cd022dd844f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:08 +0100 Subject: KVM: Delete all references to removed KVM_SET_MEMORY_REGION ioctl The documentation says that the ioctl has been deprecated, but it has been actually removed and the remaining references are just left overs. Suggested-by: Sean Christopherson Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-2-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 64dfe9c07c87..c338ca2c972d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -86,14 +86,6 @@ struct kvm_debug_guest { /* *** End of deprecated interfaces *** */ -/* for KVM_CREATE_MEMORY_REGION */ -struct kvm_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ -}; - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -1442,10 +1434,6 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; -/* - * ioctls for VM fds - */ -#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. -- cgit v1.2.3 From 66a9221d73e71199a120ca12ea5bfaac2aa670a3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:09 +0100 Subject: KVM: Delete all references to removed KVM_SET_MEMORY_ALIAS ioctl The documentation says that the ioctl has been deprecated, but it has been actually removed and the remaining references are just left overs. Suggested-by: Sean Christopherson Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-3-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c338ca2c972d..ce9183765616 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1440,8 +1440,6 @@ struct kvm_vfio_spapr_tce { */ #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) -/* KVM_SET_MEMORY_ALIAS is obsolete: */ -#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ -- cgit v1.2.3 From 30ee198ce42d60101620d33f8bc70c3234798365 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 2 Dec 2022 11:50:10 +0100 Subject: KVM: Reference to kvm_userspace_memory_region in doc and comments There are still references to the removed kvm_memory_region data structure but the doc and comments should mention struct kvm_userspace_memory_region instead, since that is what's used by the ioctl that replaced the old one and this data structure support the same set of flags. Signed-off-by: Javier Martinez Canillas Message-Id: <20221202105011.185147-4-javierm@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ce9183765616..03708ce10bda 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -96,9 +96,9 @@ struct kvm_userspace_memory_region { }; /* - * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, - * other bits are reserved for kvm internal use which are defined in - * include/linux/kvm_host.h. + * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for + * userspace, other bits are reserved for kvm internal use which are defined + * in include/linux/kvm_host.h. */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) -- cgit v1.2.3 From f40eb99897af665f11858dd7b56edcb62c3f3c67 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Dec 2022 19:27:58 +0100 Subject: pktcdvd: remove driver. Way back in 2016 in commit 5a8b187c61e9 ("pktcdvd: mark as unmaintained and deprecated") this driver was marked as "will be removed soon". 5 years seems long enough to have it stick around after that, so finally remove the thing now. Reported-by: Christoph Hellwig Cc: Jens Axboe Cc: Thomas Maier Cc: Peter Osterlund Cc: linux-block@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221202182758.1339039-1-gregkh@linuxfoundation.org Signed-off-by: Jens Axboe --- include/uapi/linux/pktcdvd.h | 112 ------------------------------------------- 1 file changed, 112 deletions(-) delete mode 100644 include/uapi/linux/pktcdvd.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h deleted file mode 100644 index 9cbb55d21c94..000000000000 --- a/include/uapi/linux/pktcdvd.h +++ /dev/null @@ -1,112 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2000 Jens Axboe - * Copyright (C) 2001-2004 Peter Osterlund - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices. - * - */ -#ifndef _UAPI__PKTCDVD_H -#define _UAPI__PKTCDVD_H - -#include - -/* - * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. - */ -#define PACKET_DEBUG 1 - -#define MAX_WRITERS 8 - -#define PKT_RB_POOL_SIZE 512 - -/* - * How long we should hold a non-full packet before starting data gathering. - */ -#define PACKET_WAIT_TIME (HZ * 5 / 1000) - -/* - * use drive write caching -- we need deferred error handling to be - * able to successfully recover with this option (drive will return good - * status as soon as the cdb is validated). - */ -#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) -#define USE_WCACHING 1 -#else -#define USE_WCACHING 0 -#endif - -/* - * No user-servicable parts beyond this point -> - */ - -/* - * device types - */ -#define PACKET_CDR 1 -#define PACKET_CDRW 2 -#define PACKET_DVDR 3 -#define PACKET_DVDRW 4 - -/* - * flags - */ -#define PACKET_WRITABLE 1 /* pd is writable */ -#define PACKET_NWA_VALID 2 /* next writable address valid */ -#define PACKET_LRA_VALID 3 /* last recorded address valid */ -#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */ - /* underlying cdrom device happy */ - -/* - * Disc status -- from READ_DISC_INFO - */ -#define PACKET_DISC_EMPTY 0 -#define PACKET_DISC_INCOMPLETE 1 -#define PACKET_DISC_COMPLETE 2 -#define PACKET_DISC_OTHER 3 - -/* - * write type, and corresponding data block type - */ -#define PACKET_MODE1 1 -#define PACKET_MODE2 2 -#define PACKET_BLOCK_MODE1 8 -#define PACKET_BLOCK_MODE2 10 - -/* - * Last session/border status - */ -#define PACKET_SESSION_EMPTY 0 -#define PACKET_SESSION_INCOMPLETE 1 -#define PACKET_SESSION_RESERVED 2 -#define PACKET_SESSION_COMPLETE 3 - -#define PACKET_MCN "4a656e734178626f65323030300000" - -#undef PACKET_USE_LS - -#define PKT_CTRL_CMD_SETUP 0 -#define PKT_CTRL_CMD_TEARDOWN 1 -#define PKT_CTRL_CMD_STATUS 2 - -struct pkt_ctrl_command { - __u32 command; /* in: Setup, teardown, status */ - __u32 dev_index; /* in/out: Device index */ - __u32 dev; /* in/out: Device nr for cdrw device */ - __u32 pkt_dev; /* in/out: Device nr for packet device */ - __u32 num_devices; /* out: Largest device index + 1 */ - __u32 padding; /* Not used */ -}; - -/* - * packet ioctls - */ -#define PACKET_IOCTL_MAGIC ('X') -#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command) - - -#endif /* _UAPI__PKTCDVD_H */ -- cgit v1.2.3 From d14f28b8c1de668bab863bf5892a49c824cb110d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 2 Dec 2022 20:41:27 +0200 Subject: xfrm: add new packet offload flag In the next patches, the xfrm core code will be extended to support new type of offload - packet offload. In that mode, both policy and state should be specially configured in order to perform whole offloaded data path. Full offload takes care of encryption, decryption, encapsulation and other operations with headers. As this mode is new for XFRM policy flow, we can "start fresh" with flag bits and release first and second bit for future use. Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 4f84ea7ee14c..23543c33fee8 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -519,6 +519,12 @@ struct xfrm_user_offload { */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 +/* Two bits above are relevant for state path only, while + * offload is used for both policy and state flows. + * + * In policy offload mode, they are free and can be safely reused. + */ +#define XFRM_OFFLOAD_PACKET 4 struct xfrm_userpolicy_default { #define XFRM_USERPOLICY_UNSPEC 0 -- cgit v1.2.3 From 4300c58f809079951c87d84e5f11a2d265e3c9e7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Sep 2022 11:06:28 -0400 Subject: btrfs: move btrfs on-disk definitions out of ctree.h The bulk of our on-disk definitions exist in btrfs_tree.h, which user space can use. Keep things consistent and move the rest of the on disk definitions out of ctree.h into btrfs_tree.h. Note I did have to update all u8's to __u8, but otherwise this is a strict copy and paste. Most of the definitions are mainly for internal use and are not guaranteed stable public API and may change as we need. Compilation failures by user applications can happen. Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ reformat comments, style fixups ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 215 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 1f7a38ec6ac3..d43f67f676c2 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -10,6 +10,11 @@ #include #endif +/* ASCII for _BHRfS_M, no terminating nul */ +#define BTRFS_MAGIC 0x4D5F53665248425FULL + +#define BTRFS_MAX_LEVEL 8 + /* * This header contains the structure definitions and constants used * by file system objects that can be retrieved using @@ -360,6 +365,43 @@ enum btrfs_csum_type { #define BTRFS_FT_XATTR 8 #define BTRFS_FT_MAX 9 +/* + * Inode flags + */ +#define BTRFS_INODE_NODATASUM (1U << 0) +#define BTRFS_INODE_NODATACOW (1U << 1) +#define BTRFS_INODE_READONLY (1U << 2) +#define BTRFS_INODE_NOCOMPRESS (1U << 3) +#define BTRFS_INODE_PREALLOC (1U << 4) +#define BTRFS_INODE_SYNC (1U << 5) +#define BTRFS_INODE_IMMUTABLE (1U << 6) +#define BTRFS_INODE_APPEND (1U << 7) +#define BTRFS_INODE_NODUMP (1U << 8) +#define BTRFS_INODE_NOATIME (1U << 9) +#define BTRFS_INODE_DIRSYNC (1U << 10) +#define BTRFS_INODE_COMPRESS (1U << 11) + +#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31) + +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + +#define BTRFS_INODE_RO_VERITY (1U << 0) + +#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY) + /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. @@ -389,6 +431,109 @@ struct btrfs_key { __u64 offset; } __attribute__ ((__packed__)); +/* + * Every tree block (leaf or node) starts with this header. + */ +struct btrfs_header { + /* These first four must match the super block */ + __u8 csum[BTRFS_CSUM_SIZE]; + /* FS specific uuid */ + __u8 fsid[BTRFS_FSID_SIZE]; + /* Which block this node is supposed to live in */ + __le64 bytenr; + __le64 flags; + + /* Allowed to be different from the super from here on down */ + __u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; + __le64 generation; + __le64 owner; + __le32 nritems; + __u8 level; +} __attribute__ ((__packed__)); + +/* + * This is a very generous portion of the super block, giving us room to + * translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + +/* + * Just in case we somehow lose the roots and are not able to mount, we store + * an array of the roots from previous transactions in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unused_64[4]; + + __u8 tree_root_level; + __u8 chunk_root_level; + __u8 extent_root_level; + __u8 fs_root_level; + __u8 dev_root_level; + __u8 csum_root_level; + /* future and to align */ + __u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* + * A leaf is full of items. offset and size tell us where to find the item in + * the leaf (relative to the start of the data area) + */ +struct btrfs_item { + struct btrfs_disk_key key; + __le32 offset; + __le32 size; +} __attribute__ ((__packed__)); + +/* + * Leaves have an item area and a data area: + * [item0, item1....itemN] [free space] [dataN...data1, data0] + * + * The data is separate from the items to get the keys closer together during + * searches. + */ +struct btrfs_leaf { + struct btrfs_header header; + struct btrfs_item items[]; +} __attribute__ ((__packed__)); + +/* + * All non-leaf blocks are nodes, they hold only keys and pointers to other + * blocks. + */ +struct btrfs_key_ptr { + struct btrfs_disk_key key; + __le64 blockptr; + __le64 generation; +} __attribute__ ((__packed__)); + +struct btrfs_node { + struct btrfs_header header; + struct btrfs_key_ptr ptrs[]; +} __attribute__ ((__packed__)); + struct btrfs_dev_item { /* the internal btrfs device id */ __le64 devid; @@ -472,6 +617,68 @@ struct btrfs_chunk { /* additional stripes go here */ } __attribute__ ((__packed__)); +/* + * The super block basically lists the main trees of the FS. + */ +struct btrfs_super_block { + /* The first 4 fields must match struct btrfs_header */ + __u8 csum[BTRFS_CSUM_SIZE]; + /* FS specific UUID, visible to user */ + __u8 fsid[BTRFS_FSID_SIZE]; + /* This block number */ + __le64 bytenr; + __le64 flags; + + /* Allowed to be different from the btrfs_header from here own down */ + __le64 magic; + __le64 generation; + __le64 root; + __le64 chunk_root; + __le64 log_root; + + /* + * This member has never been utilized since the very beginning, thus + * it's always 0 regardless of kernel version. We always use + * generation + 1 to read log tree root. So here we mark it deprecated. + */ + __le64 __unused_log_root_transid; + __le64 total_bytes; + __le64 bytes_used; + __le64 root_dir_objectid; + __le64 num_devices; + __le32 sectorsize; + __le32 nodesize; + __le32 __unused_leafsize; + __le32 stripesize; + __le32 sys_chunk_array_size; + __le64 chunk_root_generation; + __le64 compat_flags; + __le64 compat_ro_flags; + __le64 incompat_flags; + __le16 csum_type; + __u8 root_level; + __u8 chunk_root_level; + __u8 log_root_level; + struct btrfs_dev_item dev_item; + + char label[BTRFS_LABEL_SIZE]; + + __le64 cache_generation; + __le64 uuid_tree_generation; + + /* The UUID written into btree blocks */ + __u8 metadata_uuid[BTRFS_FSID_SIZE]; + + /* Future expansion */ + __u8 reserved8[8]; + __le64 reserved[27]; + __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; + + /* Padded to 4096 bytes */ + __u8 padding[565]; +} __attribute__ ((__packed__)); + #define BTRFS_FREE_SPACE_EXTENT 1 #define BTRFS_FREE_SPACE_BITMAP 2 @@ -526,6 +733,14 @@ struct btrfs_extent_item_v0 { /* use full backrefs for extent pointers in the block */ #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) +#define BTRFS_BACKREF_REV_MAX 256 +#define BTRFS_BACKREF_REV_SHIFT 56 +#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ + BTRFS_BACKREF_REV_SHIFT) + +#define BTRFS_OLD_BACKREF_REV 0 +#define BTRFS_MIXED_BACKREF_REV 1 + /* * this flag is only used internally by scrub and may be changed at any time * it is only declared here to avoid collisions -- cgit v1.2.3 From ad4b63caf56d0dc08e03966172d685ff9ebad996 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Sep 2022 11:06:30 -0400 Subject: btrfs: move maximum limits to btrfs_tree.h We have maximum link and name length limits, move these to btrfs_tree.h as they're on disk limitations. Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ reformat comments ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index d43f67f676c2..4809272f5063 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -15,6 +15,18 @@ #define BTRFS_MAX_LEVEL 8 +/* + * We can actually store much bigger names, but lets not confuse the rest of + * linux. + */ +#define BTRFS_NAME_LEN 255 + +/* + * Theoretical limit is larger, but we keep this down to a sane value. That + * should limit greatly the possibility of collisions on inode ref items. + */ +#define BTRFS_LINK_MAX 65535U + /* * This header contains the structure definitions and constants used * by file system objects that can be retrieved using -- cgit v1.2.3 From 94a48aef49f235cc1efc74dc18e7708ca3b8d698 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 20 Oct 2022 12:58:28 -0400 Subject: btrfs: extend btrfs_dir_item type to store encryption status For directories with encrypted files/filenames, we need to store a flag indicating this fact. There's no room in other fields, so we'll need to borrow a bit from dir_type. Since it's now a combination of type and flags, we rename it to dir_flags to reflect its new usage. The new flag, FT_ENCRYPTED, indicates a directory containing encrypted data, which is orthogonal to file type; therefore, add the new flag, and make conversion from directory type to file type strip the flag. As the file types almost never change we can afford to use the bits. Actual usage will be guarded behind an incompat bit, this patch only adds the support for later use by fscrypt. Signed-off-by: Omar Sandoval Signed-off-by: Sweet Tea Dorminy Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 4809272f5063..29895ffa470d 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -376,6 +376,13 @@ enum btrfs_csum_type { #define BTRFS_FT_SYMLINK 7 #define BTRFS_FT_XATTR 8 #define BTRFS_FT_MAX 9 +/* Directory contains encrypted data */ +#define BTRFS_FT_ENCRYPTED 0x80 + +static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags) +{ + return flags & ~BTRFS_FT_ENCRYPTED; +} /* * Inode flags -- cgit v1.2.3 From 0c7030038e6106711c5d0b237c980905dd3244ec Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 15 Nov 2022 11:16:19 -0500 Subject: btrfs: add nr_global_roots to the super block definition We already have this defined in btrfs-progs, add it to the kernel to make it easier to sync these files into btrfs-progs. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 29895ffa470d..ab38d0f411fa 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -688,8 +688,9 @@ struct btrfs_super_block { /* The UUID written into btree blocks */ __u8 metadata_uuid[BTRFS_FSID_SIZE]; + __u64 nr_global_roots; + /* Future expansion */ - __u8 reserved8[8]; __le64 reserved[27]; __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; -- cgit v1.2.3 From a28135303a669917002f569aecebd5758263e4aa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 28 Nov 2022 14:26:38 -0500 Subject: btrfs: sync some cleanups from progs into uapi/btrfs.h When syncing this code into btrfs-progs Dave noticed there's some things we were losing in the sync that are needed. This syncs those changes into the kernel, which include a few comments that weren't in the kernel, some whitespace changes, an attribute, and the cplusplus bit. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 5655e89b962b..b4f0f9531119 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -19,8 +19,14 @@ #ifndef _UAPI_LINUX_BTRFS_H #define _UAPI_LINUX_BTRFS_H + +#ifdef __cplusplus +extern "C" { +#endif + #include #include +#include #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 @@ -333,6 +339,12 @@ struct btrfs_ioctl_feature_flags { */ struct btrfs_balance_args { __u64 profiles; + + /* + * usage filter + * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' + * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max + */ union { __u64 usage; struct { @@ -549,7 +561,7 @@ struct btrfs_ioctl_search_header { __u64 offset; __u32 type; __u32 len; -}; +} __attribute__ ((__may_alias__)); #define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key)) /* @@ -562,6 +574,10 @@ struct btrfs_ioctl_search_args { char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; }; +/* + * Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes. + * The allocated size of the buffer is set in buf_size. + */ struct btrfs_ioctl_search_args_v2 { struct btrfs_ioctl_search_key key; /* in/out - search parameters */ __u64 buf_size; /* in - size of buffer @@ -570,10 +586,11 @@ struct btrfs_ioctl_search_args_v2 { __u64 buf[]; /* out - found items */ }; +/* With a @src_length of zero, the range from @src_offset->EOF is cloned! */ struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; }; /* @@ -677,8 +694,11 @@ struct btrfs_ioctl_logical_ino_args { /* struct btrfs_data_container *inodes; out */ __u64 inodes; }; -/* Return every ref to the extent, not just those containing logical block. - * Requires logical == extent bytenr. */ + +/* + * Return every ref to the extent, not just those containing logical block. + * Requires logical == extent bytenr. + */ #define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0) enum btrfs_dev_stat_values { @@ -1144,4 +1164,8 @@ enum btrfs_err_code { #define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ struct btrfs_ioctl_encoded_io_args) +#ifdef __cplusplus +} +#endif + #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From 7112a04664bfc10ae4709b2079fe3991cbd1fe18 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Thu, 1 Dec 2022 16:25:55 -0800 Subject: ethtool: add netlink based get rss support Add netlink based support for "ethtool -x [context x]" command by implementing ETHTOOL_MSG_RSS_GET netlink message. This is equivalent to functionality provided via ETHTOOL_GRSSH in ioctl path. It sends RSS table, hash key and hash function of an interface to user space. This patch implements existing functionality available in ioctl path and enables addition of new RSS context based parameters in future. Signed-off-by: Sudheer Mogilappagari Link: https://lore.kernel.org/r/20221202002555.241580-1-sudheer.mogilappagari@intel.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index aaf7c6963d61..5799a9db034e 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -51,6 +51,7 @@ enum { ETHTOOL_MSG_MODULE_SET, ETHTOOL_MSG_PSE_GET, ETHTOOL_MSG_PSE_SET, + ETHTOOL_MSG_RSS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -97,6 +98,7 @@ enum { ETHTOOL_MSG_MODULE_GET_REPLY, ETHTOOL_MSG_MODULE_NTF, ETHTOOL_MSG_PSE_GET_REPLY, + ETHTOOL_MSG_RSS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -880,6 +882,18 @@ enum { ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; +enum { + ETHTOOL_A_RSS_UNSPEC, + ETHTOOL_A_RSS_HEADER, + ETHTOOL_A_RSS_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_HFUNC, /* u32 */ + ETHTOOL_A_RSS_INDIR, /* binary */ + ETHTOOL_A_RSS_HKEY, /* binary */ + + __ETHTOOL_A_RSS_CNT, + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From d50fd948d3d086e2608430eb17a01948c3c0d0d2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Nov 2022 05:43:03 +0000 Subject: media: dvb/frontend.h: fix kernel-doc warnings scripts/kernel-doc spouts multiple warnings, so fix them: include/uapi/linux/dvb/frontend.h:399: warning: Enum value 'QAM_1024' not described in enum 'fe_modulation' include/uapi/linux/dvb/frontend.h:399: warning: Enum value 'QAM_4096' not described in enum 'fe_modulation' frontend.h:286: warning: contents before sections frontend.h:780: warning: missing initial short description on line: * enum atscmh_rs_code_mode Fixes: 8220ead805b6 ("media: dvb/frontend.h: document the uAPI file") Fixes: 6508a50fe84f ("media: dvb: add DVB-C2 and DVB-S2X parameter values") Signed-off-by: Randy Dunlap Reviewed-by: Robert Schlabbach Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 4fed9e316147..022a787815e0 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -282,7 +282,6 @@ enum fe_spectral_inversion { /** * enum fe_code_rate - Type of Forward Error Correction (FEC) * - * * @FEC_NONE: No Forward Error Correction Code * @FEC_1_2: Forward Error Correction Code 1/2 * @FEC_2_3: Forward Error Correction Code 2/3 @@ -363,8 +362,8 @@ enum fe_code_rate { * @APSK_32: 32-APSK modulation * @DQPSK: DQPSK modulation * @QAM_4_NR: 4-QAM-NR modulation - * @QAM-1024: 1024-QAM modulation - * @QAM-4096: 4096-QAM modulation + * @QAM_1024: 1024-QAM modulation + * @QAM_4096: 4096-QAM modulation * @APSK_8_L: 8APSK-L modulation * @APSK_16_L: 16APSK-L modulation * @APSK_32_L: 32APSK-L modulation @@ -777,7 +776,7 @@ enum atscmh_rs_frame_mode { }; /** - * enum atscmh_rs_code_mode + * enum atscmh_rs_code_mode - ATSC-M/H Reed Solomon modes * @ATSCMH_RSCODE_211_187: Reed Solomon code (211,187). * @ATSCMH_RSCODE_223_187: Reed Solomon code (223,187). * @ATSCMH_RSCODE_235_187: Reed Solomon code (235,187). -- cgit v1.2.3 From 4db52602a6074e9cc523500b8304600ff63e7b85 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 6 Dec 2022 10:34:26 +0200 Subject: vfio: Extend the device migration protocol with PRE_COPY The optional PRE_COPY states open the saving data transfer FD before reaching STOP_COPY and allows the device to dirty track internal state changes with the general idea to reduce the volume of data transferred in the STOP_COPY stage. While in PRE_COPY the device remains RUNNING, but the saving FD is open. Only if the device also supports RUNNING_P2P can it support PRE_COPY_P2P, which halts P2P transfers while continuing the saving FD. PRE_COPY, with P2P support, requires the driver to implement 7 new arcs and exists as an optional FSM branch between RUNNING and STOP_COPY: RUNNING -> PRE_COPY -> PRE_COPY_P2P -> STOP_COPY A new ioctl VFIO_MIG_GET_PRECOPY_INFO is provided to allow userspace to query the progress of the precopy operation in the driver with the idea it will judge to move to STOP_COPY at least once the initial data set is transferred, and possibly after the dirty size has shrunk appropriately. This ioctl is valid only in PRE_COPY states and kernel driver should return -EINVAL from any other migration state. Compared to the v1 clarification, STOP_COPY -> PRE_COPY is blocked and to be defined in future. We also split the pending_bytes report into the initial and sustaining values, e.g.: initial_bytes and dirty_bytes. initial_bytes: Amount of initial precopy data. dirty_bytes: Device state changes relative to data previously retrieved. These fields are not required to have any bearing to STOP_COPY phase. It is recommended to leave PRE_COPY for STOP_COPY only after the initial_bytes field reaches zero. Leaving PRE_COPY earlier might make things slower. Signed-off-by: Jason Gunthorpe Signed-off-by: Shay Drory Reviewed-by: Kevin Tian Reviewed-by: Shameer Kolothum Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20221206083438.37807-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 123 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 3e45dbaf190e..23105eb036fa 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -819,12 +819,20 @@ struct vfio_device_feature { * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P * is supported in addition to the STOP_COPY states. * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that + * PRE_COPY is supported in addition to the STOP_COPY states. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY + * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported + * in addition to the STOP_COPY states. + * * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) #define VFIO_MIGRATION_P2P (1 << 1) +#define VFIO_MIGRATION_PRE_COPY (1 << 2) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -875,8 +883,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * - * And 1 optional state to support VFIO_MIGRATION_P2P: + * And optional states to support VFIO_MIGRATION_P2P: * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * And VFIO_MIGRATION_PRE_COPY: + * PRE_COPY - The device is running normally but tracking internal state + * changes + * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY: + * PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: @@ -908,20 +921,48 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. * + * PRE_COPY -> RUNNING * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * The PRE_COPY arc will terminate a data transfer session. + * + * PRE_COPY_P2P -> RUNNING_P2P * RUNNING -> RUNNING_P2P * STOP -> RUNNING_P2P * While in RUNNING_P2P the device is partially running in the P2P quiescent * state defined below. * + * The PRE_COPY_P2P arc will terminate a data transfer session. + * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P * STOP -> STOP_COPY - * This arc begin the process of saving the device state and will return a - * new data_fd. + * PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states + * which share a data transfer session. Moving between these states alters + * what is streamed in session, but does not terminate or otherwise affect + * the associated fd. + * + * These arcs begin the process of saving the device state and will return a + * new data_fd. The migration driver may perform actions such as enabling + * dirty logging of device state when entering PRE_COPY or PER_COPY_P2P. * + * Each arc does not change the device operation, the device remains + * RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below + * in PRE_COPY_P2P -> STOP_COPY. + * + * PRE_COPY -> PRE_COPY_P2P + * Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above. + * However, while in the PRE_COPY_P2P state, the device is partially running + * in the P2P quiescent state defined below, like RUNNING_P2P. + * + * PRE_COPY_P2P -> PRE_COPY + * This arc allows returning the device to a full RUNNING behavior while + * continuing all the behaviors of PRE_COPY. + * + * PRE_COPY_P2P -> STOP_COPY * While in the STOP_COPY state the device has the same behavior as STOP * with the addition that the data transfers session continues to stream the * migration state. End of stream on the FD indicates the entire device @@ -939,6 +980,13 @@ struct vfio_device_feature_mig_state { * device state for this arc if required to prepare the device to receive the * migration data. * + * STOP_COPY -> PRE_COPY + * STOP_COPY -> PRE_COPY_P2P + * These arcs are not permitted and return error if requested. Future + * revisions of this API may define behaviors for these arcs, in this case + * support will be discoverable by a new flag in + * VFIO_DEVICE_FEATURE_MIGRATION. + * * any -> ERROR * ERROR cannot be specified as a device state, however any transition request * can be failed with an errno return and may then move the device_state into @@ -950,7 +998,7 @@ struct vfio_device_feature_mig_state { * The optional peer to peer (P2P) quiescent state is intended to be a quiescent * state for the device for the purposes of managing multiple devices within a * user context where peer-to-peer DMA between devices may be active. The - * RUNNING_P2P states must prevent the device from initiating + * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating * any new P2P DMA transactions. If the device can identify P2P transactions * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration * driver must complete any such outstanding operations prior to completing the @@ -963,6 +1011,8 @@ struct vfio_device_feature_mig_state { * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: * - Select the shortest path. + * - The path cannot have saving group states as interior arcs, only + * starting/end states. * Refer to vfio_mig_get_next_state() for the result of the algorithm. * * The automatic transit through the FSM arcs that make up the combination @@ -976,6 +1026,9 @@ struct vfio_device_feature_mig_state { * support them. The user can discover if these states are supported by using * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can * avoid knowing about these optional states if the kernel driver supports them. + * + * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY + * is not present. */ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -984,8 +1037,70 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, VFIO_DEVICE_STATE_RUNNING_P2P = 5, + VFIO_DEVICE_STATE_PRE_COPY = 6, + VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, +}; + +/** + * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21) + * + * This ioctl is used on the migration data FD in the precopy phase of the + * migration data transfer. It returns an estimate of the current data sizes + * remaining to be transferred. It allows the user to judge when it is + * appropriate to leave PRE_COPY for STOP_COPY. + * + * This ioctl is valid only in PRE_COPY states and kernel driver should + * return -EINVAL from any other migration state. + * + * The vfio_precopy_info data structure returned by this ioctl provides + * estimates of data available from the device during the PRE_COPY states. + * This estimate is split into two categories, initial_bytes and + * dirty_bytes. + * + * The initial_bytes field indicates the amount of initial precopy + * data available from the device. This field should have a non-zero initial + * value and decrease as migration data is read from the device. + * It is recommended to leave PRE_COPY for STOP_COPY only after this field + * reaches zero. Leaving PRE_COPY earlier might make things slower. + * + * The dirty_bytes field tracks device state changes relative to data + * previously retrieved. This field starts at zero and may increase as + * the internal device state is modified or decrease as that modified + * state is read from the device. + * + * Userspace may use the combination of these fields to estimate the + * potential data size available during the PRE_COPY phases, as well as + * trends relative to the rate the device is dirtying its internal + * state, but these fields are not required to have any bearing relative + * to the data size available during the STOP_COPY phase. + * + * Drivers have a lot of flexibility in when and what they transfer during the + * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO. + * + * During pre-copy the migration data FD has a temporary "end of stream" that is + * reached when both initial_bytes and dirty_byte are zero. For instance, this + * may indicate that the device is idle and not currently dirtying any internal + * state. When read() is done on this temporary end of stream the kernel driver + * should return ENOMSG from read(). Userspace can wait for more data (which may + * never come) by using poll. + * + * Once in STOP_COPY the migration data FD has a permanent end of stream + * signaled in the usual way by read() always returning 0 and poll always + * returning readable. ENOMSG may not be returned in STOP_COPY. + * Support for this ioctl is mandatory if a driver claims to support + * VFIO_MIGRATION_PRE_COPY. + * + * Return: 0 on success, -1 and errno set on failure. + */ +struct vfio_precopy_info { + __u32 argsz; + __u32 flags; + __aligned_u64 initial_bytes; + __aligned_u64 dirty_bytes; }; +#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21) + /* * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power * state with the platform-based power management. Device use of lower power -- cgit v1.2.3 From 7fe898041fb0c8e630504ecc2cb8805651ac85c1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Dec 2022 20:22:44 -0800 Subject: cxl/security: Drop security command ioctl uapi CXL PMEM security operations are routed through the NVDIMM sysfs interface. For this reason the corresponding commands are marked "exclusive" to preclude collisions between the ioctl ABI and the sysfs ABI. However, a better way to preclude that collision is to simply remove the ioctl ABI (command-id definitions) for those operations. Now that cxl_internal_send_cmd() (formerly cxl_mbox_send_cmd()) no longer needs to talk the cxl_mem_commands array, all of the uapi definitions for the security commands can be dropped. These never appeared in a released kernel, so no regression risk. Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/167030056464.4044561.11486507095384253833.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 82bdad4ce5de..c71021a2a9ed 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -41,12 +41,6 @@ ___C(GET_SCAN_MEDIA_CAPS, "Get Scan Media Capabilities"), \ ___C(SCAN_MEDIA, "Scan Media"), \ ___C(GET_SCAN_MEDIA, "Get Scan Media Results"), \ - ___C(GET_SECURITY_STATE, "Get Security State"), \ - ___C(SET_PASSPHRASE, "Set Passphrase"), \ - ___C(DISABLE_PASSPHRASE, "Disable Passphrase"), \ - ___C(FREEZE_SECURITY, "Freeze Security"), \ - ___C(UNLOCK, "Unlock"), \ - ___C(PASSPHRASE_SECURE_ERASE, "Passphrase Secure Erase"), \ ___C(MAX, "invalid / last command") #define ___C(a, b) CXL_MEM_COMMAND_ID_##a -- cgit v1.2.3 From 8478afa837c48f10dddb2e06714ffc667843781d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 14:10:59 +0200 Subject: headers: Remove some left-over license text in include/uapi/linux/dvb/ Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Remove it now. Signed-off-by: Christophe JAILLET Signed-off-by: Hans Verkuil --- include/uapi/linux/dvb/audio.h | 15 --------------- include/uapi/linux/dvb/ca.h | 15 --------------- include/uapi/linux/dvb/dmx.h | 15 --------------- include/uapi/linux/dvb/frontend.h | 15 --------------- include/uapi/linux/dvb/net.h | 15 --------------- include/uapi/linux/dvb/osd.h | 15 --------------- include/uapi/linux/dvb/version.h | 15 --------------- include/uapi/linux/dvb/video.h | 15 --------------- 8 files changed, 120 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h index 2f869da69171..77fb866890b4 100644 --- a/include/uapi/linux/dvb/audio.h +++ b/include/uapi/linux/dvb/audio.h @@ -7,21 +7,6 @@ * Copyright (C) 2000 Ralph Metzler * & Marcus Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBAUDIO_H_ diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index dffa59e95ebb..4244b187cc4d 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -5,21 +5,6 @@ * Copyright (C) 2000 Ralph Metzler * & Marcus Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBCA_H_ diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index b4112f0b6dd3..7b16375f94e2 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -5,21 +5,6 @@ * Copyright (C) 2000 Marcus Metzler * & Ralph Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _UAPI_DVBDMX_H_ diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 4fed9e316147..53f4fb8043e3 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -7,21 +7,6 @@ * Holger Waechtler * Andre Draszik * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBFRONTEND_H_ diff --git a/include/uapi/linux/dvb/net.h b/include/uapi/linux/dvb/net.h index 0c550ef93f2c..7cbb47ac38ef 100644 --- a/include/uapi/linux/dvb/net.h +++ b/include/uapi/linux/dvb/net.h @@ -5,21 +5,6 @@ * Copyright (C) 2000 Marcus Metzler * & Ralph Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBNET_H_ diff --git a/include/uapi/linux/dvb/osd.h b/include/uapi/linux/dvb/osd.h index 858997c74043..6003b108ba45 100644 --- a/include/uapi/linux/dvb/osd.h +++ b/include/uapi/linux/dvb/osd.h @@ -7,21 +7,6 @@ * Copyright (C) 2001 Ralph Metzler * & Marcus Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Lesser Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBOSD_H_ diff --git a/include/uapi/linux/dvb/version.h b/include/uapi/linux/dvb/version.h index 2c5cffe6d2a0..1a8cd038aa0b 100644 --- a/include/uapi/linux/dvb/version.h +++ b/include/uapi/linux/dvb/version.h @@ -4,21 +4,6 @@ * * Copyright (C) 2000 Holger Waechtler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _DVBVERSION_H_ diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h index 179f1ec60af6..9910b73737e0 100644 --- a/include/uapi/linux/dvb/video.h +++ b/include/uapi/linux/dvb/video.h @@ -7,21 +7,6 @@ * Copyright (C) 2000 Marcus Metzler * & Ralph Metzler * for convergence integrated media GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * */ #ifndef _UAPI_DVBVIDEO_H_ -- cgit v1.2.3 From 96c1212a61e089fcd10abe88f6a8f069f2ac1354 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 17:13:14 +0200 Subject: headers: Remove some left-over license text in include/uapi/linux/v4l2-* Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Remove it now. Signed-off-by: Christophe JAILLET Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-common.h | 39 ------------------------------------ include/uapi/linux/v4l2-controls.h | 38 ----------------------------------- include/uapi/linux/v4l2-dv-timings.h | 9 --------- include/uapi/linux/v4l2-mediabus.h | 4 ---- include/uapi/linux/v4l2-subdev.h | 13 ------------ 5 files changed, 103 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-common.h b/include/uapi/linux/v4l2-common.h index 7d21c1634b4d..5a23a15d78ac 100644 --- a/include/uapi/linux/v4l2-common.h +++ b/include/uapi/linux/v4l2-common.h @@ -10,45 +10,6 @@ * * Copyright (C) 2012 Nokia Corporation * Contact: Sakari Ailus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Alternatively you can redistribute this file under the terms of the - * BSD license as stated below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. The names of its contributors may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * */ #ifndef __V4L2_COMMON__ diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 410d778c1243..b73a8ba7df6c 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -4,44 +4,6 @@ * * Copyright (C) 1999-2012 the contributors * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Alternatively you can redistribute this file under the terms of the - * BSD license as stated below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. The names of its contributors may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * * The contents of this header was split off from videodev2.h. All control * definitions should be added to this header, which is included by * videodev2.h. diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index b52b67c62562..ef0128c7369c 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -3,15 +3,6 @@ * V4L2 DV timings header. * * Copyright (C) 2012-2016 Hans Verkuil - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef _V4L2_DV_TIMINGS_H diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index 903e67b16711..6b07b73473b5 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -3,10 +3,6 @@ * Media Bus API header * * Copyright (C) 2009, Guennadi Liakhovetski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __LINUX_V4L2_MEDIABUS_H diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 658106f5b5dc..ecce4c79f5c5 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -6,19 +6,6 @@ * * Contacts: Laurent Pinchart * Sakari Ailus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __LINUX_V4L2_SUBDEV_H -- cgit v1.2.3 From da65e9ff3bf614d2836e38e1d405c7073e6ba3b7 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 20:51:15 +0200 Subject: devlink: Expose port function commands to control RoCE Expose port function commands to enable / disable RoCE, this is used to control the port RoCE device capabilities. When RoCE is disabled for a function of the port, function cannot create any RoCE specific resources (e.g GID table). It also saves system memory utilization. For example disabling RoCE enable a VF/SF saves 1 Mbytes of system memory per function. Example of a PCI VF port which supports function configuration: Set RoCE of the VF's port function. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce enable $ devlink port function set pci/0000:06:00.0/2 roce disable $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce disable Signed-off-by: Shay Drory Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 70191d96af89..6cc2925bd478 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -658,11 +658,21 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_fn_attr_cap { + DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + + /* Add new caps above */ + __DEVLINK_PORT_FN_ATTR_CAPS_MAX, +}; + +#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) + enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 -- cgit v1.2.3 From a8ce7b26a51efc4d7753b23d639ae092878a6193 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 20:51:18 +0200 Subject: devlink: Expose port function commands to control migratable Expose port function commands to enable / disable migratable capability, this is used to set the port function as migratable. Live migration is the process of transferring a live virtual machine from one physical host to another without disrupting its normal operation. In order for a VM to be able to perform LM, all the VM components must be able to perform migration. e.g.: to be migratable. In order for VF to be migratable, VF must be bound to VFIO driver with migration support. When migratable capability is enabled for a function of the port, the device is making the necessary preparations for the function to be migratable, which might include disabling features which cannot be migrated. Example of LM with migratable function configuration: Set migratable of the VF's port function. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 migratable disable $ devlink port function set pci/0000:06:00.0/2 migratable enable $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 migratable enable Bind VF to VFIO driver with migration support: $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/unbind $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/bind Attach VF to the VM. Start the VM. Perform LM. Signed-off-by: Shay Drory Reviewed-by: Jiri Pirko Acked-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6cc2925bd478..3782d4219ac9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -660,12 +660,15 @@ enum devlink_resource_unit { enum devlink_port_fn_attr_cap { DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT, /* Add new caps above */ __DEVLINK_PORT_FN_ATTR_CAPS_MAX, }; #define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) +#define DEVLINK_PORT_FN_CAP_MIGRATABLE \ + _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT) enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, -- cgit v1.2.3 From 81c25247a2a03a0f97e4805d7aff7541ccff6baa Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Tue, 6 Dec 2022 16:12:03 +0000 Subject: usb: gadget: uvc: Rename bmInterfaceFlags -> bmInterlaceFlags In the specification documents for the Uncompressed and MJPEG USB Video Payloads, the field name is bmInterlaceFlags - it has been misnamed within the kernel. Although renaming the field does break the kernel's interface to userspace it should be low-risk in this instance. The field is read only and hardcoded to 0, so there was never any value in anyone reading it. A search of the uvc-gadget application and all the forks that I could find for it did not reveal any users either. Fixes: cdda479f15cd ("USB gadget: video class function driver") Reviewed-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20221206161203.1562827-1-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/video.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h index bfdae12cdacf..6e8e572c2980 100644 --- a/include/uapi/linux/usb/video.h +++ b/include/uapi/linux/usb/video.h @@ -466,7 +466,7 @@ struct uvc_format_uncompressed { __u8 bDefaultFrameIndex; __u8 bAspectRatioX; __u8 bAspectRatioY; - __u8 bmInterfaceFlags; + __u8 bmInterlaceFlags; __u8 bCopyProtect; } __attribute__((__packed__)); @@ -522,7 +522,7 @@ struct uvc_format_mjpeg { __u8 bDefaultFrameIndex; __u8 bAspectRatioX; __u8 bAspectRatioY; - __u8 bmInterfaceFlags; + __u8 bmInterlaceFlags; __u8 bCopyProtect; } __attribute__((__packed__)); -- cgit v1.2.3 From c1f480b2d092960ecf8bb0bd1f27982c33ada42a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 6 Dec 2022 09:29:13 +0000 Subject: sed-opal: allow using IOC_OPAL_SAVE for locking too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usually when closing a crypto device (eg: dm-crypt with LUKS) the volume key is not required, as it requires root privileges anyway, and root can deny access to a disk in many ways regardless. Requiring the volume key to lock the device is a peculiarity of the OPAL specification. Given we might already have saved the key if the user requested it via the 'IOC_OPAL_SAVE' ioctl, we can use that key to lock the device if no key was provided here and the locking range matches, and the user sets the appropriate flag with 'IOC_OPAL_SAVE'. This allows integrating OPAL with tools and libraries that are used to the common behaviour and do not ask for the volume key when closing a device. Callers can always pass a non-zero key and it will be used regardless, as before. Suggested-by: Štěpán Horáček Signed-off-by: Luca Boccassi Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20221206092913.4625-1-luca.boccassi@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 2573772e2fb3..1fed3c9294fc 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -44,6 +44,11 @@ enum opal_lock_state { OPAL_LK = 0x04, /* 0100 */ }; +enum opal_lock_flags { + /* IOC_OPAL_SAVE will also store the provided key for locking */ + OPAL_SAVE_FOR_LOCK = 0x01, +}; + struct opal_key { __u8 lr; __u8 key_len; @@ -76,7 +81,8 @@ struct opal_user_lr_setup { struct opal_lock_unlock { struct opal_session_info session; __u32 l_state; - __u8 __align[4]; + __u16 flags; + __u8 __align[2]; }; struct opal_new_pw { -- cgit v1.2.3 From 270605317366e4535d8d9fc3d9da1ad0fb3c9d45 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 8 Dec 2022 02:11:37 +0530 Subject: bpf: Rework process_dynptr_func Recently, user ringbuf support introduced a PTR_TO_DYNPTR register type for use in callback state, because in case of user ringbuf helpers, there is no dynptr on the stack that is passed into the callback. To reflect such a state, a special register type was created. However, some checks have been bypassed incorrectly during the addition of this feature. First, for arg_type with MEM_UNINIT flag which initialize a dynptr, they must be rejected for such register type. Secondly, in the future, there are plans to add dynptr helpers that operate on the dynptr itself and may change its offset and other properties. In all of these cases, PTR_TO_DYNPTR shouldn't be allowed to be passed to such helpers, however the current code simply returns 0. The rejection for helpers that release the dynptr is already handled. For fixing this, we take a step back and rework existing code in a way that will allow fitting in all classes of helpers and have a coherent model for dealing with the variety of use cases in which dynptr is used. First, for ARG_PTR_TO_DYNPTR, it can either be set alone or together with a DYNPTR_TYPE_* constant that denotes the only type it accepts. Next, helpers which initialize a dynptr use MEM_UNINIT to indicate this fact. To make the distinction clear, use MEM_RDONLY flag to indicate that the helper only operates on the memory pointed to by the dynptr, not the dynptr itself. In C parlance, it would be equivalent to taking the dynptr as a point to const argument. When either of these flags are not present, the helper is allowed to mutate both the dynptr itself and also the memory it points to. Currently, the read only status of the memory is not tracked in the dynptr, but it would be trivial to add this support inside dynptr state of the register. With these changes and renaming PTR_TO_DYNPTR to CONST_PTR_TO_DYNPTR to better reflect its usage, it can no longer be passed to helpers that initialize a dynptr, i.e. bpf_dynptr_from_mem, bpf_ringbuf_reserve_dynptr. A note to reviewers is that in code that does mark_stack_slots_dynptr, and unmark_stack_slots_dynptr, we implicitly rely on the fact that PTR_TO_STACK reg is the only case that can reach that code path, as one cannot pass CONST_PTR_TO_DYNPTR to helpers that don't set MEM_RDONLY. In both cases such helpers won't be setting that flag. The next patch will add a couple of selftest cases to make sure this doesn't break. Fixes: 205715673844 ("bpf: Add bpf_user_ringbuf_drain() helper") Acked-by: Joanne Koong Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221207204141.308952-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f89de51a45db..464ca3f01fe7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5293,7 +5293,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5303,7 +5303,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5313,7 +5313,7 @@ union bpf_attr { * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr or if *flags* is not 0. * - * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description * Get a pointer to the underlying dynptr data. * @@ -5414,7 +5414,7 @@ union bpf_attr { * Drain samples from the specified user ring buffer, and invoke * the provided callback for each such sample: * - * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); * * If **callback_fn** returns 0, the helper will continue to try * and drain the next sample, up to a maximum of -- cgit v1.2.3 From b534dc46c8ae0165b1b2509be24dbea4fa9c4011 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 7 Dec 2022 09:37:01 -0500 Subject: net_tstamp: add SOF_TIMESTAMPING_OPT_ID_TCP Add an option to initialize SOF_TIMESTAMPING_OPT_ID for TCP from write_seq sockets instead of snd_una. This should have been the behavior from the start. Because processes may now exist that rely on the established behavior, do not change behavior of the existing option, but add the right behavior with a new flag. It is encouraged to always set SOF_TIMESTAMPING_OPT_ID_TCP on stream sockets along with the existing SOF_TIMESTAMPING_OPT_ID. Intuitively the contract is that the counter is zero after the setsockopt, so that the next write N results in a notification for the last byte N - 1. On idle sockets snd_una == write_seq and this holds for both. But on sockets with data in transmission, snd_una records the unacked offset in the stream. This depends on the ACK response from the peer. A process cannot learn this in a race free manner (ioctl SIOCOUTQ is one racy approach). write_seq records the offset at the last byte written by the process. This is a better starting point. It matches the intuitive contract in all circumstances, unaffected by external behavior. The new timestamp flag necessitates increasing sk_tsflags to 32 bits. Move the field in struct sock to avoid growing the socket (for some common CONFIG variants). The UAPI interface so_timestamping.flags is already int, so 32 bits wide. Reported-by: Sotirios Delimanolis Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20221207143701.29861-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_tstamp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 55501e5e7ac8..a2c66b3d7f0f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -31,8 +31,9 @@ enum { SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), + SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; -- cgit v1.2.3 From 1933ea365aa7a48ce26bea2ea09c9f7cc48cc668 Mon Sep 17 00:00:00 2001 From: wangchuanlei Date: Tue, 6 Dec 2022 20:38:57 -0500 Subject: net: openvswitch: Add support to count upcall packets Add support to count upall packets, when kmod of openvswitch upcall to count the number of packets for upcall succeed and failed, which is a better way to see how many packets upcalled on every interfaces. Signed-off-by: wangchuanlei Acked-by: Eelco Chaudron Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 94066f87e9ee..c5d62ee82567 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -277,11 +277,25 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_PAD, OVS_VPORT_ATTR_IFINDEX, OVS_VPORT_ATTR_NETNSID, + OVS_VPORT_ATTR_UPCALL_STATS, __OVS_VPORT_ATTR_MAX }; #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +/** + * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands + * @OVS_VPORT_UPCALL_SUCCESS: 64-bit upcall success packets. + * @OVS_VPORT_UPCALL_FAIL: 64-bit upcall fail packets. + */ +enum ovs_vport_upcall_attr { + OVS_VPORT_UPCALL_ATTR_SUCCESS, + OVS_VPORT_UPCALL_ATTR_FAIL, + __OVS_VPORT_UPCALL_ATTR_MAX +}; + +#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1) + enum { OVS_VXLAN_EXT_UNSPEC, OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ -- cgit v1.2.3 From b22bbdd17a5af2cc99cf42048970b9355dcbec8b Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Wed, 7 Dec 2022 13:35:54 +0200 Subject: uapi/linux/if_tun.h: Added new offload types for USO4/6. Added 2 additional offlloads for USO(IPv4 & IPv6). Separate offloads are required for Windows VM guests, g.e. Windows may set USO rx only for IPv4. Signed-off-by: Andrew Melnychenko Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index b6d7b868f290..287cdc81c939 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -90,6 +90,8 @@ #define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ #define TUN_F_UFO 0x10 /* I can handle UFO packets */ +#define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ +#define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 -- cgit v1.2.3 From 34061b348ae912b9783fae7fabae69d46977a036 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Wed, 7 Dec 2022 13:35:56 +0200 Subject: uapi/linux/virtio_net.h: Added USO types. Added new GSO type for USO: VIRTIO_NET_HDR_GSO_UDP_L4. Feature VIRTIO_NET_F_HOST_USO allows to enable NETIF_F_GSO_UDP_L4. Separated VIRTIO_NET_F_GUEST_USO4 & VIRTIO_NET_F_GUEST_USO6 features required for Windows guests. Signed-off-by: Andrew Melnychenko Signed-off-by: David S. Miller --- include/uapi/linux/virtio_net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 6cb842ea8979..b4062bed186a 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,9 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ +#define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */ +#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ +#define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */ #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ __u8 gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ -- cgit v1.2.3 From 47c50853bb9c5c35bf5c3eb00b712310ec2969e2 Mon Sep 17 00:00:00 2001 From: Igor Skalkin Date: Mon, 24 Oct 2022 15:40:33 +0200 Subject: virtio_bt: Fix alignment in configuration struct The current version of the configuration structure has unaligned 16-bit fields, but according to the specification [1], access to the configuration space must be aligned. Add a second, aligned version of the configuration structure and a new feature bit indicating that this version is being used. [1] https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf Signed-off-by: Igor Skalkin Signed-off-by: Luiz Augusto von Dentz --- include/uapi/linux/virtio_bt.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_bt.h b/include/uapi/linux/virtio_bt.h index a7bd48daa9a9..af798f4c9680 100644 --- a/include/uapi/linux/virtio_bt.h +++ b/include/uapi/linux/virtio_bt.h @@ -9,6 +9,7 @@ #define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ #define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ #define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ +#define VIRTIO_BT_F_CONFIG_V2 3 /* Use second version configuration */ enum virtio_bt_config_type { VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, @@ -28,4 +29,11 @@ struct virtio_bt_config { __u16 msft_opcode; } __attribute__((packed)); +struct virtio_bt_config_v2 { + __u8 type; + __u8 alignment; + __u16 vendor; + __u16 msft_opcode; +}; + #endif /* _UAPI_LINUX_VIRTIO_BT_H */ -- cgit v1.2.3 From 6afaae6d12f54accf194e73ece617d1f456e438d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 10 Dec 2022 16:56:29 +0200 Subject: bridge: mcast: Allow user space to add (*, G) with a source list and filter mode Add new netlink attributes to the RTM_NEWMDB request that allow user space to add (*, G) with a source list and filter mode. The RTM_NEWMDB message can already dump such entries (created by the kernel) so there is no need to add dump support. However, the message contains a different set of attributes depending if it is a request or a response. The naming and structure of the new attributes try to follow the existing ones used in the response. Request: [ struct nlmsghdr ] [ struct br_port_msg ] [ MDBA_SET_ENTRY ] struct br_mdb_entry [ MDBA_SET_ENTRY_ATTRS ] [ MDBE_ATTR_SOURCE ] struct in_addr / struct in6_addr [ MDBE_ATTR_SRC_LIST ] // new [ MDBE_SRC_LIST_ENTRY ] [ MDBE_SRCATTR_ADDRESS ] struct in_addr / struct in6_addr [ ...] [ MDBE_ATTR_GROUP_MODE ] // new u8 Response: [ struct nlmsghdr ] [ struct br_port_msg ] [ MDBA_MDB ] [ MDBA_MDB_ENTRY ] [ MDBA_MDB_ENTRY_INFO ] struct br_mdb_entry [ MDBA_MDB_EATTR_TIMER ] u32 [ MDBA_MDB_EATTR_SOURCE ] struct in_addr / struct in6_addr [ MDBA_MDB_EATTR_RTPROT ] u8 [ MDBA_MDB_EATTR_SRC_LIST ] [ MDBA_MDB_SRCLIST_ENTRY ] [ MDBA_MDB_SRCATTR_ADDRESS ] struct in_addr / struct in6_addr [ MDBA_MDB_SRCATTR_TIMER ] u8 [...] [ MDBA_MDB_EATTR_GROUP_MODE ] u8 Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a86a7e7b811f..0d9fe73fc48c 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -723,10 +723,30 @@ enum { enum { MDBE_ATTR_UNSPEC, MDBE_ATTR_SOURCE, + MDBE_ATTR_SRC_LIST, + MDBE_ATTR_GROUP_MODE, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) +/* per mdb entry source */ +enum { + MDBE_SRC_LIST_UNSPEC, + MDBE_SRC_LIST_ENTRY, + __MDBE_SRC_LIST_MAX, +}; +#define MDBE_SRC_LIST_MAX (__MDBE_SRC_LIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBE_SRC_LIST_ENTRY + */ +enum { + MDBE_SRCATTR_UNSPEC, + MDBE_SRCATTR_ADDRESS, + __MDBE_SRCATTR_MAX, +}; +#define MDBE_SRCATTR_MAX (__MDBE_SRCATTR_MAX - 1) + /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */ enum { BRIDGE_XSTATS_UNSPEC, -- cgit v1.2.3 From 1d7b66a7d9754c229d12c53ad6a1effe88c16ca4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 10 Dec 2022 16:56:30 +0200 Subject: bridge: mcast: Allow user space to specify MDB entry routing protocol Add the 'MDBE_ATTR_RTPORT' attribute to allow user space to specify the routing protocol of the MDB port group entry. Enforce a minimum value of 'RTPROT_STATIC' to prevent user space from using protocol values that should only be set by the kernel (e.g., 'RTPROT_KERNEL'). Maintain backward compatibility by defaulting to 'RTPROT_STATIC'. The protocol is already visible to user space in RTM_NEWMDB responses and notifications via the 'MDBA_MDB_EATTR_RTPROT' attribute. The routing protocol allows a routing daemon to distinguish between entries configured by it and those configured by the administrator. Once MDB flush is supported, the protocol can be used as a criterion according to which the flush is performed. Examples: # bridge mdb add dev br0 port dummy10 grp 239.1.1.1 permanent proto kernel Error: integer out of range. # bridge mdb add dev br0 port dummy10 grp 239.1.1.1 permanent proto static # bridge mdb add dev br0 port dummy10 grp 239.1.1.1 src 192.0.2.1 permanent proto zebra # bridge mdb add dev br0 port dummy10 grp 239.1.1.2 permanent source_list 198.51.100.1,198.51.100.2 filter_mode include proto 250 # bridge -d mdb show dev br0 port dummy10 grp 239.1.1.2 src 198.51.100.2 permanent filter_mode include proto 250 dev br0 port dummy10 grp 239.1.1.2 src 198.51.100.1 permanent filter_mode include proto 250 dev br0 port dummy10 grp 239.1.1.2 permanent filter_mode include source_list 198.51.100.2/0.00,198.51.100.1/0.00 proto 250 dev br0 port dummy10 grp 239.1.1.1 src 192.0.2.1 permanent filter_mode include proto zebra dev br0 port dummy10 grp 239.1.1.1 permanent filter_mode exclude proto static Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 0d9fe73fc48c..d9de241d90f9 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -725,6 +725,7 @@ enum { MDBE_ATTR_SOURCE, MDBE_ATTR_SRC_LIST, MDBE_ATTR_GROUP_MODE, + MDBE_ATTR_RTPROT, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) -- cgit v1.2.3 From b0305c1e0e27ad91187bc6d5ac3d502799faf239 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Dec 2022 12:03:19 +0000 Subject: KVM: x86/xen: Add KVM_XEN_INVALID_GPA and KVM_XEN_INVALID_GFN to uapi These are (uint64_t)-1 magic values are a userspace ABI, allowing the shared info pages and other enlightenments to be disabled. This isn't a Xen ABI because Xen doesn't let the guest turn these off except with the full SHUTDOWN_soft_reset mechanism. Under KVM, the userspace VMM is expected to handle soft reset, and tear down the kernel parts of the enlightenments accordingly. Suggested-by: Sean Christopherson Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-5-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 20522d4ba1e0..55155e262646 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr { __u8 runstate_update_flag; struct { __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) } shared_info; struct { __u32 send_port; @@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr { } u; }; + /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 @@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr { __u16 pad[3]; union { __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) __u64 pad[8]; struct { __u64 state; -- cgit v1.2.3 From 9eb803402a2a83400c6c6afd900e3b7c87c06816 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 16 Nov 2022 21:25:24 +0100 Subject: uapi:io_uring.h: allow linux/time_types.h to be skipped include/uapi/linux/io_uring.h is synced 1:1 into liburing:src/include/liburing/io_uring.h. liburing has a configure check to detect the need for linux/time_types.h. It can opt-out by defining UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()") Link: https://github.com/axboe/liburing/issues/708 Link: https://github.com/axboe/liburing/pull/709 Link: https://lore.kernel.org/io-uring/20221115212614.1308132-1-ammar.faizi@intel.com/T/#m9f5dd571cd4f6a5dee84452dbbca3b92ba7a4091 CC: Jens Axboe Cc: Ammar Faizi Signed-off-by: Stefan Metzmacher Reviewed-by: Ammar Faizi Link: https://lore.kernel.org/r/7071a0a1d751221538b20b63f9160094fc7e06f4.1668630247.git.metze@samba.org Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9d4c4078e8d0..2780bce62faf 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,7 +10,15 @@ #include #include +/* + * this file is shared with liburing and that has to autodetect + * if linux/time_types.h is available or not, it can + * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H + * if linux/time_types.h is not available + */ +#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H #include +#endif #ifdef __cplusplus extern "C" { -- cgit v1.2.3